From 8059fe16d3ac66a04c73f9e6b8094f720326e30a Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Thu, 12 Dec 2024 12:41:02 +0530 Subject: [PATCH 01/10] autohpl extract test cases --- quisby/benchmarks/auto_hpl/extract.py | 80 ++++++++++++++++++++------- 1 file changed, 59 insertions(+), 21 deletions(-) diff --git a/quisby/benchmarks/auto_hpl/extract.py b/quisby/benchmarks/auto_hpl/extract.py index bd922a8..76f23df 100644 --- a/quisby/benchmarks/auto_hpl/extract.py +++ b/quisby/benchmarks/auto_hpl/extract.py @@ -1,31 +1,69 @@ -import csv - -from quisby.pricing import cloud_pricing +import logging +from typing import List, Dict, Optional +from pathlib import Path from quisby.benchmarks.linpack.extract import linpack_format_data +logger = logging.getLogger(__name__) + + +def extract_auto_hpl_data( + path: str, + system_name: str +) -> Optional[List[Dict[str, str]]]: + """ + Extract Auto HPL benchmark data from a CSV file. + + Args: + path (str): Path to the CSV file + system_name (str): Name of the system being analyzed + + Returns: + Optional[List[Dict[str, str]]]: Processed benchmark results or None + + Raises: + FileNotFoundError: If the specified file does not exist + PermissionError: If there are insufficient permissions to read the file + ValueError: If the file format is incorrect + """ + # Validate input path + file_path = Path(path) + + # Check file existence and extension + if not file_path.exists(): + raise FileNotFoundError(f"File not found: {path}") + if file_path.suffix.lower() != '.csv': + raise ValueError(f"Invalid file type. Expected .csv, got {file_path.suffix}") -def extract_auto_hpl_data(path, system_name): + # Read file with proper error handling + with open(file_path, 'r', encoding='utf-8') as file: + file_data = file.readlines() - if path.endswith(".csv"): - with open(path) as file: - results = [] - file_data = file.readlines() + # Check for minimum required data + if len(file_data) < 2: + logger.warning(f"Insufficient data in file: {path}") + return None - if len(file_data) > 1: - header_row = file_data[-2].strip().split(":") - data_row = file_data[-1].strip().split(":") + # Extract header and data rows + header_row = file_data[-2].strip().split(":") + data_row = file_data[-1].strip().split(":") - data_dict = {} - for key, value in zip(header_row, data_row): - data_dict[key] = value + # Validate data extraction + if len(header_row) != len(data_row): + raise ValueError("Mismatched header and data lengths") - results = linpack_format_data( - results=results, system_name=system_name, gflops=data_dict["Gflops"] - ) + # Create dictionary from rows + data_dict = dict(zip(header_row, data_row)) - if results: - return results + # Validate required field + if 'Gflops' not in data_dict: + raise KeyError("Missing 'Gflops' in data") - else: - return None + # Process and format data + results: List[Dict[str, str]] = [] + formatted_results = linpack_format_data( + results=results, + system_name=system_name, + gflops=data_dict["Gflops"] + ) + return formatted_results if formatted_results else None From d26b5cb4ce5140c95db271d9da38fb84b2362a8b Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Thu, 12 Dec 2024 12:46:37 +0530 Subject: [PATCH 02/10] autohpl extract test cases --- tests/__init__.py | 0 tests/test_benchmarks/__init__.py | 0 .../data/auto_hpl/empty_data.csv | 0 .../data/auto_hpl/insufficient_data.csv | 1 + .../data/auto_hpl/invalid_data.csv | 2 + .../data/auto_hpl/invalid_data.txt | 1 + .../data/auto_hpl/mismatched_header_data.csv | 2 + .../data/auto_hpl/missing_gflops.csv | 
2 + .../data/auto_hpl/permission_error_file.csv | 0 .../data/auto_hpl/valid_data.csv | 17 +++++ tests/test_benchmarks/test_auto_hpl.py | 67 +++++++++++++++++++ 11 files changed, 92 insertions(+) create mode 100644 tests/__init__.py create mode 100644 tests/test_benchmarks/__init__.py create mode 100644 tests/test_benchmarks/data/auto_hpl/empty_data.csv create mode 100644 tests/test_benchmarks/data/auto_hpl/insufficient_data.csv create mode 100644 tests/test_benchmarks/data/auto_hpl/invalid_data.csv create mode 100644 tests/test_benchmarks/data/auto_hpl/invalid_data.txt create mode 100644 tests/test_benchmarks/data/auto_hpl/mismatched_header_data.csv create mode 100644 tests/test_benchmarks/data/auto_hpl/missing_gflops.csv create mode 100644 tests/test_benchmarks/data/auto_hpl/permission_error_file.csv create mode 100644 tests/test_benchmarks/data/auto_hpl/valid_data.csv create mode 100644 tests/test_benchmarks/test_auto_hpl.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_benchmarks/__init__.py b/tests/test_benchmarks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_benchmarks/data/auto_hpl/empty_data.csv b/tests/test_benchmarks/data/auto_hpl/empty_data.csv new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_benchmarks/data/auto_hpl/insufficient_data.csv b/tests/test_benchmarks/data/auto_hpl/insufficient_data.csv new file mode 100644 index 0000000..322a34d --- /dev/null +++ b/tests/test_benchmarks/data/auto_hpl/insufficient_data.csv @@ -0,0 +1 @@ +T/V:N:NB:P:Q:Time:Gflops diff --git a/tests/test_benchmarks/data/auto_hpl/invalid_data.csv b/tests/test_benchmarks/data/auto_hpl/invalid_data.csv new file mode 100644 index 0000000..1bbb201 --- /dev/null +++ b/tests/test_benchmarks/data/auto_hpl/invalid_data.csv @@ -0,0 +1,2 @@ +T/V:N:NB:P:Q:Time:Gflops +WR12R2R4:9216:256:1:1:13.05:InvalidGflops diff --git a/tests/test_benchmarks/data/auto_hpl/invalid_data.txt b/tests/test_benchmarks/data/auto_hpl/invalid_data.txt new file mode 100644 index 0000000..0ed50ca --- /dev/null +++ b/tests/test_benchmarks/data/auto_hpl/invalid_data.txt @@ -0,0 +1 @@ +This is not a CSV file diff --git a/tests/test_benchmarks/data/auto_hpl/mismatched_header_data.csv b/tests/test_benchmarks/data/auto_hpl/mismatched_header_data.csv new file mode 100644 index 0000000..1929101 --- /dev/null +++ b/tests/test_benchmarks/data/auto_hpl/mismatched_header_data.csv @@ -0,0 +1,2 @@ +T/V:N:NB:P:Q:Time:Gflops +WR12R2R4:9216:256:1:1:13.05:40.0:extra_column diff --git a/tests/test_benchmarks/data/auto_hpl/missing_gflops.csv b/tests/test_benchmarks/data/auto_hpl/missing_gflops.csv new file mode 100644 index 0000000..6d35d6e --- /dev/null +++ b/tests/test_benchmarks/data/auto_hpl/missing_gflops.csv @@ -0,0 +1,2 @@ +T/V:N:NB:P:Q:Time +WR12R2R4:9216:256:1:1:13.05 diff --git a/tests/test_benchmarks/data/auto_hpl/permission_error_file.csv b/tests/test_benchmarks/data/auto_hpl/permission_error_file.csv new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_benchmarks/data/auto_hpl/valid_data.csv b/tests/test_benchmarks/data/auto_hpl/valid_data.csv new file mode 100644 index 0000000..e1582d1 --- /dev/null +++ b/tests/test_benchmarks/data/auto_hpl/valid_data.csv @@ -0,0 +1,17 @@ +# Test general meta start +# Test: auto_hpl +# Results version: 1.0 +# Host: c4a-standard-16 +# Sys environ: gcp +# Tuned: tuned_none +# OS: 5.14.0-503.11.1.el9_5.aarch64 +# Numa nodes: 1 +# CPU family: Neoverse-V2 +# Number cpus: 16 +# 
Memory: 65121408kB +# Test general meta end +# Test meta data start +# /usr/lib64/openmpi/bin/mpirun --allow-run-as-root -np 1 --mca btl self,vader --report-bindings --map-by l3cache -x OMP_NUM_THREADS=16 ./xhpl +# Test meta data end +T/V:N:NB:P:Q:Time:Gflops +WR12R2R4:78336:256:1:1:659.06:4.8627e+02 \ No newline at end of file diff --git a/tests/test_benchmarks/test_auto_hpl.py b/tests/test_benchmarks/test_auto_hpl.py new file mode 100644 index 0000000..3614a4e --- /dev/null +++ b/tests/test_benchmarks/test_auto_hpl.py @@ -0,0 +1,67 @@ +import unittest +import os +from unittest.mock import patch +from pathlib import Path +from quisby.benchmarks.auto_hpl.extract import extract_auto_hpl_data +from quisby.benchmarks.linpack.extract import linpack_format_data + +class TestAutoHPLExtract(unittest.TestCase): + + # Helper function to get the path for the sample data + def get_sample_data_path(self, filename): + return os.path.join(os.path.dirname(__file__), 'data', 'auto_hpl', filename) + + # Test when the file is correctly formatted + @patch("quisby.benchmarks.auto_hpl.extract.linpack_format_data") + def test_valid_file(self, mock_linpack_format_data): + valid_file_path = self.get_sample_data_path("valid_data.csv") + mock_linpack_format_data.return_value = [{"system": "TestSystem", "gflops": '4.8627e+02'}] + system_name = "TestSystem" + + result = extract_auto_hpl_data(valid_file_path, system_name) + mock_linpack_format_data.assert_called_with( + results=[], system_name=system_name, gflops="4.8627e+02" + ) + self.assertEqual(result, [{"system": "TestSystem", "gflops": "4.8627e+02"}]) + + # Test when the file does not exist (FileNotFoundError) + def test_file_not_found(self): + invalid_file_path = "/path/to/nonexistent/file.csv" + with self.assertRaises(FileNotFoundError): + extract_auto_hpl_data(invalid_file_path, "TestSystem") + + # Test when the file does not have the correct extension (ValueError) + def test_invalid_file_extension(self): + invalid_file_path = self.get_sample_data_path("invalid_data.txt") # A non-CSV file + with self.assertRaises(ValueError): + res = extract_auto_hpl_data(invalid_file_path, "TestSystem") + print(res) + + # Test when the file has insufficient data (less than two lines) + def test_insufficient_data(self): + insufficient_data_file_path = self.get_sample_data_path("insufficient_data.csv") + result = extract_auto_hpl_data(insufficient_data_file_path, "TestSystem") + self.assertIsNone(result) + + # Test when the Gflops field is missing (KeyError) + def test_missing_gflops(self): + missing_gflops_file_path = self.get_sample_data_path("missing_gflops.csv") + with self.assertRaises(KeyError): + extract_auto_hpl_data(missing_gflops_file_path, "TestSystem") + + # Test when there is a mismatch in header and data length (ValueError) + def test_mismatched_header_and_data(self): + mismatched_file_path = self.get_sample_data_path("mismatched_header_data.csv") + with self.assertRaises(ValueError): + extract_auto_hpl_data(mismatched_file_path, "TestSystem") + + # Test when there are permission issues with the file (PermissionError) + def test_permission_error(self): + permission_error_file_path = self.get_sample_data_path("permission_error_file.csv") + # Mocking os.path.exists and open to simulate a PermissionError + with patch("builtins.open", side_effect=PermissionError("Permission denied")): + with self.assertRaises(PermissionError): + extract_auto_hpl_data(permission_error_file_path, "TestSystem") + +if __name__ == '__main__': + unittest.main() From 
d17bf7d647fba20d6f108f97107cf6a5873bf6ec Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Mon, 6 Jan 2025 09:18:42 +0530 Subject: [PATCH 03/10] auto_hpl and linpack reformatting --- quisby/benchmarks/auto_hpl/comparison.py | 39 +++--- quisby/benchmarks/auto_hpl/summary.py | 22 ++- quisby/benchmarks/linpack/comparison.py | 89 +++++++----- quisby/benchmarks/linpack/extract.py | 166 +++++++++++++++-------- quisby/benchmarks/linpack/summary.py | 96 +++++++++---- 5 files changed, 278 insertions(+), 134 deletions(-) diff --git a/quisby/benchmarks/auto_hpl/comparison.py b/quisby/benchmarks/auto_hpl/comparison.py index eb4db6b..0c2d9f4 100644 --- a/quisby/benchmarks/auto_hpl/comparison.py +++ b/quisby/benchmarks/auto_hpl/comparison.py @@ -1,20 +1,25 @@ from quisby.benchmarks.linpack.comparison import compare_linpack_results -def compare_auto_hpl_results(spreadsheets, spreadsheetId, test_name): - compare_linpack_results(spreadsheets, spreadsheetId, test_name) - - - - - - - - - - - - - - - +def compare_auto_hpl_results(spreadsheets, spreadsheet_id, test_name): + """ + Compares AutoHPL results using Linpack benchmark data. + + This function calls the `compare_linpack_results` function to compare + Linpack results for AutoHPL tests. It uses provided spreadsheets and test + details to perform the comparison. + + Args: + spreadsheets (list): A list of spreadsheet data to compare. + spreadsheet_id (str): The ID of the spreadsheet containing the results. + test_name (str): The name of the test to compare. + + Returns: + None + """ + try: + # Call the Linpack comparison function with the provided arguments + compare_linpack_results(spreadsheets, spreadsheet_id, test_name) + except Exception as e: + # Handle errors that may occur during comparison + raise RuntimeError(f"Error comparing AutoHPL results: {str(e)}") diff --git a/quisby/benchmarks/auto_hpl/summary.py b/quisby/benchmarks/auto_hpl/summary.py index dbb2229..60616ae 100644 --- a/quisby/benchmarks/auto_hpl/summary.py +++ b/quisby/benchmarks/auto_hpl/summary.py @@ -1,5 +1,23 @@ from quisby.benchmarks.linpack.summary import create_summary_linpack_data -def create_summary_auto_hpl_data(results,OS_RELEASE): - return create_summary_linpack_data(results,OS_RELEASE) \ No newline at end of file +def create_summary_auto_hpl_data(results, os_release): + """ + Creates a summary of AutoHPL test results. + + This function calls `create_summary_linpack_data` to generate a summary of + the AutoHPL test results based on the provided data and OS release information. + + Args: + results (list): The test results to be summarized. + os_release (str): The operating system release version used for the test. + + Returns: + Any: Returns the summary data generated by `create_summary_linpack_data`. 
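+
+    Example (row layout follows linpack_format_data; the values are
+    illustrative, not real measurements, and the call assumes the
+    cloud/test config has been set up):
+
+        rows = [["m5.xlarge", 4, 486.27, 1, 0.192, 2532.66]]
+        summary = create_summary_auto_hpl_data(rows, "RHEL-9.5")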
+ """ + try: + # Call the function to create the summary for AutoHPL test results + return create_summary_linpack_data(results, os_release) + except Exception as e: + # Handle potential errors and raise with a descriptive message + raise RuntimeError(f"Error creating summary for AutoHPL data: {str(e)}") diff --git a/quisby/benchmarks/linpack/comparison.py b/quisby/benchmarks/linpack/comparison.py index e28cb71..c3e0453 100644 --- a/quisby/benchmarks/linpack/comparison.py +++ b/quisby/benchmarks/linpack/comparison.py @@ -1,5 +1,4 @@ from quisby import custom_logger - from quisby.sheet.sheet_util import ( read_sheet, append_to_sheet, @@ -10,18 +9,35 @@ ) from quisby.util import percentage_deviation +def compare_linpack_results(spreadsheets, spreadsheet_id, test_name): + """ + Compares Linpack test results from two spreadsheets and appends the comparison results + to the specified spreadsheet. + + This function compares the GFLOPS, scaling, and price-performance data between + two sets of test results, calculates the percentage differences, and updates the + results on a Google Sheet. -def compare_linpack_results(spreadsheets, spreadsheetId, test_name): + Args: + spreadsheets (list): A list of spreadsheets containing the test data to compare. + spreadsheet_id (str): The ID of the spreadsheet to append the results to. + test_name (str): The name of the test whose results are being compared. + + Returns: + str: The ID of the spreadsheet where the results were appended, or the same ID if the operation fails. + """ values = [] results = [] - spreadsheet_name = [] + spreadsheet_names = [] + # Read the test data from both spreadsheets for spreadsheet in spreadsheets: values.append(read_sheet(spreadsheet, test_name)) - spreadsheet_name.append( + spreadsheet_names.append( get_sheet(spreadsheet, test_name)["properties"]["title"] ) + # Initialize results with headers for value in values[0]: for ele in values[1]: if value[0] == "System" and ele[0] == "System": @@ -42,38 +58,43 @@ def compare_linpack_results(spreadsheets, spreadsheetId, test_name): ] ) break - else: - if value[0] == ele[0]: - price_perf = [] - price_perf.append(float(value[2]) / float(value[4])) - price_perf.append(float(ele[2]) / float(ele[4])) - price_perf_diff = percentage_deviation(price_perf[0], price_perf[1]) - percentage_diff = percentage_deviation(value[2], ele[2]) - gflop_diff = percentage_deviation(value[3], ele[3]) - results.append( - [ - value[0], - value[1], - value[2], - ele[2], - percentage_diff, - value[3], - ele[3], - gflop_diff, - value[4], - price_perf[0], - price_perf[1], - price_perf_diff, - ] - ) + elif value[0] == ele[0]: + # Calculate percentage differences for GFLOPS, scaling, and price-performance + price_perf = [ + float(value[2]) / float(value[4]), + float(ele[2]) / float(ele[4]), + ] + price_perf_diff = percentage_deviation(price_perf[0], price_perf[1]) + percentage_diff = percentage_deviation(value[2], ele[2]) + gflop_diff = percentage_deviation(value[3], ele[3]) + + results.append( + [ + value[0], + value[1], + value[2], + ele[2], + percentage_diff, + value[3], + ele[3], + gflop_diff, + value[4], + price_perf[0], + price_perf[1], + price_perf_diff, + ] + ) + + # Attempt to update the spreadsheet with the new comparison data try: - create_sheet(spreadsheetId, test_name) + create_sheet(spreadsheet_id, test_name) custom_logger.info("Deleting existing charts and data from the sheet...") - clear_sheet_charts(spreadsheetId, test_name) - clear_sheet_data(spreadsheetId, test_name) - custom_logger.info("Appending 
new " + test_name + " data to sheet...") - append_to_sheet(spreadsheetId, results, test_name) + clear_sheet_charts(spreadsheet_id, test_name) + clear_sheet_data(spreadsheet_id, test_name) + custom_logger.info(f"Appending new {test_name} data to sheet...") + append_to_sheet(spreadsheet_id, results, test_name) except Exception as exc: + # Log the error and return the spreadsheet ID if the operation fails custom_logger.debug(str(exc)) custom_logger.error("Failed to append data to sheet") - return spreadsheetId + return spreadsheet_id diff --git a/quisby/benchmarks/linpack/extract.py b/quisby/benchmarks/linpack/extract.py index 3b69d11..7b3d7aa 100644 --- a/quisby/benchmarks/linpack/extract.py +++ b/quisby/benchmarks/linpack/extract.py @@ -1,93 +1,145 @@ import csv import glob -import os.path +import logging +import os import re - from quisby.pricing.cloud_pricing import get_cloud_pricing, get_cloud_cpu_count from quisby.util import read_config +# Setting up logger for better error tracking and debugging +logger = logging.getLogger(__name__) + def linpack_format_data(**kwargs): """ - Add data into format to be shown in spreadsheets - Supports linpack like data. eg: autohpl + Adds data into a format suitable for spreadsheets. + + This function processes Linpack-like data (e.g., autohpl) to include system + information, GFLOPS, pricing, and CPU cores. + + Args: + kwargs: A dictionary containing the required input data, including: + - 'results': List to store formatted data. + - 'system_name': The name of the system being tested. + - 'gflops': The GFLOPS result from the test. + + Returns: + list: Updated 'results' list with the new data. + None: If GFLOPS data is not available or invalid. """ region = read_config("cloud", "region") - cloud_type = read_config("cloud", "cloud_type") + cloud_type = read_config("cloud", "cloud_type").lower() os_release = read_config("test", "OS_RELEASE") os_type = read_config("test", "os_type") - results = kwargs["results"] if kwargs["results"] else [] - system_name = kwargs["system_name"] if kwargs["system_name"] else None - if kwargs["gflops"]: - gflops = float(kwargs["gflops"]) - else: + + results = kwargs.get("results", []) + system_name = kwargs.get("system_name") + + # Ensure GFLOPS is provided and valid + gflops = kwargs.get("gflops") + if not gflops: + logger.warning(f"GFLOPS value is missing for system {system_name}. Skipping.") return None - price_per_hour = get_cloud_pricing( - system_name, region, cloud_type.lower(), os_type - ) - - no_of_cores = get_cloud_cpu_count( - system_name, region, cloud_type.lower() - ) - - results.append( - [ - system_name, - no_of_cores, - gflops, - 1, - price_per_hour, - float(gflops) / float(price_per_hour), - ] - ) + try: + gflops = float(gflops) + except ValueError: + logger.error(f"Invalid GFLOPS value: {gflops}. Could not convert to float.") + raise ValueError(f"Invalid GFLOPS value: {gflops}. 
Could not convert to float.") + + # Fetch pricing and CPU details from the cloud pricing API + try: + price_per_hour = get_cloud_pricing(system_name, region, cloud_type, os_type) + no_of_cores = get_cloud_cpu_count(system_name, region, cloud_type) + except Exception as e: + logger.error(f"Error fetching cloud pricing or CPU count for system {system_name}: {str(e)}") + raise RuntimeError(f"Error fetching cloud pricing or CPU count: {str(e)}") + + # If price_per_hour is invalid or 0, return an empty result to avoid divide by zero errors + if not price_per_hour or price_per_hour == 0.0: + logger.warning(f"Invalid price_per_hour for system {system_name}, skipping.") + return [] + + # Append formatted data to results + results.append([ + system_name, + no_of_cores, + gflops, + 1, # Assuming '1' refers to a single test instance + price_per_hour, + gflops / price_per_hour + ]) return results def extract_linpack_data(path, system_name): """ - Make shift function to handle linpack summary data - till a resolution is reached - """ + Extracts Linpack summary data from files and formats it for analysis. + + This function handles the extraction of data from Linpack summary files + and provides information about GFLOPS and the number of cores used. + Args: + path (str): Path to the directory containing Linpack summary files. + system_name (str): Name of the system being tested. + + Returns: + tuple: A tuple containing: + - list: Processed Linpack results. + - list: Summary data including file paths for reference. + """ results = [] + summary_data = [] no_of_cores = None gflops = None - summary_data = [] - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") + # Check if the summary file exists summary_file = path - if not os.path.isfile(summary_file): - return None - - if os.path.basename(summary_file).endswith("csv"): - with open(summary_file) as csv_file: - csv_reader = csv.DictReader(csv_file, delimiter=":") - list_data = list(csv_reader) - last_row = list_data[-1] - gflops = last_row["MB/sec"] - threads = last_row["threads"] - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) + logger.error(f"Summary file {summary_file} not found for system {system_name}.") + raise FileNotFoundError(f"Summary file {summary_file} not found.") + + # Process CSV summary file + if summary_file.endswith("csv"): + try: + with open(summary_file, 'r') as csv_file: + csv_reader = csv.DictReader(csv_file, delimiter=":") + list_data = list(csv_reader) + last_row = list_data[-1] + + gflops = last_row.get("MB/sec") + threads = last_row.get("threads") + except Exception as e: + logger.error(f"Error reading CSV summary file {summary_file}: {str(e)}") + raise RuntimeError(f"Error reading CSV summary file {summary_file}: {str(e)}") else: - return results, summary_data - - for file_path in glob.glob(path + f"/linpack*_threads_{threads}_*"): - with open(file_path) as txt_file: - data = txt_file.readlines() - for row in data: - if re.findall(r"Number of cores: (\d+)", row): - no_of_cores = re.findall(r"Number of cores: (\d+)", row)[0] - break - + # Return empty results if the file is not CSV + logger.warning(f"Summary file {summary_file} is not in CSV format. 
Skipping.") + return results + + # Process individual Linpack result files + if threads: + for file_path in glob.glob(f"{path}/linpack*_threads_{threads}_*"): + try: + with open(file_path, 'r') as txt_file: + data = txt_file.readlines() + for row in data: + match = re.search(r"Number of cores: (\d+)", row) + if match: + no_of_cores = match.group(1) + break + except Exception as e: + logger.error(f"Error reading Linpack result file {file_path}: {str(e)}") + raise RuntimeError(f"Error reading Linpack result file {file_path}: {str(e)}") + + # If GFLOPS data is found, format and append it if gflops: results = linpack_format_data( results=results, system_name=system_name, no_of_cores=no_of_cores, - gflops=gflops, + gflops=gflops ) - return results, summary_data + return results diff --git a/quisby/benchmarks/linpack/summary.py b/quisby/benchmarks/linpack/summary.py index 93a44d4..eeea478 100644 --- a/quisby/benchmarks/linpack/summary.py +++ b/quisby/benchmarks/linpack/summary.py @@ -1,10 +1,21 @@ import re from itertools import groupby - from quisby.util import mk_int, process_instance, read_config def extract_prefix_and_number(input_string): + """ + Extracts the prefix, number, and suffix from a given string. + + Args: + input_string (str): The string to extract the prefix, number, and suffix from. + + Returns: + tuple: A tuple containing: + - prefix (str): The prefix part of the string. + - number (int): The number extracted from the string. + - suffix (str): The suffix part of the string. + """ match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) if match: prefix = match.group(1) @@ -15,38 +26,67 @@ def extract_prefix_and_number(input_string): def custom_key(item): - cloud_type = read_config("cloud","cloud_type") + """ + Generates a custom key for sorting/grouping based on the cloud type and item format. + + Args: + item (tuple): The item to generate the key for. The item is expected to be a tuple + where the first element is a string representing the instance type. + + Returns: + tuple: A tuple used as the sorting/grouping key. + """ + cloud_type = read_config("cloud", "cloud_type") + if item[0] == "local": return item[0] elif cloud_type == "aws": - instance_type =item[0].split(".")[0] - instance_number = item[0].split(".")[1] + instance_type, instance_number = item[0].split(".")[0], item[0].split(".")[1] return instance_type, instance_number elif cloud_type == "gcp": - instance_type = item[0].split("-")[0] - instance_number = int(item[0].split('-')[-1]) - return instance_type, instance_number + instance_type = item[0].split("-")[0] + instance_number = int(item[0].split('-')[-1]) + return instance_type, instance_number elif cloud_type == "azure": - instance_type, instance_number, version=extract_prefix_and_number(item[0]) + instance_type, version, instance_number = extract_prefix_and_number(item[0]) return instance_type, version, instance_number def group_data(results): + """ + Groups the data based on cloud type and instance attributes. + + Args: + results (list): A list of results that need to be grouped. + + Returns: + itertools.groupby: A grouped object based on the instance attributes. 
+ """ cloud_type = read_config("cloud", "cloud_type") + if cloud_type == "aws": return groupby(results, key=lambda x: process_instance(x[0], "family", "version", "feature", "machine_type")) elif cloud_type == "azure": results = sorted(results, key=lambda x: process_instance(x[0], "family", "feature")) return groupby(results, key=lambda x: process_instance(x[0], "family", "version", "feature")) elif cloud_type == "gcp": - return groupby(results, key=lambda x: process_instance(x[0], "family", "version","sub_family","feature")) + return groupby(results, key=lambda x: process_instance(x[0], "family", "version", "sub_family", "feature")) elif cloud_type == "local": return groupby(results, key=lambda x: process_instance(x[0], "family")) - def sort_data(results): + """ + Sorts the results based on cloud type and instance attributes. + + Args: + results (list): A list of results to be sorted. + + Returns: + list: The sorted results. + """ cloud_type = read_config("cloud", "cloud_type") + if cloud_type == "aws": results.sort(key=lambda x: str(process_instance(x[0], "family"))) elif cloud_type == "azure": @@ -57,28 +97,38 @@ def sort_data(results): return groupby(results, key=lambda x: process_instance(x[0], "family")) -def create_summary_linpack_data(results, OS_RELEASE): +def create_summary_linpack_data(results, os_release): + """ + Creates a summary of Linpack test data, including GFLOPS, scaling, and cost information. + + Args: + results (list): The results from the Linpack test that need to be summarized. + os_release (str): The OS release for which the summary is being created. + + Returns: + list: The summarized results, including headers and computed values. + """ sorted_results = [] - header = [] - header.append( + header = [ [ "System", "Cores", - f"GFLOPS-{OS_RELEASE}", - f"GFLOP Scaling-{OS_RELEASE}", + f"GFLOPS-{os_release}", + f"GFLOP Scaling-{os_release}", "Cost/hr", - f"Price-perf-{OS_RELEASE}", + f"Price-perf-{os_release}", ] - ) + ] - results = list(filter(None, results)) + results = list(filter(None, results)) # Remove any None entries sort_data(results) - #results.sort(key=lambda x: str((x[0], "family", "version","sub_family", "feature"))) - for _, items in group_data(results): + for _, items in group_data(results): items = list(items) sorted_data = sorted(items, key=lambda x: mk_int(process_instance(x[0], "size"))) + cpu_scale, base_gflops = None, None + for index, row in enumerate(sorted_data): if not cpu_scale and not base_gflops: cpu_scale = int(row[1]) @@ -90,10 +140,8 @@ def create_summary_linpack_data(results, OS_RELEASE): cpu_scaling = 0 gflops_scaling = float(row[2]) / (int(row[1]) - cpu_scale) / base_gflops if cpu_scaling != 0 else 1 sorted_data[index][3] = format(gflops_scaling, ".4f") - res = [] - for item in sorted_data: - res.append(item) + + res = [item for item in sorted_data] sorted_results += header + res - # sorted_results += header_row + sorted_data return sorted_results From edfb0102b2b205709b4fc5fb0c0c45dc33360fc8 Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Mon, 6 Jan 2025 09:19:02 +0530 Subject: [PATCH 04/10] auto_hpl and linpack reformatting --- quisby/benchmarks/auto_hpl/graph.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/quisby/benchmarks/auto_hpl/graph.py b/quisby/benchmarks/auto_hpl/graph.py index bf1333d..3c475dd 100644 --- a/quisby/benchmarks/auto_hpl/graph.py +++ b/quisby/benchmarks/auto_hpl/graph.py @@ -1,6 +1,25 @@ from quisby.benchmarks.linpack.graph import graph_linpack_data -def 
graph_auto_hpl_data(spreadsheetId, test_name, action): - return graph_linpack_data(spreadsheetId, test_name, action) +def graph_auto_hpl_data(spreadsheet_id, test_name, action): + """ + Graphs AutoHPL data based on the provided test results. + This function calls the `graph_linpack_data` function to generate graphs + for the AutoHPL test data based on the provided spreadsheet, test name, + and action to be performed. + + Args: + spreadsheet_id (str): The ID of the spreadsheet containing the data. + test_name (str): The name of the test for which data is to be graphed. + action (str): The action to perform (e.g., "generate", "update", etc.) when graphing. + + Returns: + Any: Returns the result of the `graph_linpack_data` function. + """ + try: + # Call the function to graph the Linpack data for AutoHPL + return graph_linpack_data(spreadsheet_id, test_name, action) + except Exception as e: + # Handle potential errors and raise with a descriptive message + raise RuntimeError(f"Error graphing AutoHPL data: {str(e)}") From abbfa5cf1324326df93835e1d407825e8efc9225 Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Mon, 6 Jan 2025 09:24:23 +0530 Subject: [PATCH 05/10] coremark reformatting --- quisby/benchmarks/coremark/compare.py | 183 +++++++------ quisby/benchmarks/coremark/coremark.py | 342 ++++++++++++++++--------- 2 files changed, 324 insertions(+), 201 deletions(-) diff --git a/quisby/benchmarks/coremark/compare.py b/quisby/benchmarks/coremark/compare.py index cf8732f..b98e1e6 100644 --- a/quisby/benchmarks/coremark/compare.py +++ b/quisby/benchmarks/coremark/compare.py @@ -1,12 +1,13 @@ from quisby import custom_logger from itertools import groupby - from quisby.sheet.sheet_util import ( create_spreadsheet, append_to_sheet, read_sheet, get_sheet, - create_sheet, clear_sheet_data, clear_sheet_charts, + create_sheet, + clear_sheet_data, + clear_sheet_charts, ) from quisby.util import combine_two_array_alternating, merge_lists_alternately, read_config from quisby.benchmarks.coremark.graph import graph_coremark_data @@ -14,92 +15,126 @@ def extract_prefix_and_number(input_string): - match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) - if match: - prefix = match.group(1) - suffix = match.group(3) # Extracts the suffix after the number - return prefix, suffix + """ + Extracts the prefix and suffix from an instance name that contains a number. + + :param input_string: Instance name, e.g., 't2.micro-01' + :return: Tuple (prefix, suffix) or (None, None) if no match + """ + try: + match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) + if match: + prefix = match.group(1) + suffix = match.group(3) # Extracts the suffix after the number + return prefix, suffix + except Exception as exc: + custom_logger.error(f"Error extracting prefix and number from '{input_string}': {str(exc)}") return None, None def compare_inst(item1, item2): - cloud_type = read_config("cloud", "cloud_type") - if cloud_type == "local": - return True - elif cloud_type == "aws": - return item1.split(".")[0] == item2.split(".")[0] - elif cloud_type == "gcp": + """ + Compares two instance names based on the cloud provider's convention. 
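+
+    Example (instance names are illustrative; assumes the configured
+    cloud_type is "aws"):
+
+        compare_inst("m5.xlarge", "m5.2xlarge")  # -> True, same "m5" prefix
+        compare_inst("m5.xlarge", "c5.xlarge")   # -> False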
- return item1.split("-")[0] == item2.split("-")[0] - elif cloud_type == "azure": - return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) + :param item1: Instance name from the first spreadsheet + :param item2: Instance name from the second spreadsheet + :return: True if instance names are considered the same, False otherwise + """ + try: + cloud_type = read_config("cloud", "cloud_type") + if cloud_type == "local": + return True + elif cloud_type == "aws": + return item1.split(".")[0] == item2.split(".")[0] + elif cloud_type == "gcp": + return item1.split("-")[0] == item2.split("-")[0] + elif cloud_type == "azure": + return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) + except Exception as exc: + custom_logger.error(f"Error comparing instances '{item1}' and '{item2}': {str(exc)}") + return False def compare_coremark_results(spreadsheets, spreadsheetId, test_name, table_name=["System name", "Price-perf"]): - values = [] - results = [] - spreadsheet_name = [] - - for spreadsheet in spreadsheets: - values.append(read_sheet(spreadsheet, range=test_name)) - spreadsheet_name.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) - - for index, value in enumerate(values): - values[index] = (list(g) for k, g in groupby(value, key=lambda x: x != []) if k) - list_1 = list(values[0]) - list_2 = list(values[1]) - - for value in list_1: - for ele in list_2: - # Check max throughput - if value[0][0] in table_name and ele[0][0] in table_name and value[0][0] == ele[0][0]: - if compare_inst(value[1][0], ele[1][0]): - results.append([""]) - for item1 in value: - for item2 in ele: - if item1[0] == item2[0]: - results = merge_lists_alternately(results, item1, item2) - break - - elif value[0][0] == "Cost/Hr" and ele[0][0] == "Cost/Hr": - if compare_inst(value[1][0], ele[1][0]): - results.append([""]) - for item1 in value: - for item2 in ele: - if item1[0] == item2[0]: - results.append(item1) - break - - elif value[1][0] == ele[1][0]: - if value[0][0] == ele[0][0]: - results.append([""]) - results.append(value[0]) - for item1, item2 in zip(value[1:], ele[1:]): - results = merge_lists_alternately(results, item1, item2) - break - + """ + Compares CoreMark results from multiple spreadsheets and appends the merged data to the target sheet. 
+ + :param spreadsheets: List of spreadsheet names to compare + :param spreadsheetId: Target spreadsheet ID for appending data + :param test_name: The name of the test to compare (e.g., 'coremark') + :param table_name: List of columns to compare (default ["System name", "Price-perf"]) + """ try: - create_sheet(spreadsheetId, test_name) - custom_logger.info("Deleting existing charts and data from the sheet...") - clear_sheet_charts(spreadsheetId, test_name) - clear_sheet_data(spreadsheetId, test_name) - custom_logger.info("Appending new " + test_name + " data to sheet...") - append_to_sheet(spreadsheetId, results, test_name) - #graph_coremark_data(spreadsheetId, test_name, "compare") + values = [] + results = [] + spreadsheet_name = [] + + # Read data from each spreadsheet + for spreadsheet in spreadsheets: + values.append(read_sheet(spreadsheet, range=test_name)) + spreadsheet_name.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) + + # Group the values into non-empty chunks + for index, value in enumerate(values): + values[index] = (list(g) for k, g in groupby(value, key=lambda x: x != []) if k) + + list_1 = list(values[0]) + list_2 = list(values[1]) + + # Compare the CoreMark results from both spreadsheets + for value in list_1: + for ele in list_2: + # Check max throughput or other table data + if value[0][0] in table_name and ele[0][0] in table_name and value[0][0] == ele[0][0]: + if compare_inst(value[1][0], ele[1][0]): + results.append([""]) + for item1 in value: + for item2 in ele: + if item1[0] == item2[0]: + results = merge_lists_alternately(results, item1, item2) + break + # Handle cost/hour comparison + elif value[0][0] == "Cost/Hr" and ele[0][0] == "Cost/Hr": + if compare_inst(value[1][0], ele[1][0]): + results.append([""]) + for item1 in value: + for item2 in ele: + if item1[0] == item2[0]: + results.append(item1) + break + # General comparison based on row keys + elif value[1][0] == ele[1][0]: + if value[0][0] == ele[0][0]: + results.append([""]) + results.append(value[0]) + for item1, item2 in zip(value[1:], ele[1:]): + results = merge_lists_alternately(results, item1, item2) + break + + # Try to append the merged data to the target sheet + try: + create_sheet(spreadsheetId, test_name) + custom_logger.info(f"Deleting existing charts and data from the sheet '{test_name}'...") + clear_sheet_charts(spreadsheetId, test_name) + clear_sheet_data(spreadsheetId, test_name) + custom_logger.info(f"Appending new {test_name} data to sheet...") + append_to_sheet(spreadsheetId, results, test_name) + # Optionally, generate a graph for CoreMark comparison + # graph_coremark_data(spreadsheetId, test_name, "compare") + except Exception as exc: + custom_logger.error(f"Failed to append data to sheet '{test_name}' in spreadsheet {spreadsheetId}: {str(exc)}") + return spreadsheetId except Exception as exc: - custom_logger.debug(str(exc)) - custom_logger.error("Failed to append data to sheet") - return spreadsheetId - - + custom_logger.error(f"Error comparing CoreMark results: {str(exc)}") if __name__ == "__main__": + # Example usage with empty spreadsheet list and target spreadsheetId spreadsheets = [ - "", - "", + "", # Add first spreadsheet ID + "", # Add second spreadsheet ID ] test_name = "coremark" - compare_coremark_results(spreadsheets, "", test_name, - table_name=["System Name"]) \ No newline at end of file + # Call the function to compare the results and update the sheet + compare_coremark_results(spreadsheets, "", test_name, table_name=["System Name"]) diff 
--git a/quisby/benchmarks/coremark/coremark.py b/quisby/benchmarks/coremark/coremark.py index 8fbeb4e..45329ad 100644 --- a/quisby/benchmarks/coremark/coremark.py +++ b/quisby/benchmarks/coremark/coremark.py @@ -1,170 +1,258 @@ -""" Custom key to sort the data base don instance name """ from itertools import groupby - -from quisby import custom_logger import re - -from quisby.util import read_config +from quisby import custom_logger +from quisby.util import read_config, process_instance, mk_int from quisby.pricing.cloud_pricing import get_cloud_pricing -from quisby.util import process_instance -from quisby.util import mk_int +# Utility function to extract prefix, number, and suffix from instance names +def extract_prefix_and_number(input_string): + """ + Extracts the prefix, number, and suffix from a structured instance name string. + Example: 't2.micro-01' -> ('t2.micro', 1, '') -def extract_prefix_and_number(input_string): - match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) - if match: - prefix = match.group(1) - number = int(match.group(2)) - suffix = match.group(3) - return prefix, number, suffix + :param input_string: The instance name as a string (e.g., 't2.micro-01') + :return: Tuple (prefix, number, suffix) or (None, None, None) if no match + """ + try: + match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) + if match: + prefix = match.group(1) + number = int(match.group(2)) + suffix = match.group(3) + return prefix, number, suffix + except Exception as exc: + custom_logger.error(f"Error extracting prefix and number from input string '{input_string}': {str(exc)}") return None, None, None +# Custom key for sorting instances based on the cloud type and instance name def custom_key(item): - cloud_type = read_config("cloud", "cloud_type") - if item[1][0] == "local": - return item[1][0] - elif cloud_type == "aws": - instance_name = item[1][0] - instance_type = instance_name.split(".")[0] - instance_number = instance_name.split(".")[1] - return instance_type, instance_number - elif cloud_type == "gcp": - instance_type = item[1][0].split("-")[0] - instance_number = int(item[1][0].split('-')[-1]) - return instance_type, instance_number - elif cloud_type == "azure": - instance_type, instance_number, version = extract_prefix_and_number(item[1][0]) - return instance_type, version, instance_number + """ + Generates a custom sorting key based on the instance's cloud platform (AWS, GCP, Azure, or Local). 
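+
+    Example (illustrative; with cloud_type configured as "gcp"):
+
+        custom_key((None, ["n2-standard-8"]))  # -> ("n2", 8)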
+ + :param item: The item containing instance name + :return: Tuple that can be used as a sorting key + """ + try: + cloud_type = read_config("cloud", "cloud_type") + + if item[1][0] == "local": + return item[1][0] # If local, use the first item directly as the key + + # For cloud instances, split names to extract type and number + if cloud_type == "aws": + instance_name = item[1][0] + instance_type, instance_number = instance_name.split(".") + return instance_type, instance_number + + elif cloud_type == "gcp": + instance_type = item[1][0].split("-")[0] + instance_number = int(item[1][0].split('-')[-1]) + return instance_type, instance_number + + elif cloud_type == "azure": + instance_type, version, instance_number = extract_prefix_and_number(item[1][0]) + return instance_type, version, instance_number + + except Exception as exc: + custom_logger.error(f"Error generating custom key for instance '{item[1][0]}': {str(exc)}") + return None +# Calculates price-performance ratio for an instance def calc_price_performance(inst, avg): - region = read_config("cloud", "region") - cloud_type = read_config("cloud", "cloud_type") - os_type = read_config("test", "os_type") - cost_per_hour = None - price_perf = 0.0 + """ + Calculates the price-performance ratio for a given instance. + + :param inst: Instance identifier (e.g., 't2.micro') + :param avg: Average performance for the instance + :return: Tuple (cost per hour, price-performance ratio) + """ try: - cost_per_hour = get_cloud_pricing( - inst, region, cloud_type.lower(), os_type) + region = read_config("cloud", "region") + cloud_type = read_config("cloud", "cloud_type") + os_type = read_config("test", "os_type") + + cost_per_hour = get_cloud_pricing(inst, region, cloud_type.lower(), os_type) price_perf = float(avg) / float(cost_per_hour) + + return cost_per_hour, price_perf + except Exception as exc: - custom_logger.debug(str(exc)) - custom_logger.error("Error calculating value !") - return cost_per_hour, price_perf + custom_logger.error(f"Error calculating price-performance for instance '{inst}': {str(exc)}") + return None, 0.0 +# Groups benchmarking results based on cloud platform def group_data(results): - cloud_type = read_config("cloud", "cloud_type") - if cloud_type == "aws": - return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "feature", "machine_type")) - elif cloud_type == "azure": - results = sorted(results, key=lambda x: process_instance(x[1][0], "family", "feature")) - return groupby(results, key=lambda x: process_instance(x[1][0], "family", "feature")) - elif cloud_type == "gcp": - return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "sub_family", "feature")) - elif cloud_type == "local": - return groupby(results, key=lambda x: process_instance(x[1][0], "family")) + """ + Groups benchmarking results based on instance type and cloud platform. 
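+
+    Note that itertools.groupby only merges adjacent rows with equal
+    keys, so the results are expected to be pre-sorted by that key
+    (sort_data handles this; the azure branch also sorts inline).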
+ + :param results: List of benchmarking results + :return: Grouped results + """ + try: + cloud_type = read_config("cloud", "cloud_type") + + if cloud_type == "aws": + return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "feature", "machine_type")) + elif cloud_type == "azure": + results = sorted(results, key=lambda x: process_instance(x[1][0], "family", "feature")) + return groupby(results, key=lambda x: process_instance(x[1][0], "family", "feature")) + elif cloud_type == "gcp": + return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "sub_family", "feature")) + + elif cloud_type == "local": + return groupby(results, key=lambda x: process_instance(x[1][0], "family")) + except Exception as exc: + custom_logger.error(f"Error grouping benchmarking results: {str(exc)}") + return [] + + +# Sorts the results based on cloud platform naming conventions def sort_data(results): - cloud_type = read_config("cloud", "cloud_type") - if cloud_type == "aws": - results.sort(key=lambda x: str(process_instance(x[1][0], "family"))) - elif cloud_type == "azure": - results.sort(key=lambda x: str(process_instance(x[1][0], "family", "version", "feature"))) - elif cloud_type == "gcp": - results.sort(key=lambda x: str(process_instance(x[1][0], "family", "version", "sub_family"))) + """ + Sorts the benchmarking results based on instance naming conventions. + :param results: List of benchmarking results + """ + try: + cloud_type = read_config("cloud", "cloud_type") + if cloud_type == "aws": + results.sort(key=lambda x: str(process_instance(x[1][0], "family"))) + + elif cloud_type == "azure": + results.sort(key=lambda x: str(process_instance(x[1][0], "family", "version", "feature"))) + + elif cloud_type == "gcp": + results.sort(key=lambda x: str(process_instance(x[1][0], "family", "version", "sub_family"))) + + except Exception as exc: + custom_logger.error(f"Error sorting benchmarking results: {str(exc)}") + + +# Generates a summary report for CoreMark benchmarking data def create_summary_coremark_data(results, OS_RELEASE, sorted_results=None): + """ + Generates a summary report for CoreMark data including average performance, cost per hour, and price-performance. 
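+
+    The returned rows mirror the sheet layout sketched below (the OS
+    release and all numbers are illustrative, not real measurements):
+
+        ["System name", "Test_passes-RHEL-9.5"]
+        ["m5.xlarge", 412345.6]
+        [""]
+        ["Cost/Hr"]
+        ["m5.xlarge", 0.192]
+        [""]
+        ["Price-perf", "Passes/$-RHEL-9.5"]
+        ["m5.xlarge", 2147633.3]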
+ + :param results: Benchmarking results + :param OS_RELEASE: OS release string (e.g., 'Ubuntu-20.04') + :param sorted_results: Pre-sorted benchmarking data (optional) + :return: Final report in structured format + """ final_results = [] - # Sort data based on instance name - results = list(filter(None, results)) - sort_data(results) - - for _, items in group_data(results): - cal_data = [["System name", "Test_passes-" + OS_RELEASE]] - items = list(items) - sorted_data = sorted(items, key=lambda x: mk_int(process_instance(x[1][0], "size"))) - # sorted_results.extend(sorted_data) - cost_per_hour, price_per_perf = [], [] - - # Add summary data - for item in sorted_data: - sum = 0 - avg = 0 - iterations = 0 - for index in range(3, len(item)): - sum = sum + float(item[index][1]) - iterations = iterations + 1 - avg = float(sum / iterations) - try: - cph, pp = calc_price_performance(item[1][0], avg) - except Exception as exc: - custom_logger.error(str(exc)) - break - cal_data.append([item[1][0], avg]) - price_per_perf.append([item[1][0], pp]) - cost_per_hour.append([item[1][0], cph]) - sorted_results = [[""]] - sorted_results += cal_data - sorted_results.append([""]) - sorted_results.append(["Cost/Hr"]) - sorted_results += cost_per_hour - sorted_results.append([""]) - sorted_results.append(["Price-perf", f"Passes/$-{OS_RELEASE}"]) - sorted_results += price_per_perf - final_results.extend(sorted_results) + try: + # Sort and filter results + results = list(filter(None, results)) + sort_data(results) + + for _, items in group_data(results): + cal_data = [["System name", "Test_passes-" + OS_RELEASE]] + items = list(items) + sorted_data = sorted(items, key=lambda x: mk_int(process_instance(x[1][0], "size"))) + + cost_per_hour, price_per_perf = [], [] + + # Add summary data for each instance + for item in sorted_data: + sum = 0 + avg = 0 + iterations = 0 + + # Calculate average performance + for index in range(3, len(item)): + sum += float(item[index][1]) + iterations += 1 + avg = float(sum / iterations) + + # Calculate cost per hour and price-perf + try: + cph, pp = calc_price_performance(item[1][0], avg) + except Exception as exc: + custom_logger.error(f"Error calculating price-performance for instance '{item[1][0]}': {str(exc)}") + break + + # Add data to final report + cal_data.append([item[1][0], avg]) + price_per_perf.append([item[1][0], pp]) + cost_per_hour.append([item[1][0], cph]) + + # Compile the summary report + sorted_results = [[""]] + sorted_results += cal_data + sorted_results.append([""]) + sorted_results.append(["Cost/Hr"]) + sorted_results += cost_per_hour + sorted_results.append([""]) + sorted_results.append(["Price-perf", f"Passes/$-{OS_RELEASE}"]) + sorted_results += price_per_perf + + final_results.extend(sorted_results) + + except Exception as exc: + custom_logger.error(f"Error creating CoreMark summary data: {str(exc)}") + return final_results +# Extracts and processes CoreMark data from a file def extract_coremark_data(path, system_name, OS_RELEASE): - """""" + """ + Extracts and processes CoreMark results from a file (CSV format). 
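+
+    The parser below expects colon-separated rows of at least three
+    fields, with a header row containing "iteration" and a "test passes"
+    row per block, e.g. (field names and values are illustrative; only
+    the positions the code reads, row[0] and row[2], matter):
+
+        iteration:metric:value
+        test passes:passes:412345.6
+        1:passes:412340.2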
+ + :param path: Path to the file containing benchmarking results + :param system_name: The name of the system being benchmarked + :param OS_RELEASE: OS release version (e.g., 'Ubuntu-20.04') + :return: Processed benchmarking results or None if there was an error + """ results = [] processed_data = [] - summary_data = [] - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") - # Extract data from file try: + # Open the CSV file if path.endswith(".csv"): with open(path) as file: coremark_results = file.readlines() - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) else: - return None + custom_logger.error(f"Invalid file format for path: {path}") + return None # Not a CSV file except Exception as exc: - custom_logger.debug(str(exc)) - custom_logger.error("Unable to extract data from csv file for coremark") + custom_logger.error(f"Error reading CSV file '{path}': {str(exc)}") + return None # Error reading file + + # Process the CoreMark data + try: + data_index = 0 + header = [] + for index, data in enumerate(coremark_results): + if "iteration" in data: + data_index = index + header = data.strip("\n").split(":") + else: + coremark_results[index] = data.strip("\n").split(":") + coremark_results = [header] + coremark_results[data_index + 1:] + + # Format the data for report generation + iteration = 1 + for row in coremark_results: + if "test passes" in row: + processed_data.append([""]) + processed_data.append([system_name]) + processed_data.append([row[0], row[2]]) # System name and test passes + else: + processed_data.append([iteration, row[2]]) # Iteration and performance + iteration += 1 + + results.append(processed_data) + except Exception as exc: + custom_logger.error(f"Error processing CoreMark data from file '{path}': {str(exc)}") return None - data_index = 0 - header = [] - for index, data in enumerate(coremark_results): - if "iteration" in data: - data_index = index - header = data.strip("\n").split(":") - else: - coremark_results[index] = data.strip("\n").split(":") - coremark_results = [header] + coremark_results[data_index + 1:] - - # for index, data in enumerate(coremark_results): - # coremark_results[index] = data.strip("\n").split(":") - - # Format the data - iteration = 1 - for row in coremark_results: - if "test passes" in row: - processed_data.append([""]) - processed_data.append([system_name]) - processed_data.append([row[0], row[2]]) - else: - processed_data.append([iteration, row[2]]) - iteration = iteration + 1 - results.append(processed_data) - return results, summary_data + return results From d5456ee7aad1b06fbc140d364eec71106c972a96 Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Mon, 6 Jan 2025 09:26:59 +0530 Subject: [PATCH 06/10] coremark_pro reformatting --- quisby/benchmarks/coremark_pro/compare.py | 186 +++++++++++------- .../benchmarks/coremark_pro/coremark_pro.py | 94 ++++++--- 2 files changed, 184 insertions(+), 96 deletions(-) diff --git a/quisby/benchmarks/coremark_pro/compare.py b/quisby/benchmarks/coremark_pro/compare.py index 15178cd..fd0122f 100644 --- a/quisby/benchmarks/coremark_pro/compare.py +++ b/quisby/benchmarks/coremark_pro/compare.py @@ -1,101 +1,141 @@ from itertools import groupby - from quisby import custom_logger from quisby.benchmarks.coremark_pro.graph import graph_coremark_pro_data from quisby.sheet.sheet_util import ( append_to_sheet, read_sheet, get_sheet, - create_sheet, clear_sheet_data, clear_sheet_charts, + create_sheet, + clear_sheet_data, + 
clear_sheet_charts, ) -from quisby.util import merge_lists_alternately,read_config +from quisby.util import merge_lists_alternately, read_config import re - def extract_prefix_and_number(input_string): - match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) - if match: - prefix = match.group(1) - suffix = match.group(3) # Extracts the suffix after the number - return prefix, suffix + """ + Extracts the prefix and suffix from an instance name that contains a number. + + :param input_string: Instance name, e.g., 't2.micro-01' + :return: Tuple (prefix, suffix) or (None, None) if no match + """ + try: + match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) + if match: + prefix = match.group(1) + suffix = match.group(3) # Extracts the suffix after the number + return prefix, suffix + except Exception as exc: + custom_logger.error(f"Error extracting prefix and number from '{input_string}': {str(exc)}") return None, None def compare_inst(item1, item2): - cloud_type = read_config("cloud", "cloud_type") - if cloud_type == "local": - return True - elif cloud_type == "aws": - return item1.split(".")[0] == item2.split(".")[0] - elif cloud_type == "gcp": - return item1.split("-")[0] == item2.split("-")[0] - elif cloud_type == "azure": - return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) - - -def compare_coremark_pro_results(spreadsheets, spreadsheetId, test_name, table_name=["System name","Price-perf"]): + """ + Compares two instance names based on the cloud provider's naming convention. + + :param item1: Instance name from the first spreadsheet + :param item2: Instance name from the second spreadsheet + :return: True if instance names match, False otherwise + """ + try: + cloud_type = read_config("cloud", "cloud_type") + if cloud_type == "local": + return True + elif cloud_type == "aws": + return item1.split(".")[0] == item2.split(".")[0] + elif cloud_type == "gcp": + return item1.split("-")[0] == item2.split("-")[0] + elif cloud_type == "azure": + return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) + except Exception as exc: + custom_logger.error(f"Error comparing instances '{item1}' and '{item2}': {str(exc)}") + return False + + +def compare_coremark_pro_results(spreadsheets, spreadsheetId, test_name, table_name=["System name", "Price-perf"]): + """ + Compares CoreMark Pro results from multiple spreadsheets and appends the merged data to the target sheet. 
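+
+    The sheets being compared are expected to contain the
+    "Single Iterations", "Multi Iterations", "Cost/Hr" and "Price-perf"
+    blocks produced by create_summary_coremark_pro_data; rows for
+    matching instances are merged side by side with
+    merge_lists_alternately.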
+ + :param spreadsheets: List of spreadsheet names to compare + :param spreadsheetId: Target spreadsheet ID for appending data + :param test_name: The name of the test to compare (e.g., 'coremark_pro') + :param table_name: List of columns to compare (default ["System name", "Price-perf"]) + """ values = [] results = [] spreadsheet_name = [] - for spreadsheet in spreadsheets: - values.append(read_sheet(spreadsheet, range=test_name)) - spreadsheet_name.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) - - for index, value in enumerate(values): - values[index] = (list(g) for k, g in groupby(value, key=lambda x: x != []) if k) - list_1 = list(values[0]) - list_2 = list(values[1]) - - for value in list_1: - for ele in list_2: - # Check max throughput - if value[1][0] in table_name and ele[1][0] in table_name and value[1][0] == ele[1][0]: - if compare_inst(value[2][0], ele[2][0]) and value[0][0] == ele[0][0]: - results.append([""]) - for item1 in value: - for item2 in ele: - if item1[0] == item2[0]: - results = merge_lists_alternately(results, item1, item2) - break - - elif value[0][0] == "Cost/Hr" and ele[0][0] == "Cost/Hr": - if compare_inst(value[1][0], ele[1][0]): - results.append([""]) - for item1 in value: - for item2 in ele: - if item1[0] == item2[0]: - results.append(item1) - break - - elif value[1][0] == ele[1][0]: - if value[0][0] == ele[0][0]: - results.append([""]) - results.append(value[0]) - for item1, item2 in zip(value[1:], ele[1:]): - results = merge_lists_alternately(results, item1, item2) - break - try: - create_sheet(spreadsheetId, test_name) - custom_logger.info("Deleting existing charts and data from the sheet...") - clear_sheet_charts(spreadsheetId, test_name) - clear_sheet_data(spreadsheetId, test_name) - custom_logger.info("Appending new " + test_name + " data to sheet...") - append_to_sheet(spreadsheetId, results, test_name) - #graph_coremark_pro_data(spreadsheetId, test_name, "compare") + # Read data from each spreadsheet + for spreadsheet in spreadsheets: + values.append(read_sheet(spreadsheet, range=test_name)) + spreadsheet_name.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) + + # Group the values into non-empty chunks + for index, value in enumerate(values): + values[index] = [list(g) for k, g in groupby(value, key=lambda x: x != []) if k] + + list_1 = list(values[0]) + list_2 = list(values[1]) + + # Compare the CoreMark Pro results from both spreadsheets + for value in list_1: + for ele in list_2: + # Check for max throughput or other table data + if value[1][0] in table_name and ele[1][0] in table_name and value[1][0] == ele[1][0]: + if compare_inst(value[2][0], ele[2][0]) and value[0][0] == ele[0][0]: + results.append([""]) + for item1 in value: + for item2 in ele: + if item1[0] == item2[0]: + results = merge_lists_alternately(results, item1, item2) + break + + # Handle cost/hour comparison + elif value[0][0] == "Cost/Hr" and ele[0][0] == "Cost/Hr": + if compare_inst(value[1][0], ele[1][0]): + results.append([""]) + for item1 in value: + for item2 in ele: + if item1[0] == item2[0]: + results.append(item1) + break + + # General comparison based on row keys + elif value[1][0] == ele[1][0]: + if value[0][0] == ele[0][0]: + results.append([""]) + results.append(value[0]) + for item1, item2 in zip(value[1:], ele[1:]): + results = merge_lists_alternately(results, item1, item2) + break + + # Attempt to create and update the sheet with the results + try: + create_sheet(spreadsheetId, test_name) + 
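+            # create_sheet is assumed to be idempotent here (a no-op when
+            # the tab already exists); the tab is then cleared below so
+            # stale charts and data never mix with the new comparison.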
custom_logger.info("Deleting existing charts and data from the sheet...") + clear_sheet_charts(spreadsheetId, test_name) + clear_sheet_data(spreadsheetId, test_name) + custom_logger.info(f"Appending new {test_name} data to sheet...") + append_to_sheet(spreadsheetId, results, test_name) + # Optionally, generate a graph for CoreMark Pro comparison + # graph_coremark_pro_data(spreadsheetId, test_name, "compare") + except Exception as exc: + custom_logger.error(f"Failed to append data to sheet '{test_name}' in spreadsheet {spreadsheetId}: {str(exc)}") + return spreadsheetId + except Exception as exc: - custom_logger.debug(str(exc)) - custom_logger.error("Failed to append data to sheet") - return spreadsheetId + custom_logger.error(f"Error comparing CoreMark Pro results: {str(exc)}") if __name__ == "__main__": + # Example usage with empty spreadsheet list and target spreadsheetId spreadsheets = [ - "", - "", + "", # Add first spreadsheet ID + "", # Add second spreadsheet ID ] test_name = "coremark_pro" - compare_coremark_pro_results(spreadsheets, "", test_name, - table_name=["System Name"]) \ No newline at end of file + # Call the function to compare the results and update the sheet + compare_coremark_pro_results(spreadsheets, "", test_name, table_name=["System Name"]) diff --git a/quisby/benchmarks/coremark_pro/coremark_pro.py b/quisby/benchmarks/coremark_pro/coremark_pro.py index 13e3058..c70a72e 100644 --- a/quisby/benchmarks/coremark_pro/coremark_pro.py +++ b/quisby/benchmarks/coremark_pro/coremark_pro.py @@ -9,6 +9,12 @@ def extract_prefix_and_number(input_string): + """ + Extract the prefix, number, and suffix from an instance name. + + :param input_string: Instance name like 't2.micro-01' + :return: Tuple (prefix, number, suffix) or (None, None, None) if no match. + """ match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) if match: prefix = match.group(1) @@ -19,43 +25,59 @@ def extract_prefix_and_number(input_string): def custom_key(item): + """ + Generate a custom key for sorting or grouping instances based on cloud provider and instance name format. + + :param item: A tuple containing instance data. + :return: A tuple key for grouping. + """ cloud_type = read_config("cloud", "cloud_type") try: if item[1][0] == "local": return item[1][0] elif cloud_type == "aws": - instance_type = item[1][0].split(".")[0] - instance_number = item[1][0].split(".")[1] + instance_type, instance_number = item[1][0].split(".") return instance_type, instance_number elif cloud_type == "gcp": - instance_type = item[1][0].split("-")[0] - instance_number = int(item[1][0].split('-')[-1]) - return instance_type, instance_number + instance_type, instance_number = item[1][0].split("-") + return instance_type, int(instance_number) elif cloud_type == "azure": - instance_type, instance_number, version = extract_prefix_and_number(item[1][0]) + instance_type, version, instance_number = extract_prefix_and_number(item[1][0]) return instance_type, version, instance_number except Exception as exc: - custom_logger.error(str(exc)) + custom_logger.error(f"Error in custom_key for {item[1][0]}: {str(exc)}") return "", "" def calc_price_performance(inst, avg): + """ + Calculate price-perf ratio for an instance based on its cost per hour and performance. + + :param inst: Instance type or ID. + :param avg: Average score for the instance. 
+ :return: Tuple (cost_per_hour, price_perf) + """ region = read_config("cloud", "region") cloud_type = read_config("cloud", "cloud_type") os_type = read_config("test", "os_type") cost_per_hour = None price_perf = 0.0 try: - cost_per_hour = get_cloud_pricing( - inst, region, cloud_type.lower(), os_type) - price_perf = float(avg) / float(cost_per_hour) + cost_per_hour = get_cloud_pricing(inst, region, cloud_type.lower(), os_type) + price_perf = float(avg) / float(cost_per_hour) if cost_per_hour else 0 except Exception as exc: custom_logger.debug(str(exc)) - custom_logger.error("Error calculating value !") + custom_logger.error("Error calculating price-performance!") return cost_per_hour, price_perf def group_data(results): + """ + Group data based on cloud type and instance attributes. + + :param results: List of results to group. + :return: Grouped results. + """ cloud_type = read_config("cloud", "cloud_type") if cloud_type == "aws": return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "feature", "machine_type")) @@ -69,6 +91,11 @@ def group_data(results): def sort_data(results): + """ + Sort data based on cloud type and instance attributes. + + :param results: List of results to sort. + """ cloud_type = read_config("cloud", "cloud_type") if cloud_type == "aws": results.sort(key=lambda x: str(process_instance(x[1][0], "family"))) @@ -79,34 +106,48 @@ def sort_data(results): def create_summary_coremark_pro_data(results, OS_RELEASE): + """ + Create a summary of the CoreMark Pro data, including price-performance and iteration details. + + :param results: List of benchmark results. + :param OS_RELEASE: OS release version (e.g., "Ubuntu 20.04"). + :return: List of summarized results. + """ ret_results = [] - # Sort data based on instance name + # Sort and group data results = list(filter(None, results)) sort_data(results) results = group_data(results) + for _, items in results: multi_iter = [["Multi Iterations"], ["System name", "Score-" + OS_RELEASE]] single_iter = [["Single Iterations"], ["System name", "Score-" + OS_RELEASE]] cal_data = [["System name", "Test_passes-" + OS_RELEASE]] items = list(items) + + # Sort data by instance size sorted_data = sorted(items, key=lambda x: mk_int(process_instance(x[1][0], "size"))) - # Add summary data + + # Collect cost per hour and price performance data cost_per_hour, price_perf_single, price_perf_multi = [], [], [] for item in sorted_data: for index in range(3, len(item)): multi_iter.append([item[1][0], item[index][1]]) single_iter.append([item[1][0], item[index][2]]) + try: cph, ppm = calc_price_performance(item[1][0], item[index][1]) cph, pps = calc_price_performance(item[1][0], item[index][2]) except Exception as exc: custom_logger.error(str(exc)) break + price_perf_multi.append([item[1][0], ppm]) price_perf_single.append([item[1][0], pps]) cost_per_hour.append([item[1][0], cph]) - # final_results += item + + # Prepare the final result for this item final_results = [[""]] final_results += single_iter final_results.append([""]) @@ -121,41 +162,48 @@ def create_summary_coremark_pro_data(results, OS_RELEASE): final_results.append(["Price-perf", f"Score/$-{OS_RELEASE}"]) final_results += price_perf_multi ret_results.extend(final_results) + return ret_results def extract_coremark_pro_data(path, system_name, OS_RELEASE): - """""" + """ + Extract CoreMark Pro data from a CSV file, process it, and return the formatted results. + + :param path: Path to the CSV file containing the benchmark results. 
+    :param system_name: Name of the system being tested.
+    :param OS_RELEASE: OS release version (e.g., "Ubuntu 20.04").
+    :return: Processed results.
+    """
     results = []
     processed_data = []
-    summary_data = []
-    server = read_config("server", "name")
-    result_dir = read_config("server", "result_dir")
 
     # Extract data from file
     try:
         if path.endswith(".csv"):
             with open(path) as file:
                 coremark_pro_results = file.readlines()
-                summary_data.append([system_name, server + "/results/" + result_dir + "/" + path])
         else:
             return None, None
     except Exception as exc:
         custom_logger.debug(str(exc))
-        custom_logger.error("Unable to extract data from csv file for coremark_pro")
+        custom_logger.error("Unable to extract data from csv file for CoreMark Pro")
         return None, None
 
+
     data_index = 0
     header = []
+
+    # Parse the CSV data
     for index, data in enumerate(coremark_pro_results):
         if "Test:Multi iterations:Single Iterations:Scaling" in data:
             data_index = index
             header = data.strip("\n").split(":")
         else:
             coremark_pro_results[index] = data.strip("\n").split(":")
+
     coremark_pro_results = [header] + coremark_pro_results[data_index + 1:]
 
-    # Format the data
-    iteration = 1
+    # Format the data into the structure we need
     for row in coremark_pro_results:
         if "Test" in row:
             processed_data.append([""])
@@ -165,4 +213,4 @@
             processed_data.append(["Score", row[1], row[2]])
 
     results.append(processed_data)
-    return results, summary_data
+    return results

From b0660059d04325bc0570946d412645fa7144c3f6 Mon Sep 17 00:00:00 2001
From: sousinha1997
Date: Mon, 6 Jan 2025 09:30:26 +0530
Subject: [PATCH 07/10] passmark reformatting

---
 quisby/benchmarks/passmark/compare.py  | 51 ++++++++---
 quisby/benchmarks/passmark/passmark.py | 118 +++++++++++++++++--------
 2 files changed, 122 insertions(+), 47 deletions(-)

diff --git a/quisby/benchmarks/passmark/compare.py b/quisby/benchmarks/passmark/compare.py
index 494109a..fe9cf25 100644
--- a/quisby/benchmarks/passmark/compare.py
+++ b/quisby/benchmarks/passmark/compare.py
@@ -1,4 +1,5 @@
 from itertools import groupby
+import re
 
 from quisby import custom_logger
 from quisby.benchmarks.passmark.graph import graph_passmark_data
@@ -6,13 +7,19 @@
     append_to_sheet,
     read_sheet,
     get_sheet,
-    create_sheet, clear_sheet_data, clear_sheet_charts,
+    create_sheet,
+    clear_sheet_data,
+    clear_sheet_charts,
 )
-from quisby.util import merge_lists_alternately,read_config
-import re
+from quisby.util import merge_lists_alternately, read_config
 
 
+# Helper function to extract prefix and suffix from instance names
 def extract_prefix_and_number(input_string):
+    """
+    Extract the prefix and suffix from an instance name that includes a number.
+    The split happens at the first digit run in the name.
+    Example: "t2.micro-01" => ("t", ".micro-01")
+    """
     match = re.search(r'^(.*?)(\d+)(.*?)$', input_string)
     if match:
         prefix = match.group(1)
@@ -21,8 +28,13 @@ def extract_prefix_and_number(input_string):
     return None, None
 
 
+# Helper function to compare instance names based on cloud type
 def compare_inst(item1, item2):
+    """
+    Compare two instance names based on the cloud type.
+ """ cloud_type = read_config("cloud", "cloud_type") + if cloud_type == "local": return True elif cloud_type == "aws": @@ -33,23 +45,32 @@ def compare_inst(item1, item2): return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) -def compare_passmark_results(spreadsheets, spreadsheetId, test_name, table_name=["System name","Price-perf"]): +# Function to compare PassMark results between two spreadsheets +def compare_passmark_results(spreadsheets, spreadsheetId, test_name, table_name=["System name", "Price-perf"]): + """ + Compare PassMark benchmark data between two Google Sheets. + The data is merged and appended to the target sheet. + """ values = [] results = [] spreadsheet_name = [] + # Read data from each spreadsheet for spreadsheet in spreadsheets: values.append(read_sheet(spreadsheet, range=test_name)) spreadsheet_name.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) + # Group values into segments (non-empty groups) for index, value in enumerate(values): values[index] = (list(g) for k, g in groupby(value, key=lambda x: x != []) if k) + list_1 = list(values[0]) list_2 = list(values[1]) + # Merge the results by comparing each value and adding to the final results for value in list_1: for ele in list_2: - # Check max throughput + # Compare system name and price-perf if value[0][0] in table_name and ele[0][0] in table_name and value[0][0] == ele[0][0]: if compare_inst(value[1][0], ele[1][0]): results.append([""]) @@ -58,6 +79,8 @@ def compare_passmark_results(spreadsheets, spreadsheetId, test_name, table_name= if item1[0] == item2[0]: results = merge_lists_alternately(results, item1, item2) break + + # Compare cost per hour elif value[0][0] == "Cost/Hr" and ele[0][0] == "Cost/Hr": if compare_inst(value[1][0], ele[1][0]): results.append([""]) @@ -67,6 +90,7 @@ def compare_passmark_results(spreadsheets, spreadsheetId, test_name, table_name= results.append(item1) break + # Compare other matching rows elif value[1][0] == ele[1][0]: if value[0][0] == ele[0][0]: results.append([""]) @@ -75,26 +99,31 @@ def compare_passmark_results(spreadsheets, spreadsheetId, test_name, table_name= results = merge_lists_alternately(results, item1, item2) break + # Create the sheet and append the merged results try: create_sheet(spreadsheetId, test_name) custom_logger.info("Deleting existing charts and data from the sheet...") clear_sheet_charts(spreadsheetId, test_name) clear_sheet_data(spreadsheetId, test_name) - custom_logger.info("Appending new " + test_name + " data to sheet...") + + custom_logger.info(f"Appending new {test_name} data to sheet...") append_to_sheet(spreadsheetId, results, test_name) - #graph_passmark_data(spreadsheetId, test_name, "compare") + # Optionally, create a graph (commented out for now) + # graph_passmark_data(spreadsheetId, test_name, "compare") + except Exception as exc: custom_logger.debug(str(exc)) custom_logger.error("Failed to append data to sheet") return spreadsheetId +# Main execution block if __name__ == "__main__": spreadsheets = [ - "", - "", + "spreadsheet_id_1", # Replace with actual spreadsheet ID + "spreadsheet_id_2", # Replace with actual spreadsheet ID ] test_name = "passmark" - compare_passmark_results(spreadsheets, "", test_name, - table_name=["SYSTEM_NAME"]) \ No newline at end of file + # Compare the PassMark results from two spreadsheets + compare_passmark_results(spreadsheets, "spreadsheet_id_1", test_name, table_name=["SYSTEM_NAME"]) diff --git a/quisby/benchmarks/passmark/passmark.py 
b/quisby/benchmarks/passmark/passmark.py
index 8b49935..b55b87c 100644
--- a/quisby/benchmarks/passmark/passmark.py
+++ b/quisby/benchmarks/passmark/passmark.py
@@ -1,16 +1,20 @@
+import re
 from itertools import groupby
-
 from scipy.stats import gmean
-
 from quisby import custom_logger
 from quisby.util import read_config
 from quisby.pricing.cloud_pricing import get_cloud_pricing
-import re
-
 from quisby.util import process_instance, mk_int
 
 
 def extract_prefix_and_number(input_string):
+    """
+    Extract the prefix, number, and suffix from an instance name.
+    The split happens at the first digit run in the name.
+    Example: 't2.micro-01' -> ('t', '2', '.micro-01')
+
+    :param input_string: Instance name string (e.g., 't2.micro-01').
+    :return: Tuple (prefix, number, suffix) or (None, None, None) if no match.
+    """
     match = re.search(r'^(.*?)(\d+)(.*?)$', input_string)
     if match:
         prefix = match.group(1)
@@ -21,38 +25,58 @@ def extract_prefix_and_number(input_string):
 
 
 def custom_key(item):
+    """
+    Generate a custom key for sorting/grouping based on the cloud provider type.
+
+    :param item: A tuple containing instance data.
+    :return: A tuple key for grouping/sorting.
+    """
     cloud_type = read_config("cloud", "cloud_type")
-    if item[0] == "local":
-        return item[0]
-    elif cloud_type == "aws":
-        instance_type = item[0].split(".")[0]
-        instance_number = item[0].split(".")[1]
-        return instance_type, instance_number
-    elif cloud_type == "gcp":
-        instance_type = item[0].split("-")[0]
-        instance_number = int(item[0].split('-')[-1])
-        return instance_type, instance_number
-    elif cloud_type == "azure":
-        instance_type, instance_number, version= extract_prefix_and_number(item[0])
-        return instance_type, version, instance_number
+    try:
+        if item[0] == "local":
+            return item[0]
+        elif cloud_type == "aws":
+            instance_type, instance_number = item[0].split(".")
+            return instance_type, instance_number
+        elif cloud_type == "gcp":
+            instance_type = item[0].split("-")[0]
+            instance_number = int(item[0].split("-")[-1])
+            return instance_type, instance_number
+        elif cloud_type == "azure":
+            instance_type, instance_number, version = extract_prefix_and_number(item[0])
+            return instance_type, version, instance_number
+    except Exception as exc:
+        custom_logger.error(f"Error in custom_key for {item[0]}: {str(exc)}")
+        return "", ""
 
 
 def calc_price_performance(inst, avg):
+    """
+    Calculate the price-performance ratio for a given instance.
+
+    :param inst: Instance type or ID.
+    :param avg: Average performance score (e.g., geometric mean).
+    :return: Tuple (cost_per_hour, price_performance).
+    """
     region = read_config("cloud", "region")
     cloud_type = read_config("cloud", "cloud_type")
     os_type = read_config("test", "os_type")
     cost_per_hour = None
+    price_perf = 0.0
     try:
-        cost_per_hour = get_cloud_pricing(
-            inst, region, cloud_type.lower(), os_type)
-        price_perf = float(avg)/float(cost_per_hour)
+        cost_per_hour = get_cloud_pricing(inst, region, cloud_type.lower(), os_type)
+        price_perf = float(avg) / float(cost_per_hour) if cost_per_hour else 0.0
     except Exception as exc:
         custom_logger.debug(str(exc))
-        custom_logger.error("Error calculating value !")
+        custom_logger.error("Error calculating price-performance!")
     return cost_per_hour, price_perf
 
 
 def group_data(results):
+    """
+    Group benchmark data based on cloud type and instance characteristics.
+
+    :param results: List of benchmark results.
+    :return: Grouped results.
+    """
     cloud_type = read_config("cloud", "cloud_type")
     if cloud_type == "aws":
         return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "feature", "machine_type"))
@@ -60,12 +84,17 @@ def group_data(results):
         results = sorted(results, key=lambda x: process_instance(x[1][0], "family", "feature"))
         return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "feature"))
     elif cloud_type == "gcp":
-        return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version","sub_family","feature"))
+        return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "sub_family", "feature"))
     elif cloud_type == "local":
         return groupby(results, key=lambda x: process_instance(x[1][0], "family"))
 
 
 def sort_data(results):
+    """
+    Sort benchmark data based on instance attributes and cloud type.
+
+    :param results: List of benchmark results.
+    """
     cloud_type = read_config("cloud", "cloud_type")
     if cloud_type == "aws":
         results.sort(key=lambda x: str(process_instance(x[1][0], "family")))
@@ -76,17 +105,26 @@ def sort_data(results):
 
 
 def create_summary_passmark_data(data, OS_RELEASE):
+    """
+    Create a summary of PassMark data, including geometric mean and price-performance metrics.
+
+    :param data: List of benchmark data.
+    :param OS_RELEASE: OS release version (e.g., "Ubuntu 20.04").
+    :return: List of summarized results.
+    """
     ret_results = []
     results = list(filter(None, data))
     sort_data(results)
     results = group_data(results)
+
    for _, items in results:
-        mac_data = [["System name", "Geomean-" + OS_RELEASE]]
+        mac_data = [["System name", f"Geomean-{OS_RELEASE}"]]
         cost_data = [["Cost/Hr"]]
         price_perf_data = [["Price-perf", f"Geomean/$-{OS_RELEASE}"]]
         items = list(items)
         sorted_data = sorted(items, key=lambda x: mk_int(process_instance(x[1][0], "size")))
-        cost_per_hour, price_per_perf = [], []
 
         # Add summary data
         for index, row in enumerate(sorted_data):
             inst = row[1][0]
@@ -95,44 +133,50 @@
             try:
                 gmean_data.append(float(row[i][1].strip()))
             except Exception as exc:
-                gmean_data.append(0.0)
+                gmean_data.append(0.0)  # Default to 0.0 for non-numeric values
             gdata = gmean(gmean_data)
         try:
             cph, pp = calc_price_performance(inst, gdata)
         except Exception as exc:
-            custom_logger.error(str(exc))
+            custom_logger.error(f"Error calculating price performance for {inst}: {str(exc)}")
             continue
         mac_data.append([inst, gdata])
         cost_data.append([inst, cph])
         price_perf_data.append([inst, pp])
+
+        # Append all data for the current group
         ret_results.append([""])
         ret_results.extend(mac_data)
         ret_results.append([""])
         ret_results.extend(cost_data)
         ret_results.append([""])
         ret_results.extend(price_perf_data)
+
     return ret_results
 
 
 def extract_passmark_data(path, system_name, OS_RELEASE):
-    """"""
+    """
+    Extract and process PassMark benchmark data from a CSV file.
+
+    :param path: Path to the CSV file containing the benchmark results.
+    :param system_name: Name of the system being tested.
+    :param OS_RELEASE: OS release version (e.g., "Ubuntu 20.04").
+    :return: Processed results as a list.
+    """
     results = []
-    summary_data = []
-    server = read_config("server", "name")
-    result_dir = read_config("server", "result_dir")
 
+    # Extract data from file
     try:
         if path.endswith("results.csv"):
             with open(path) as file:
                 passmark_results = file.readlines()
-            summary_data.append([system_name, server + "/results/" + result_dir + "/" + path])
-
         else:
             return None
     except Exception as exc:
-        custom_logger.error(str(exc))
-        return None, None
+        custom_logger.error(f"Error reading file {path}: {str(exc)}")
+        return None
 
     data_index = 0
     header = []
@@ -142,9 +186,11 @@
             data_index = index
         else:
             passmark_results[index] = data.strip("\n").split(":")
-    passmark_results = [header] + passmark_results[data_index +1 :]
+
+    passmark_results = [header] + passmark_results[data_index + 1:]
 
     results.append([""])
     results.append([system_name])
     results.extend(passmark_results)
-    return [results], summary_data
+
+    return [results]

From 3f49df0ed1a4feaaa5bea7bb188d70855604d5c4 Mon Sep 17 00:00:00 2001
From: sousinha1997
Date: Mon, 6 Jan 2025 09:36:17 +0530
Subject: [PATCH 08/10] pyperf reformatting

---
 quisby/benchmarks/pyperf/compare.py | 101 +++++++++++++++++++++-------
 quisby/benchmarks/pyperf/pyperf.py  |   7 +-
 2 files changed, 79 insertions(+), 29 deletions(-)

diff --git a/quisby/benchmarks/pyperf/compare.py b/quisby/benchmarks/pyperf/compare.py
index f9ccbde..de937ed 100644
--- a/quisby/benchmarks/pyperf/compare.py
+++ b/quisby/benchmarks/pyperf/compare.py
@@ -1,3 +1,4 @@
+import re
 from itertools import groupby
 
 from quisby import custom_logger
@@ -5,50 +6,97 @@
     append_to_sheet,
     read_sheet,
     get_sheet,
-    create_sheet, clear_sheet_data, clear_sheet_charts,
+    create_sheet,
+    clear_sheet_data,
+    clear_sheet_charts,
 )
-from quisby.util import merge_lists_alternately,read_config
-import re
+from quisby.util import merge_lists_alternately, read_config
 
 
+# Helper function to extract prefix and suffix from instance names
 def extract_prefix_and_number(input_string):
+    """
+    Extract the prefix and suffix from an instance name that contains a number.
+    The split happens at the first digit run in the name.
+    Example: "t2.micro-01" => ("t", ".micro-01")
+
+    Args:
+        input_string (str): Instance name, e.g., "t2.micro-01"
+
+    Returns:
+        tuple: (prefix, suffix) or (None, None) if no match
+    """
     match = re.search(r'^(.*?)(\d+)(.*?)$', input_string)
     if match:
         prefix = match.group(1)
-        suffix = match.group(3)  # Extracts the suffix after the number
+        suffix = match.group(3)
         return prefix, suffix
     return None, None
 
 
+# Compare two instance types based on cloud configuration
 def compare_inst(item1, item2):
+    """
+    Compares two instances based on their cloud type.
+ + Args: + item1 (str): Instance type from the first sheet + item2 (str): Instance type from the second sheet + + Returns: + bool: True if instances match based on cloud type, False otherwise + """ cloud_type = read_config("cloud", "cloud_type") - if cloud_type == "local": - return True - elif cloud_type == "aws": - return item1.split(".")[0] == item2.split(".")[0] - elif cloud_type == "gcp": - return item1.split("-")[0] == item2.split("-")[0] - elif cloud_type == "azure": - return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) + try: + if cloud_type == "local": + return True + elif cloud_type == "aws": + return item1.split(".")[0] == item2.split(".")[0] + elif cloud_type == "gcp": + return item1.split("-")[0] == item2.split("-")[0] + elif cloud_type == "azure": + return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) + except Exception as exc: + custom_logger.error(f"Error comparing instances {item1} and {item2}: {exc}") + return False -def compare_pyperf_results(spreadsheets, spreadsheetId, test_name, table_name=["System name","Price-perf"]): +# Compare the pyperf results from multiple spreadsheets +def compare_pyperf_results(spreadsheets, spreadsheetId, test_name, table_name=["System name", "Price-perf"]): + """ + Compare and merge benchmark results from multiple spreadsheets and append the results to the given sheet. + + Args: + spreadsheets (list): List of spreadsheet IDs to compare + spreadsheetId (str): Spreadsheet ID where the result should be saved + test_name (str): Name of the test (e.g., "pyperf") + table_name (list): List of table names to compare (default: ["System name", "Price-perf"]) + + Returns: + str: The spreadsheet ID if the operation was successful + """ values = [] results = [] - spreadsheet_name = [] + spreadsheet_names = [] - for spreadsheet in spreadsheets: - values.append(read_sheet(spreadsheet, range=test_name)) - spreadsheet_name.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) + # Read data from all spreadsheets + try: + for spreadsheet in spreadsheets: + values.append(read_sheet(spreadsheet, range=test_name)) + spreadsheet_names.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) + except Exception as exc: + custom_logger.error(f"Error reading sheets: {exc}") + return spreadsheetId + # Group values by non-empty rows for index, value in enumerate(values): values[index] = (list(g) for k, g in groupby(value, key=lambda x: x != []) if k) list_1 = list(values[0]) list_2 = list(values[1]) + # Compare and merge data from both sheets for value in list_1: for ele in list_2: - # Check max throughput + # Check max throughput or cost/hr and compare if value[0][0] in table_name and ele[0][0] in table_name and value[0][0] == ele[0][0]: if compare_inst(value[1][0], ele[1][0]): results.append([""]) @@ -75,26 +123,29 @@ def compare_pyperf_results(spreadsheets, spreadsheetId, test_name, table_name=[" results = merge_lists_alternately(results, item1, item2) break - + # Write the results back to the sheet try: create_sheet(spreadsheetId, test_name) custom_logger.info("Deleting existing charts and data from the sheet...") clear_sheet_charts(spreadsheetId, test_name) clear_sheet_data(spreadsheetId, test_name) - custom_logger.info("Appending new " + test_name + " data to sheet...") + custom_logger.info(f"Appending new {test_name} data to sheet...") append_to_sheet(spreadsheetId, results, test_name) except Exception as exc: - custom_logger.debug(str(exc)) + custom_logger.debug(f"Error during 
sheet operations: {exc}")
         custom_logger.error("Failed to append data to sheet")
     return spreadsheetId
 
 
 if __name__ == "__main__":
+    # List of spreadsheets to compare
     spreadsheets = [
-        "",
-        "",
+        "",  # Replace with actual spreadsheet IDs
+        "",  # Replace with actual spreadsheet IDs
     ]
     test_name = "pyperf"
-    compare_pyperf_results(spreadsheets, "", test_name,
-                           table_name=["System name"]) \ No newline at end of file
+    # Compare results and update the sheet
+    compare_pyperf_results(spreadsheets, "", test_name, table_name=["System name"])
diff --git a/quisby/benchmarks/pyperf/pyperf.py b/quisby/benchmarks/pyperf/pyperf.py
index 2f038e6..435a735 100644
--- a/quisby/benchmarks/pyperf/pyperf.py
+++ b/quisby/benchmarks/pyperf/pyperf.py
@@ -122,15 +122,14 @@ def create_summary_pyperf_data(data, OS_RELEASE):
 def extract_pyperf_data(path, system_name, OS_RELEASE):
-    """"""
+    """Extract pyperf benchmark data from a results file."""
     results = []
-    server = read_config("server", "name")
-    result_dir = read_config("server", "result_dir")
-    summary_data = []
+
+    # Extract data from file
     try:
         if path:
             with open(path) as file:
                 pyperf_results = file.readlines()
-            summary_data.append([system_name, +server+"/results/"+result_dir+"/"+path])
         else:
             return None
     except Exception as exc:
@@ -141,4 +140,4 @@ def extract_pyperf_data(path, system_name, OS_RELEASE):
     results.append([""])
     results.append([system_name])
     results.extend(pyperf_results[1:])
-    return [results], summary_data \ No newline at end of file
+    return [results] \ No newline at end of file

From ab64474891d8564d3c78f0e32fd453802552e58c Mon Sep 17 00:00:00 2001
From: sousinha1997
Date: Mon, 6 Jan 2025 09:43:26 +0530
Subject: [PATCH 09/10] remove summary creation

---
 quisby/benchmarks/hammerdb/extract.py  | 12 +-----------
 quisby/benchmarks/phoronix/phoronix.py |  7 ++-----
 quisby/benchmarks/pig/extract.py       | 11 +++--------
 quisby/benchmarks/reboot/reboot.py     | 11 ++---------
 quisby/benchmarks/speccpu/extract.py   | 18 ++++++------------
 quisby/benchmarks/specjbb/specjbb.py   |  7 +------
 quisby/benchmarks/streams/streams.py   |  6 ++----
 quisby/benchmarks/uperf/uperf.py       |  8 +-------
 quisby/example.ini                     |  4 ----
 quisby/pricing/cloud_pricing.py        |  9 +++++----
 10 files changed, 23 insertions(+), 70 deletions(-)

diff --git a/quisby/benchmarks/hammerdb/extract.py b/quisby/benchmarks/hammerdb/extract.py
index d64a4a3..2b57e57 100644
--- a/quisby/benchmarks/hammerdb/extract.py
+++ b/quisby/benchmarks/hammerdb/extract.py
@@ -1,30 +1,20 @@
-from quisby.util import read_config
-
 def extract_hammerdb_data(path, system_name, test_name, OS_RELEASE):
     results = []
     result_data = []
-    summary_data = []
-    summary_file = path
-    server = read_config("server", "name")
-    result_dir = read_config("server", "result_dir")
-
     data_index = 0
-    header_row = []
 
     with open(path) as file:
         hammerdb_results = file.readlines()
         for index, line in enumerate(hammerdb_results):
             if "# connection:TPM" in line:
                 data_index = index
-                header_row = line.strip("\n").split(":")
             else:
                 result_data.append(line.strip("\n").split(":"))
 
     result_data = result_data[data_index:]
-    summary_data.append([system_name, server + "/results/" + result_dir + "/" + path])
 
     results.append([""])
     results.append([f"{test_name}-User Count", f"{system_name}-{OS_RELEASE}"])
     results += result_data
-    return results, summary_data
+    return results
diff --git a/quisby/benchmarks/phoronix/phoronix.py b/quisby/benchmarks/phoronix/phoronix.py
index 1bdcfda..c477ef5 100644
--- a/quisby/benchmarks/phoronix/phoronix.py
+++ b/quisby/benchmarks/phoronix/phoronix.py
@@ -119,15 +119,12 @@ def 
create_summary_phoronix_data(data, OS_RELEASE): def extract_phoronix_data(path, system_name, OS_RELEASE): """""" results = [] - summary_data = [] - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") + # Extract data from file try: if path.endswith("results.csv"): with open(path) as file: phoronix_results = file.readlines() - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) else: return None except Exception as exc: @@ -146,4 +143,4 @@ def extract_phoronix_data(path, system_name, OS_RELEASE): results.append([""]) results.append([system_name]) results.extend(phoronix_results[1:]) - return [results], summary_data + return [results] diff --git a/quisby/benchmarks/pig/extract.py b/quisby/benchmarks/pig/extract.py index 15e1c6d..8e2576d 100644 --- a/quisby/benchmarks/pig/extract.py +++ b/quisby/benchmarks/pig/extract.py @@ -10,13 +10,9 @@ def extract_pig_data(path, system_name, OS_RELEASE): cpu_count = 0 region = read_config("cloud", "region") cloud_type = read_config("cloud", "cloud_type") - # path = path + f"/iteration_1.{system_name}" - summary_data = [] - summary_file = path - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") + data_index = 0 - header = [] + try: with open(path) as file: pig_results = file.readlines() @@ -30,7 +26,6 @@ def extract_pig_data(path, system_name, OS_RELEASE): except Exception as exc: custom_logger.error(str(exc)) return None - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) cpu_count = get_cloud_cpu_count( system_name, region, cloud_type.lower() @@ -41,5 +36,5 @@ def extract_pig_data(path, system_name, OS_RELEASE): results.append(["Threads", "rhel-" + f"{OS_RELEASE}"]) results += result_data - return results, summary_data + return results diff --git a/quisby/benchmarks/reboot/reboot.py b/quisby/benchmarks/reboot/reboot.py index 82be17c..9937717 100644 --- a/quisby/benchmarks/reboot/reboot.py +++ b/quisby/benchmarks/reboot/reboot.py @@ -1,14 +1,9 @@ import re import tarfile -from quisby.util import read_config - def extract_boot_data(path, system_name): results = [] - summary_data = [] - summary_file = path - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") + # system_name = path.split("_")[2] try: with open(path + "/cloud_timings") as file: @@ -19,8 +14,6 @@ def extract_boot_data(path, system_name): except FileNotFoundError: return [] - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) - tar = tarfile.open(path + "/boot_info/initial_boot_info.tar") for member in tar.getmembers(): if "initial_boot_info/boot_info" in str(member): @@ -40,4 +33,4 @@ def extract_boot_data(path, system_name): results.append(["System name", "Start Time", "Terminate Time", "Reboot Time"]) results.append([system_name, instance_start_time, terminate_time, reboot_time]) - return results, summary_data + return results diff --git a/quisby/benchmarks/speccpu/extract.py b/quisby/benchmarks/speccpu/extract.py index 04d7143..ac23ba6 100644 --- a/quisby/benchmarks/speccpu/extract.py +++ b/quisby/benchmarks/speccpu/extract.py @@ -7,13 +7,9 @@ def process_speccpu(path, system_name, suite, OS_RELEASE): results = [] - summary_data = [] - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") with open(path) as csv_file: speccpu_results = list(csv.DictReader(csv_file, delimiter=":")) - summary_data.append([system_name, server + "/results/" + 
result_dir + "/" + path]) results.append([""]) results.append([system_name, suite]) @@ -24,19 +20,17 @@ def process_speccpu(path, system_name, suite, OS_RELEASE): except Exception as exc: custom_logger.debug(str(exc)) pass - return results,summary_data + return results def extract_speccpu_data(path, system_name, OS_RELEASE): results = [] summary_data = [] if "fprate" in path: - fp_results, fp_summary_data= process_speccpu(path, system_name, "fprate", OS_RELEASE) - results +=fp_results - summary_data += fp_summary_data + fp_results = process_speccpu(path, system_name, "fprate", OS_RELEASE) + results += fp_results elif "intrate" in path: - int_results, int_summary_data= process_speccpu(path, system_name, "intrate", OS_RELEASE) - results +=int_results - summary_data +=int_summary_data + int_results = process_speccpu(path, system_name, "intrate", OS_RELEASE) + results += int_results - return results, summary_data + return results diff --git a/quisby/benchmarks/specjbb/specjbb.py b/quisby/benchmarks/specjbb/specjbb.py index da6d329..ab82d4c 100644 --- a/quisby/benchmarks/specjbb/specjbb.py +++ b/quisby/benchmarks/specjbb/specjbb.py @@ -128,16 +128,11 @@ def create_summary_specjbb_data(specjbb_data, OS_RELEASE): def extract_specjbb_data(path, system_name, OS_RELEASE): """""" results = [[""], [system_name]] - summary_data = [] - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") - # File read try: if path.endswith(".csv"): with open(path) as csv_file: specjbb_results = list(csv.DictReader(csv_file, delimiter=":")) - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) else: return None except Exception as exc: @@ -151,4 +146,4 @@ def extract_specjbb_data(path, system_name, OS_RELEASE): else: results.append([data_dict["Warehouses"], data_dict["Bops"]]) - return results, summary_data + return results diff --git a/quisby/benchmarks/streams/streams.py b/quisby/benchmarks/streams/streams.py index a03c310..5577720 100644 --- a/quisby/benchmarks/streams/streams.py +++ b/quisby/benchmarks/streams/streams.py @@ -133,8 +133,7 @@ def extract_streams_data(path, system_name, OS_RELEASE): summary_data = [] summary_file = path - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") + if not os.path.isfile(summary_file): return None @@ -142,7 +141,6 @@ def extract_streams_data(path, system_name, OS_RELEASE): with open(path) as file: streams_results = file.readlines() - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) data_index = 0 for index, data in enumerate(streams_results): @@ -189,7 +187,7 @@ def extract_streams_data(path, system_name, OS_RELEASE): data_pos = pos - 1 proccessed_data[pos - 5].append(memory + "-" + OS_RELEASE) proccessed_data[data_pos].extend(row[1:]) - return proccessed_data, summary_data + return proccessed_data if __name__ == "__main__": diff --git a/quisby/benchmarks/uperf/uperf.py b/quisby/benchmarks/uperf/uperf.py index 1b207c0..98d8285 100644 --- a/quisby/benchmarks/uperf/uperf.py +++ b/quisby/benchmarks/uperf/uperf.py @@ -137,10 +137,6 @@ def extract_uperf_data(path, system_name): """""" results = [] data_position = {} - summary_data = [] - summary_file = path - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") tests_supported = ["tcp_stream", "tcp_rr"] @@ -155,8 +151,6 @@ def extract_uperf_data(path, system_name): else: return None - summary_data.append([system_name, server + "/results/" + result_dir + "/" + 
path]) - # find all ports result index in csv row for index, row in enumerate(csv_reader[0]): if "all" in row: @@ -213,6 +207,6 @@ def extract_uperf_data(path, system_name): else: results.append(*items) - return results, summary_data + return results diff --git a/quisby/example.ini b/quisby/example.ini index c877cc5..b8e0fb4 100644 --- a/quisby/example.ini +++ b/quisby/example.ini @@ -28,7 +28,3 @@ filename = quisby.log max_bytes_log_file = 5 backup_count = 3 -[server] -name = https://pbench.app.intlab.redhat.com -result_dir = - diff --git a/quisby/pricing/cloud_pricing.py b/quisby/pricing/cloud_pricing.py index 2db7bbf..d5a6c0d 100644 --- a/quisby/pricing/cloud_pricing.py +++ b/quisby/pricing/cloud_pricing.py @@ -71,18 +71,19 @@ def get_gcp_prices(instance_name, region): prefix = "" gcp_price_list = google_ext_prices["gcp_price_list"] family, model, cpu = instance_name.split("-") - if family.upper() in ("N2", "N2D", "T2D", "T2A", "C2", "C2D", "M1", "M2", "N1", "E2"): + if family.upper() in ("N2", "N2D", "T2D", "T2A", "C2", "C2D", "M1", "M2", "N1", "E2", "C4A", "C3D"): prefix = "CP-COMPUTEENGINE-" + family.upper() + "-PREDEFINED-VM-CORE".strip() else: - custom_logger.error("This machine price is not available") - return + custom_logger.error("Machine price is not available for :" + instance_name) + return None for name, prices in gcp_price_list.items(): if prefix == name: for key, price in prices.items(): if region == key: return gcp_price_list[name][region] * float(cpu) - return 0.0 + custom_logger.error("Machine price is not available for region:" + region) + return None def get_aws_pricing(instance_type, region, os_type): From b67f764651c367baf5859914a235d77687551c38 Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Mon, 6 Jan 2025 09:44:30 +0530 Subject: [PATCH 10/10] remove summary creation --- quisby.py | 72 +++++++++--------------------------- quisby/benchmarks/fio/fio.py | 7 ++-- 2 files changed, 20 insertions(+), 59 deletions(-) diff --git a/quisby.py b/quisby.py index b4b34bb..5d3457c 100644 --- a/quisby.py +++ b/quisby.py @@ -203,7 +203,6 @@ def data_handler(proc_list, noti_flag, exclude_list): print(line, end="") with open(results_path) as file: - summary_result = [] custom_logger.info("Reading data files path provided in file : " + results_path) test_result_path = file.readlines() flag = False @@ -212,14 +211,9 @@ def data_handler(proc_list, noti_flag, exclude_list): if "test " in data: flag = False if results: - summary_result = [[""],[test_name]]+summary_result - #TODO Check better way to add this information - append_to_sheet(spreadsheetid, summary_result, "summary") - spreadsheetid = process_results(results, test_name, cloud_type, os_type, os_release, - spreadsheet_name, spreadsheetid) + spreadsheetid = process_results(results, test_name, cloud_type, os_type, os_release, spreadsheet_name, spreadsheetid) results = [] test_name = data.replace("test ", "").strip() - summary_result = [] source = "results" if test_name in proc_list or proc_list == [] and test_name not in exclude_list: flag = True @@ -241,107 +235,77 @@ def data_handler(proc_list, noti_flag, exclude_list): path = test_path + "/" + path.strip() custom_logger.debug(path) if test_name == "streams" and flag == True: - ret_val, summary_data = extract_streams_data(path, system_name, os_release) + ret_val = extract_streams_data(path, system_name, os_release) if ret_val: results += ret_val - if summary_data: - summary_result +=summary_data elif test_name == "uperf" and flag == True: - ret_val, summary_data = 
extract_uperf_data(path, system_name)
+                    ret_val = extract_uperf_data(path, system_name)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "linpack" and flag == True:
-                    ret_val, summary_data = extract_linpack_data(path, system_name)
+                    ret_val = extract_linpack_data(path, system_name)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "specjbb" and flag == True:
-                    ret_value, summary_data = extract_specjbb_data(path, system_name, os_release)
+                    ret_value = extract_specjbb_data(path, system_name, os_release)
                     if ret_value is not None:
                         results.append(ret_value)
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "pig" and flag == True:
-                    ret_val, summary_data = extract_pig_data(path, system_name, os_release)
+                    ret_val = extract_pig_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif check_test_is_hammerdb(test_name) and flag == True:
-                    ret_val, summary_data = extract_hammerdb_data(path, system_name, test_name, os_release)
+                    ret_val = extract_hammerdb_data(path, system_name, test_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "fio_run" and flag == True:
                     ret_val = None
                     if source == "results":
-                        ret_val, summary_data = extract_fio_run_data(path, system_name, os_release)
+                        ret_val = extract_fio_run_data(path, system_name, os_release)
                     elif source == "pbench":
-                        ret_val, summary_data = process_fio_run_result(path, system_name)
+                        ret_val = process_fio_run_result(path, system_name)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "boot" and flag == True:
-                    ret_val, summary_data = extract_boot_data(path, system_name)
+                    ret_val = extract_boot_data(path, system_name)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "aim" and flag == True:
                     ret_val = extract_aim_data(path, system_name)
                     if ret_val:
                         results += ret_val
                 elif test_name == "auto_hpl" and flag == True:
-                    ret_val, summary_data = extract_auto_hpl_data(path, system_name)
+                    ret_val = extract_auto_hpl_data(path, system_name)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "speccpu" and flag == True:
-                    ret_val, summary_data = extract_speccpu_data(path, system_name, os_release)
+                    ret_val = extract_speccpu_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "etcd" and flag == True:
                     ret_val = extract_etcd_data(path, system_name)
                     if ret_val:
                         results += ret_val
                 elif test_name == "coremark" and flag == True:
-                    ret_val, summary_data = extract_coremark_data(path, system_name, os_release)
+                    ret_val = extract_coremark_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "coremark_pro" and flag == True:
-                    ret_val, summary_data = extract_coremark_pro_data(path, system_name, os_release)
+                    ret_val = extract_coremark_pro_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "passmark" and flag == True:
-                    ret_val, summary_data = extract_passmark_data(path, system_name, os_release)
+                    ret_val = extract_passmark_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result += 
summary_data
                 elif test_name == "pyperf" and flag == True:
-                    ret_val, summary_data = extract_pyperf_data(path, system_name, os_release)
+                    ret_val = extract_pyperf_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "phoronix" and flag == True:
-                    ret_val, summary_data= extract_phoronix_data(path, system_name, os_release)
+                    ret_val = extract_phoronix_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 else:
                     if flag == False:
                         pass
@@ -355,10 +319,8 @@ def data_handler(proc_list, noti_flag, exclude_list):
             register_details_json(spreadsheet_name, spreadsheetid)
         else:
             try:
-                append_to_sheet(spreadsheetid, summary_result, "summary")
                 spreadsheetid = process_results(results, test_name, cloud_type, os_type, os_release,
                                                 spreadsheet_name, spreadsheetid)
-
             except Exception as exc:
                 custom_logger.error(str(exc))
                 pass
diff --git a/quisby/benchmarks/fio/fio.py b/quisby/benchmarks/fio/fio.py
index 2167220..a010e1b 100644
--- a/quisby/benchmarks/fio/fio.py
+++ b/quisby/benchmarks/fio/fio.py
@@ -116,16 +116,15 @@ def extract_fio_run_data(path, system_name, OS_RELEASE):
     results = []
-    summary_data = []
     summary_file = path
-    server = read_config("server", "name")
-    result_dir = read_config("server", "result_dir")
+
     try:
         with open(path + "/result.csv") as csv_file:
             csv_data = csv_file.readlines()
             csv_data[-1] = csv_data[-1].strip()
             results += extract_csv_data(csv_data, os.path.basename(path))
-        summary_data.append([system_name, server + "/results/" + result_dir + "/" + path])
-        return group_data(results, system_name, OS_RELEASE), summary_data
+        return group_data(results, system_name, OS_RELEASE)
    except Exception as exc:
         custom_logger.error("Unable to find fio path")
         custom_logger.error(str(exc))
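
---

A note on the instance-name parsing shared by the compare helpers in this series: every variant relies on re.search(r'^(.*?)(\d+)(.*?)$', name), which splits the name at the first run of digits, not the last. The sketch below is illustrative only; it is not part of any patch, and the helper name and sample instance names are hypothetical. It shows how the pattern behaves and why indexed access is the safer way to split multi-hyphen GCP names:

    import re

    def split_instance_name(name):
        # Same pattern as the helpers above: lazy prefix, first digit run, rest.
        match = re.search(r'^(.*?)(\d+)(.*?)$', name)
        if match:
            return match.group(1), match.group(2), match.group(3)
        return None, None, None

    # The first digit run splits the name, so "t2.micro-01" breaks at the "2" in "t2".
    print(split_instance_name("t2.micro-01"))      # ('t', '2', '.micro-01')
    print(split_instance_name("Standard_D2s_v3"))  # ('Standard_D', '2', 's_v3')

    # GCP names usually carry two hyphens, so two-value unpacking of
    # name.split("-") raises ValueError on three parts; indexing stays robust.
    name = "n2-standard-4"
    print(name.split("-")[0], int(name.split("-")[-1]))  # n2 4

Because the Azure comparisons key off this regex, a name whose first digit run falls inside the family token (as in "t2.micro-01") groups on a very short prefix; that behavior is worth keeping in mind when reading the compare_inst functions above.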