From d94d5f36efbfb9eb44337262a8e41ef7c3a22f41 Mon Sep 17 00:00:00 2001
From: NickRoss
Date: Tue, 17 Sep 2024 15:58:44 -0500
Subject: [PATCH 1/2] Fix dashboard display labels for disintegrated results

---
 dashboard/components/Dashboard.js             | 67 ++++++++++---------
 .../components/DashboardDisplayControls.js    |  7 +-
 2 files changed, 39 insertions(+), 35 deletions(-)

diff --git a/dashboard/components/Dashboard.js b/dashboard/components/Dashboard.js
index de3979a..4401ca2 100644
--- a/dashboard/components/Dashboard.js
+++ b/dashboard/components/Dashboard.js
@@ -1,10 +1,9 @@
 "use client";
-import React from "react";
+import Alert from "@/components/Alert";
+import { col2material } from "@/lib/constants";
+import state from "@/lib/state";
 import Plot from "react-plotly.js";
 import { useSnapshot } from "valtio";
-import state from "@/lib/state";
-import { col2material } from "@/lib/constants";
-import Alert from "@/components/Alert";
 
 export default function Dashboard() {
   const snap = useSnapshot(state);
@@ -47,31 +46,34 @@ export default function Dashboard() {
   const plotData =
     Object.keys(snap.data).length > 0
       ? snap.data.data.map((d) => {
-          console.log(d);
-          const materialClass = d["Material Class I"];
-          const color = class2color[materialClass] || "#000";
-          const countDisplay =
-            snap.filters["testMethod"] === "Mesh Bag"
-              ? ` (n=${d["count"]})`
-              : "";
-          // Replace "Positive" with "Pos." in labels and append count
-          const name = `${d["aggCol"]}${countDisplay}`.replace(
-            "Positive",
-            "Pos."
-          );
-          const wrappedName = wrapLabel(name);
-
-          return {
-            type: "box",
-            name: wrappedName,
-            y: [d.min, d.q1, d.median, d.q3, d.max],
-            marker: { color },
-            boxmean: true,
-            line: { width: 3.25 },
-          };
-        })
+          const materialClass = d["Material Class I"];
+          const color = class2color[materialClass] || "#000";
+          const countDisplay =
+            snap.filters["testMethod"] === "Mesh Bag"
+              ? ` (n=${d["count"]})`
+              : "";
+          // Replace "Positive" with "Pos." in labels and append count
+          const name = `${d["aggCol"]}${countDisplay}`.replace(
+            "Positive",
+            "Pos."
+          );
+          const wrappedName = wrapLabel(name);
+
+          return {
+            type: "box",
+            name: wrappedName,
+            y: [d.min, d.q1, d.median, d.q3, d.max],
+            marker: { color },
+            boxmean: true,
+            line: { width: 3.25 },
+          };
+        })
       : [];
+  const cleanDisplayCol = snap.filters.displayResiduals === "Disintegrated"
+    ? snap.filters.displayCol.replace("Residuals", "Disintegrated")
+    : snap.filters.displayCol;
 
   function generateYAxisTitle(displayCol, cap) {
     let yAxisTitle = `${displayCol}`;
     if (cap) {
@@ -80,16 +82,19 @@
     return yAxisTitle;
   }
   const yAxisTitle = generateYAxisTitle(
-    snap.filters.displayCol,
+    cleanDisplayCol,
     !snap.filters.uncapResults
   );
 
-  function generateTitle(displayCol, aggCol, num_trials) {
-    return `${displayCol} by ${col2material[aggCol]} - ${num_trials} Trial(s)`;
+
+
+  function generateTitle(cleanDisplayCol, aggCol, num_trials) {
+    return `${cleanDisplayCol} by ${col2material[aggCol]} - ${num_trials} Trial(s)`;
   }
+
   const title = generateTitle(
-    snap.filters.displayCol,
+    cleanDisplayCol,
     snap.filters.aggCol,
     snap.data.numTrials
   );
diff --git a/dashboard/components/DashboardDisplayControls.js b/dashboard/components/DashboardDisplayControls.js
index ad9693f..65c1f5e 100644
--- a/dashboard/components/DashboardDisplayControls.js
+++ b/dashboard/components/DashboardDisplayControls.js
@@ -1,10 +1,9 @@
 "use client";
-import React from "react";
-import RadioSingleSelect from "./RadioSingleSelect";
-import { material2col, residuals2col, display2col } from "@/lib/constants";
-import { useSnapshot } from "valtio";
+import { display2col, material2col, residuals2col } from "@/lib/constants";
 import state from "@/lib/state";
 import { InformationCircleIcon } from "@heroicons/react/24/solid";
+import { useSnapshot } from "valtio";
+import RadioSingleSelect from "./RadioSingleSelect";
 
 export default function DashboardDisplayControls() {
   const snap = useSnapshot(state);

From 9092a03f95c1577a27ced89637ee0f8deb0578fd Mon Sep 17 00:00:00 2001
From: NickRoss
Date: Tue, 17 Sep 2024 16:20:21 -0500
Subject: [PATCH 2/2] Fixing linting and adding to gitignore

---
 .gitignore                   |   1 +
 scripts/pipeline-template.py | 233 ++++++++++++++++++++++++++---------
 2 files changed, 177 insertions(+), 57 deletions(-)

diff --git a/.gitignore b/.gitignore
index bde32a7..1f259fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 .env*
+.local.env
 service-account-key-base64.txt
 compostable-428115-5dde0b40960b.json
 data/*

diff --git a/scripts/pipeline-template.py b/scripts/pipeline-template.py
index 5edb2c1..492e3bc 100644
--- a/scripts/pipeline-template.py
+++ b/scripts/pipeline-template.py
@@ -29,7 +29,9 @@
     "% Residuals (Area)",
 ]
 
-ITEMS_PATH = DATA_DIR / "CFTP Test Item Inventory with Dimensions - All Trials.xlsx"
+ITEMS_PATH = (
+    DATA_DIR / "CFTP Test Item Inventory with Dimensions - All Trials.xlsx"
+)
 EXTRA_ITEMS_PATH = DATA_DIR / "Item IDS for CASP004 CASP003.xlsx"
 
 df_items = pd.read_excel(ITEMS_PATH, sheet_name=0, skiprows=3)
@@ -42,7 +44,10 @@
 OUTLIER_THRESHOLD = 10
 
 item2id = {
-    key.strip(): value for key, value in df_items.set_index("Item Description Refined")["Item ID"].to_dict().items()
+    key.strip(): value
+    for key, value in df_items.set_index("Item Description Refined")["Item ID"]
+    .to_dict()
+    .items()
 }
 
 extra_items = pd.read_excel(EXTRA_ITEMS_PATH)
@@ -74,7 +79,9 @@ def map_technology(trial_id: str) -> str:
     return "Unknown"
 
 
-TRIALS_PATH = DATA_DIR / "CFTP Anonymized Data Compilation Overview - For Sharing.xlsx"
+TRIALS_PATH = (
+    DATA_DIR / "CFTP Anonymized Data Compilation Overview - For Sharing.xlsx"
+)
 
 df_trials = pd.read_excel(TRIALS_PATH, skiprows=3)
 trial2id = {
@@ -100,9 +107,13 @@ def map_technology(trial_id: str) -> str:
     "Facility 10": "WR005-01",
 }
 
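A quick aside on the stripped-key lookup rewrapped in the hunk above: the .strip() guards against stray whitespace in the Excel item descriptions. A minimal, self-contained sketch of the same comprehension, where the two-row frame is invented data:

    import pandas as pd

    # "Fork " carries the trailing space that motivates stripping the keys.
    df_items = pd.DataFrame(
        {"Item Description Refined": ["Fork ", "Cup"], "Item ID": ["I-001", "I-002"]}
    )
    item2id = {
        key.strip(): value
        for key, value in df_items.set_index("Item Description Refined")["Item ID"]
        .to_dict()
        .items()
    }
    assert item2id["Fork"] == "I-001"  # matches despite the trailing space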
-OPERATING_CONDITIONS_PATH = DATA_DIR / "Donated Data 2023 - Compiled Facility Conditions for DSI.xlsx"
+OPERATING_CONDITIONS_PATH = (
+    DATA_DIR / "Donated Data 2023 - Compiled Facility Conditions for DSI.xlsx"
+)
 
-df_temps = pd.read_excel(OPERATING_CONDITIONS_PATH, sheet_name=3, skiprows=1, index_col="Day #")
+df_temps = pd.read_excel(
+    OPERATING_CONDITIONS_PATH, sheet_name=3, skiprows=1, index_col="Day #"
+)
 df_temps.columns = [trial2id[col.replace("*", "")] for col in df_temps.columns]
 df_temps_avg = df_temps.mean().to_frame("Average Temperature (F)")
 df_temps["Operating Condition"] = "Temperature"
@@ -113,29 +124,44 @@ def map_technology(trial_id: str) -> str:
     sheet_name=2,
     skiprows=3,
 )
-df_trial_duration.columns = [col.replace("\n", "").strip() for col in df_trial_duration.columns]
-df_trial_duration = df_trial_duration[["Facility Designation", "Endpoint Analysis (trial length)"]].rename(
+df_trial_duration.columns = [
+    col.replace("\n", "").strip() for col in df_trial_duration.columns
+]
+df_trial_duration = df_trial_duration[
+    ["Facility Designation", "Endpoint Analysis (trial length)"]
+].rename(
     columns={
         "Facility Designation": "Trial ID",
         "Endpoint Analysis (trial length)": "Trial Duration",
     }
 )
 df_trial_duration["Trial ID"] = (
-    df_trial_duration["Trial ID"].str.replace("( ", "(", regex=False).str.replace(" )", ")", regex=False).map(trial2id)
+    df_trial_duration["Trial ID"]
+    .str.replace("( ", "(", regex=False)
+    .str.replace(" )", ")", regex=False)
+    .map(trial2id)
 )
 df_trial_duration = df_trial_duration.set_index("Trial ID")
 
-df_moisture = pd.read_excel(OPERATING_CONDITIONS_PATH, sheet_name=4, skiprows=1, index_col="Week")
+df_moisture = pd.read_excel(
+    OPERATING_CONDITIONS_PATH, sheet_name=4, skiprows=1, index_col="Week"
+)
 # Filter out rows with non-numeric week values
 df_moisture = df_moisture.reset_index()
-df_moisture = df_moisture[pd.to_numeric(df_moisture["Week"], errors="coerce").notna()]
+df_moisture = df_moisture[
+    pd.to_numeric(df_moisture["Week"], errors="coerce").notna()
+]
 df_moisture = df_moisture.set_index("Week")
-df_moisture.columns = [trial2id[col.replace("*", "")] for col in df_moisture.columns]
+df_moisture.columns = [
+    trial2id[col.replace("*", "")] for col in df_moisture.columns
+]
 df_moisture_avg = df_moisture.mean().to_frame("Average % Moisture (In Field)")
 df_moisture["Operating Condition"] = "Moisture"
 df_moisture["Time Unit"] = "Week"
 
-df_o2 = pd.read_excel(OPERATING_CONDITIONS_PATH, sheet_name=6, skiprows=1, index_col="Week")
+df_o2 = pd.read_excel(
+    OPERATING_CONDITIONS_PATH, sheet_name=6, skiprows=1, index_col="Week"
+)
 df_o2 = df_o2.reset_index()
 df_o2 = df_o2[pd.to_numeric(df_o2["Week"], errors="coerce").notna()]
 df_o2 = df_o2.set_index("Week")
@@ -143,7 +169,9 @@ def map_technology(trial_id: str) -> str:
 df_o2["Operating Condition"] = "Oxygen"
 df_o2["Time Unit"] = "Week"
 
-df_operating_conditions_avg = pd.concat([df_trial_duration, df_temps_avg, df_moisture_avg], axis=1)
+df_operating_conditions_avg = pd.concat(
+    [df_trial_duration, df_temps_avg, df_moisture_avg], axis=1
+)
 
 processed_data = []
 
@@ -189,16 +217,24 @@ def __init__(
         filename = self.data_filepath.stem
         self.trial_name = trial_name
         self.trials = trials
-        file_suffix = f"_{trial_name}_clean.csv" if self.trial_name else "_clean.csv"
-        self.output_filepath = self.data_filepath.with_name(filename + file_suffix)
+        file_suffix = (
+            f"_{trial_name}_clean.csv" if self.trial_name else "_clean.csv"
+        )
+        self.output_filepath = self.data_filepath.with_name(
+            filename + file_suffix
+        )
 
         # TODO: This is kind of messy and could probably be better
-        self.raw_data = self.load_data(data_filepath, sheet_name=sheet_name, skiprows=skiprows)
+        self.raw_data = self.load_data(
+            data_filepath, sheet_name=sheet_name, skiprows=skiprows
+        )
         self.items = items
         self.item2id = item2id
 
     @abstractmethod
-    def load_data(self, data_filepath: Path, sheet_name: int = 0, skip_rows: int = 0) -> pd.DataFrame:
+    def load_data(
+        self, data_filepath: Path, sheet_name: int = 0, skiprows: int = 0
+    ) -> pd.DataFrame:
         """Loads data from the specified file.
 
         This method should be implemented by subclasses to load data from the
@@ -262,7 +298,9 @@ def merge_with_trials(self, data: pd.DataFrame) -> pd.DataFrame:
         Returns:
             Data merged with trial information.
         """
-        return data.merge(self.trials, left_on="Trial ID", right_on="Public Trial ID")
+        return data.merge(
+            self.trials, left_on="Trial ID", right_on="Public Trial ID"
+        )
 
     def run(self, save: bool = False) -> pd.DataFrame:
         """Runs the data pipeline.
@@ -294,7 +332,9 @@ def run(self, save: bool = False) -> pd.DataFrame:
 class NewTemplatePipeline(AbstractDataPipeline):
     """Pipeline for processing data from the new template."""
 
-    def load_data(self, data_filepath: Path, sheet_name: int = 0, skiprows: int = 0) -> pd.DataFrame:
+    def load_data(
+        self, data_filepath: Path, sheet_name: int = 0, skiprows: int = 0
+    ) -> pd.DataFrame:
         """Loads data from the specified CSV file.
 
         Args:
@@ -308,7 +348,8 @@ def load_data(self, data_filepath: Path, sheet_name: int = 0, skiprows: int = 0)
         # Read the CSV file into a DataFrame
         data = pd.read_csv(data_filepath)
 
-        # Find the index of the first completely empty row — formatted so there's comments below the data
+        # Find the index of the first completely empty row — formatted
+        # so there are comments below the data
         first_empty_row_index = data[data.isna().all(axis=1)].index.min()
 
         # If an empty row is found, drop all rows below it
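The load_data hunk above trims everything below the first fully empty row, because the new CSV template allows free-form comments underneath the data. The drop itself falls outside this hunk; here is one plausible way it could work, on invented rows (the iloc slice is an assumption, not necessarily the script's exact code):

    import numpy as np
    import pandas as pd

    data = pd.DataFrame(
        {
            "Item Name": ["Fork", "Cup", np.nan, "Note: see methods tab"],
            "% Residuals (Dry Weight)": ["10%", "0%", np.nan, np.nan],
        }
    )
    first_empty_row_index = data[data.isna().all(axis=1)].index.min()
    if pd.notna(first_empty_row_index):
        data = data.iloc[: int(first_empty_row_index)]  # keep rows above the gap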
@@ -331,13 +372,22 @@
                 "Trial": "Trial ID",
             }
         )
-        percentage_cols = ["% Residuals (Dry Weight)", "% Residuals (Wet Weight)", "% Residuals (Area)"]
+        percentage_cols = [
+            "% Residuals (Dry Weight)",
+            "% Residuals (Wet Weight)",
+            "% Residuals (Area)",
+        ]
         data[percentage_cols] = data[percentage_cols].replace("no data", np.nan)
-        # TODO: Depending on how the data actually comes in, maybe we don't want to do it this way?
-        data[percentage_cols] = data[percentage_cols].replace("%", "", regex=True).astype(float) / 100
+        # TODO: Depending on how the data actually comes in,
+        # maybe we don't want to do it this way?
+        data[percentage_cols] = (
+            data[percentage_cols].replace("%", "", regex=True).astype(float)
+            / 100
+        )
 
         # Prefer dry weight to wet weight if available
-        data["% Residuals (Mass)"] = data["% Residuals (Dry Weight)"].fillna(data["% Residuals (Wet Weight)"])
+        data["% Residuals (Mass)"] = data["% Residuals (Dry Weight)"].fillna(
+            data["% Residuals (Wet Weight)"]
+        )
 
         return data
 
@@ -350,7 +400,9 @@ def join_with_items(self, data):
         Returns:
             The joined data
         """
-        return self.items.drop_duplicates(subset="Item Name").merge(data, on="Item Name")
+        return self.items.drop_duplicates(subset="Item Name").merge(
+            data, on="Item Name"
+        )
 
     def merge_with_trials(self, data):
         """Join with the trials table
@@ -366,12 +418,19 @@ def merge_with_trials(self, data):
             "Test Method": "Mesh Bag",
             "Technology": "Windrow",
         }
-        self.trials = pd.concat([self.trials, pd.DataFrame(dummy_trial, index=[0])], ignore_index=True)
+        self.trials = pd.concat(
+            [self.trials, pd.DataFrame(dummy_trial, index=[0])],
+            ignore_index=True,
+        )
         return data.merge(self.trials, on="Trial ID")
 
 
-NEW_TEMPLATE_PATH = DATA_DIR / "CFTP_DisintegrationDataInput_Template_sept92024.csv"
-new_template_pipeline = NewTemplatePipeline(NEW_TEMPLATE_PATH, trial_name="Dummy Data for New Template")
+NEW_TEMPLATE_PATH = (
+    DATA_DIR / "CFTP_DisintegrationDataInput_Template_sept92024.csv"
+)
+new_template_pipeline = NewTemplatePipeline(
+    NEW_TEMPLATE_PATH, trial_name="Dummy Data for New Template"
+)
 # TODO: This is commented out so we don't add the dummy data to the "real" data
 # processed_data.append(new_template_pipeline.run())
@@ -387,22 +446,28 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         **kwargs: Arbitrary keyword arguments.
         """
         super().__init__(*args, **kwargs)
-        # We are using the start weight specific to this trial so drop the Start Weight column
+        # We are using the start weight specific to this trial
+        # so drop the Start Weight column
         # Start weight is set in preprocess_data
         self.items = self.items.drop("Start Weight", axis=1)
 
-    def load_data(self, data_filepath: Path, sheet_name: int = 0, skiprows: int = 0) -> pd.DataFrame:
+    def load_data(
+        self, data_filepath: Path, sheet_name: int = 0, skiprows: int = 0
+    ) -> pd.DataFrame:
         """Loads data from the specified Excel file.
 
         Args:
             data_filepath (Path): Path to the data file.
             sheet_name (int, optional): Sheet name or index to load. Defaults to 0.
-            skiprows (int, optional): Number of rows to skip at the start of the file. Defaults to 0.
+            skiprows (int, optional): Number of rows to skip at the start
+                of the file. Defaults to 0.
 
         Returns:
             Loaded data.
         """
-        return pd.read_excel(data_filepath, sheet_name=sheet_name, skiprows=skiprows)
+        return pd.read_excel(
+            data_filepath, sheet_name=sheet_name, skiprows=skiprows
+        )
 
     def preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame:
         """Preprocesses the data.
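For reference, the percentage handling that NewTemplatePipeline.preprocess_data applies above, run end to end on invented values ("no data" becomes NaN, percent strings become fractions, and dry weight is preferred over wet):

    import numpy as np
    import pandas as pd

    data = pd.DataFrame(
        {
            "% Residuals (Dry Weight)": ["12%", "no data", "0%"],
            "% Residuals (Wet Weight)": ["15%", "80%", "5%"],
        }
    )
    percentage_cols = ["% Residuals (Dry Weight)", "% Residuals (Wet Weight)"]
    data[percentage_cols] = data[percentage_cols].replace("no data", np.nan)
    data[percentage_cols] = (
        data[percentage_cols].replace("%", "", regex=True).astype(float) / 100
    )
    data["% Residuals (Mass)"] = data["% Residuals (Dry Weight)"].fillna(
        data["% Residuals (Wet Weight)"]
    )
    print(data["% Residuals (Mass)"].tolist())  # [0.12, 0.8, 0.0]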
- casp004_items = pd.read_excel(self.data_filepath, sheet_name=2).drop_duplicates(subset=["Item Name"]) - casp004_weights = casp004_items.set_index("Item Name")["Weight (average)"].to_dict() + casp004_items = pd.read_excel( + self.data_filepath, sheet_name=2 + ).drop_duplicates(subset=["Item Name"]) + casp004_weights = casp004_items.set_index("Item Name")[ + "Weight (average)" + ].to_dict() data["Start Weight"] = data["Product Name"].map(casp004_weights) # rename so this matches the other trials data["Item Description Refined"] = data["Product Name"] # TODO: Some of this should be in the abstract method... - data["Item ID"] = data["Item Description Refined"].str.strip().map(self.item2id) + data["Item ID"] = ( + data["Item Description Refined"].str.strip().map(self.item2id) + ) # Prevent duplicate columns when merging with items - data = data.rename(columns={"Item Description Refined": "Item Description Refined (Trial)"}) + data = data.rename( + columns={ + "Item Description Refined": "Item Description Refined (Trial)" + } + ) data["Trial ID"] = "CASP004-01" if data["Item ID"].isna().sum() > 0: raise ValueError("There are null items after mapping") @@ -463,7 +540,9 @@ def calculate_results(self, data: pd.DataFrame) -> pd.DataFrame: Returns: Data with calculated results. """ - data["End Weight"] = data[["Weight 1", "Weight 2", "Weight 3"]].mean(axis=1) + data["End Weight"] = data[["Weight 1", "Weight 2", "Weight 3"]].mean( + axis=1 + ) data["End Weight"] = data["End Weight"].fillna(0) data["% Residuals (Area)"] = None @@ -471,8 +550,13 @@ def calculate_results(self, data: pd.DataFrame) -> pd.DataFrame: return data -CASP004_PATH = DATA_DIR / "CASP004-01 - Results Pre-Processed for Analysis from PDF Tables.xlsx" -casp004_pipeline = CASP004Pipeline(CASP004_PATH, sheet_name=1, trial_name="casp004") +CASP004_PATH = ( + DATA_DIR + / "CASP004-01 - Results Pre-Processed for Analysis from PDF Tables.xlsx" +) +casp004_pipeline = CASP004Pipeline( + CASP004_PATH, sheet_name=1, trial_name="casp004" +) processed_data.append(casp004_pipeline.run()) @@ -521,7 +605,9 @@ def melt_trial(self, data: pd.DataFrame, value_name: str) -> pd.DataFrame: .reset_index(drop=True) ) - def load_data(self, data_filepath: Path, sheet_name: int = 0, skiprows: int = 0) -> pd.DataFrame: + def load_data( + self, data_filepath: Path, sheet_name: int = 0, skiprows: int = 0 + ) -> pd.DataFrame: """Loads data from the specified Excel file. Args: @@ -562,26 +648,38 @@ def preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame: return data -TEN_TRIALS_PATH = DATA_DIR / "Donated Data 2023 - Compiled Field Results for DSI.xlsx" -closed_loop_pipeline = ClosedLoopPipeline(TEN_TRIALS_PATH, trial_name="closed_loop") +TEN_TRIALS_PATH = ( + DATA_DIR / "Donated Data 2023 - Compiled Field Results for DSI.xlsx" +) +closed_loop_pipeline = ClosedLoopPipeline( + TEN_TRIALS_PATH, trial_name="closed_loop" +) processed_data.append(closed_loop_pipeline.run()) class PDFPipeline(AbstractDataPipeline): """Pipeline for processing PDF trial data.""" - def __init__(self, *args: Any, weight_col: str = "Residual Weight - Oven-dry", **kwargs: Any) -> None: + def __init__( + self, + *args: Any, + weight_col: str = "Residual Weight - Oven-dry", + **kwargs: Any, + ) -> None: """Initializes the PDFPipeline with the given parameters. Args: *args: Arbitrary non-keyword arguments. - weight_col: Column name for the residual weight. Defaults to "Residual Weight - Oven-dry". + weight_col: Column name for the residual weight. + Defaults to "Residual Weight - Oven-dry". 
 class PDFPipeline(AbstractDataPipeline):
     """Pipeline for processing PDF trial data."""
 
-    def __init__(self, *args: Any, weight_col: str = "Residual Weight - Oven-dry", **kwargs: Any) -> None:
+    def __init__(
+        self,
+        *args: Any,
+        weight_col: str = "Residual Weight - Oven-dry",
+        **kwargs: Any,
+    ) -> None:
         """Initializes the PDFPipeline with the given parameters.
 
         Args:
             *args: Arbitrary non-keyword arguments.
-            weight_col: Column name for the residual weight. Defaults to "Residual Weight - Oven-dry".
+            weight_col: Column name for the residual weight.
+                Defaults to "Residual Weight - Oven-dry".
             **kwargs: Arbitrary keyword arguments.
         """
         super().__init__(*args, **kwargs)
         self.weight_col = weight_col
 
-    def load_data(self, data_filepath: Path, sheet_name: int = 0, skiprows: int = 0) -> pd.DataFrame:
+    def load_data(
+        self, data_filepath: Path, sheet_name: int = 0, skiprows: int = 0
+    ) -> pd.DataFrame:
         """Loads data from the specified Excel file.
 
         Args:
@@ -592,7 +690,9 @@ def load_data(self, data_filepath: Path, sheet_name: int = 0, skiprows: int = 0)
         Returns:
             Loaded data.
         """
-        return pd.read_excel(data_filepath, sheet_name=sheet_name, skiprows=skiprows)
+        return pd.read_excel(
+            data_filepath, sheet_name=sheet_name, skiprows=skiprows
+        )
 
     def join_with_items(self, data: pd.DataFrame) -> pd.DataFrame:
         """Joins the data with item information.
@@ -606,10 +706,16 @@ def join_with_items(self, data: pd.DataFrame) -> pd.DataFrame:
         Returns:
             Data joined with item information.
         """
-        # TODO: Do we want to merge on ID or should we just merge on description if we have it?
-        data["Item ID"] = data["Item Description Refined"].str.strip().map(self.item2id)
+        # TODO: Should we merge on ID, or on description when we have it?
+        data["Item ID"] = (
+            data["Item Description Refined"].str.strip().map(self.item2id)
+        )
         # Prevent duplicate columns when merging with items
-        data = data.rename(columns={"Item Description Refined": "Item Description Refined (Trial)"})
+        data = data.rename(
+            columns={
+                "Item Description Refined": "Item Description Refined (Trial)"
+            }
+        )
         drop_cols = ["Item Description From Trial"]
         data = data.drop(drop_cols, axis=1)
         if data["Item ID"].isna().sum() > 0:
@@ -628,7 +734,9 @@ def calculate_results(self, data: pd.DataFrame) -> pd.DataFrame:
         Returns:
             Data with calculated results.
         """
-        data["% Residuals (Mass)"] = data[self.weight_col] / (data["Start Weight"] * data["Number of Items per bag"])
+        data["% Residuals (Mass)"] = data[self.weight_col] / (
+            data["Start Weight"] * data["Number of Items per bag"]
+        )
         data["% Residuals (Area)"] = None
         data["Trial"] = data["Trial ID"]
         return data
 
 
 PDF_TRIALS = DATA_DIR / "Compiled Field Results - CFTP Gathered Data.xlsx"
 
-ad001_pipeline = PDFPipeline(PDF_TRIALS, trial_name="ad001", sheet_name=0, skiprows=1)
+ad001_pipeline = PDFPipeline(
+    PDF_TRIALS, trial_name="ad001", sheet_name=0, skiprows=1
+)
 processed_data.append(ad001_pipeline.run())
 
 wr001_pipeline = PDFPipeline(PDF_TRIALS, trial_name="wr001", sheet_name=1)
@@ -686,7 +796,9 @@ def preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame:
 
 # Exclude mixed materials and multi-laminate pouches
 all_trials = all_trials[~(all_trials["Material Class II"] == "Mixed Materials")]
-all_trials = all_trials[~(all_trials["Item Name"] == "Multi-laminate stand-up pounch with zipper")]
+all_trials = all_trials[
+    ~(all_trials["Item Name"] == "Multi-laminate stand-up pounch with zipper")
+]
 
 # Exclude anything over 1000% as outlier
 all_trials = all_trials[all_trials["% Residuals (Mass)"] < OUTLIER_THRESHOLD]
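The calculate_results hunk above normalizes the residual weight by the total mass that went into the bag. A worked example with invented numbers (five 2.0 g items in, 1.5 g of oven-dry residue out):

    import pandas as pd

    data = pd.DataFrame(
        {
            "Residual Weight - Oven-dry": [1.5],
            "Start Weight": [2.0],
            "Number of Items per bag": [5],
        }
    )
    weight_col = "Residual Weight - Oven-dry"
    data["% Residuals (Mass)"] = data[weight_col] / (
        data["Start Weight"] * data["Number of Items per bag"]
    )
    print(data["% Residuals (Mass)"].iloc[0])  # 0.15, i.e. 15% residuals by mass

Like the other percentage columns in the script, the result is stored as a fraction rather than on a 0-100 scale, which is why the outlier threshold of 10 corresponds to 1000%.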
@@ -699,7 +811,8 @@ def preprocess_data(self, data: pd.DataFrame) -> pd.DataFrame:
 
 
 def anonymize_brand(brand: str) -> str:
-    """Anonymizes brand names by mapping them to a generic brand. Sorry for the global variable.
+    """Anonymizes brand names by mapping them to a generic brand.
+    Sorry for the global variable.
 
     Args:
         brand: The brand name
 
     Returns:
         The anonymized brand
     """
 
 # Make sure all trial IDs are represented in operating conditions
-unique_trial_ids = pd.DataFrame(all_trials["Trial ID"].unique(), columns=["Trial ID"]).set_index("Trial ID")
+unique_trial_ids = pd.DataFrame(
+    all_trials["Trial ID"].unique(), columns=["Trial ID"]
+).set_index("Trial ID")
 df_operating_conditions_avg = unique_trial_ids.merge(
     df_operating_conditions_avg, left_index=True, right_index=True, how="left"
 )
 
 operating_conditions_avg_output_path = DATA_DIR / "operating_conditions_avg.csv"
-df_operating_conditions_avg.to_csv(operating_conditions_avg_output_path, index_label="Trial ID")
+df_operating_conditions_avg.to_csv(
+    operating_conditions_avg_output_path, index_label="Trial ID"
+)
 
 # Save full operating conditions data
 operating_conditions_output_path = DATA_DIR / "operating_conditions_full.csv"
 df_operating_conditions = pd.concat([df_temps, df_moisture, df_o2], axis=0)
-df_operating_conditions.to_csv(operating_conditions_output_path, index=True, index_label="Time Step")
+df_operating_conditions.to_csv(
+    operating_conditions_output_path, index=True, index_label="Time Step"
+)
 
 print("Complete!")
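One possible follow-up to the how="left" merge above, not part of the original script: trials that have no operating-condition rows survive the merge as all-NaN, so they can be flagged explicitly before the CSV is written.

    # Hypothetical spot check; "Average Temperature (F)" is the column built
    # from df_temps_avg earlier in the script.
    missing = df_operating_conditions_avg[
        df_operating_conditions_avg["Average Temperature (F)"].isna()
    ]
    if not missing.empty:
        print("Trials lacking operating conditions:", list(missing.index))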