From d6ad7607024ac08a16742aa60a5df5ddb062d4f5 Mon Sep 17 00:00:00 2001
From: Theresa <thuang@college.harvard.edu>
Date: Sun, 22 Oct 2023 16:59:31 -0400
Subject: [PATCH 1/5] Write outputs to JSON and upload to S3 Bucket

---
 ersilia/core/tracking.py | 46 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/ersilia/core/tracking.py b/ersilia/core/tracking.py
index d3e7b272c..5c3ae196f 100644
--- a/ersilia/core/tracking.py
+++ b/ersilia/core/tracking.py
@@ -1,7 +1,12 @@
 from datetime import datetime
 import json
 import pandas as pd
-
+import tracemalloc
+# from ersilia import cli
+import logging
+import boto3
+from botocore.exceptions import ClientError
+import os
 
 class RunTracker:
     """
@@ -48,3 +53,42 @@ def read_json(self, result):
         data = json.load(result)
         self.log_to_console(result)
         return data
+    
+    def start(self):
+        tracemalloc.start()
+        self.time_start = tracemalloc.get_traced_memory()[0]
+    
+    def track_memory(self):
+        peak_memory = tracemalloc.get_traced_memory()[1] - self.time_start
+        print(f"Peak memory: {peak_memory}")
+        tracemalloc.stop()
+
+
+def write_file(dict):
+    str = json.dump(dict)
+    tmp = tempfile.NamedTemporaryFile()
+
+    with open(tmp.name, 'w') as f:
+        f.write(str)
+
+def upload_file(file_name, bucket, object_name=None):
+    """Upload a file to an S3 bucket
+
+    :param file_name: File to upload
+    :param bucket: Bucket to upload to
+    :param object_name: S3 object name. If not specified then file_name is used
+    :return: True if file was uploaded, else False
+    """
+
+    # If S3 object_name was not specified, use file_name
+    if object_name is None:
+        object_name = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + '-' + os.path.basename(file_name)
+
+    # Upload the file
+    s3_client = boto3.client('s3')
+    try:
+        response = s3_client.upload_file(file_name, bucket, object_name)
+    except ClientError as e:
+        logging.error(e)
+        return False
+    return True
\ No newline at end of file

From d13c4e566d7deb43256b244064ad303704d610c2 Mon Sep 17 00:00:00 2001
From: Anthony Cui <acui1@college.harvard.edu>
Date: Tue, 14 Nov 2023 15:56:08 -0500
Subject: [PATCH 2/5] Import tempfile in place of commented-out cli import

---
 ersilia/core/tracking.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ersilia/core/tracking.py b/ersilia/core/tracking.py
index 5c3ae196f..d5e75ab08 100644
--- a/ersilia/core/tracking.py
+++ b/ersilia/core/tracking.py
@@ -2,7 +2,7 @@
 import json
 import pandas as pd
 import tracemalloc
-# from ersilia import cli
+import tempfile
 import logging
 import boto3
 from botocore.exceptions import ClientError

From aa5bbf26d61798ffb4bcf419a511a0640ec6aba0 Mon Sep 17 00:00:00 2001
From: Anthony Cui <acui1@college.harvard.edu>
Date: Tue, 14 Nov 2023 16:04:43 -0500
Subject: [PATCH 3/5] Clean up memory usage code

---
 ersilia/core/tracking.py | 71 +++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 34 deletions(-)

diff --git a/ersilia/core/tracking.py b/ersilia/core/tracking.py
index 1d9b04cd1..d1a25b124 100644
--- a/ersilia/core/tracking.py
+++ b/ersilia/core/tracking.py
@@ -53,10 +53,13 @@ class RunTracker:
 
     def __init__(self):
         self.time_start = None
+        self.memory_usage_start = 0
 
     # function to be called before model is run
     def start_tracking(self):
         self.time_start = datetime.now()
+        tracemalloc.start()
+        self.memory_usage_start = tracemalloc.get_traced_memory()[0]
 
     def sample_df(self, df, num_rows, num_cols):
         """
@@ -101,6 +104,38 @@ def get_file_sizes(self, input_df, output_df):
             "avg_output_size": output_avg_row_size,
         }
 
+    def check_types(self, resultDf, metadata):
+        typeDict = {"float64": "Float", "int64": "Int"}
+        count = 0
+
+        # ignore key and input columns
+        dtypesLst = resultDf.loc[:, ~resultDf.columns.isin(["key", "input"])].dtypes
+
+        for i in dtypesLst:
+            if typeDict[str(i)] != metadata["Output Type"][0]:
+                count += 1
+
+        if len(dtypesLst) > 1 and metadata["Output Shape"] != "List":
+            print("Not right shape. Expected List but got Single")
+            correct_shape = False
+        elif len(dtypesLst) == 1 and metadata["Output Shape"] != "Single":
+            print("Not right shape. Expected Single but got List")
+            correct_shape = False
+        else:
+            print("Output is correct shape.")
+            correct_shape = True
+
+        print("Output has", count, "mismatched types.\n")
+
+        return {"mismatched_types": count, "correct_shape": correct_shape}
+
+    def get_peak_memory(self):
+        # Compare memory between peak and amount when we started
+        peak_memory = tracemalloc.get_traced_memory()[1] - self.memory_usage_start
+        tracemalloc.stop()
+
+        return peak_memory
+
     def track(self, input, result, meta):
         """
         Tracks the results after a model run.
@@ -130,45 +165,13 @@ def track(self, input, result, meta):
 
         json_dict["file_sizes"] = self.get_file_sizes(input_dataframe, result_dataframe)
 
+        json_dict["peak_memory_use"] = self.get_peak_memory()
+
         json_object = json.dumps(json_dict, indent=4)
-        print("\nJSON Dictionary:\n", json_object)
 
         # log results to persistent tracking file
         write_persistent_file(json_object)
 
-    def check_types(self, resultDf, metadata):
-        typeDict = {"float64": "Float", "int64": "Int"}
-        count = 0
-
-        # ignore key and input columns
-        dtypesLst = resultDf.loc[:, ~resultDf.columns.isin(["key", "input"])].dtypes
-
-        for i in dtypesLst:
-            if typeDict[str(i)] != metadata["Output Type"][0]:
-                count += 1
-
-        if len(dtypesLst) > 1 and metadata["Output Shape"] != "List":
-            print("Not right shape. Expected List but got Single")
-            correct_shape = False
-        elif len(dtypesLst) == 1 and metadata["Output Shape"] != "Single":
-            print("Not right shape. Expected Single but got List")
-            correct_shape = False
-        else:
-            print("Output is correct shape.")
-            correct_shape = True
-
-        print("Output has", count, "mismatched types.\n")
-
-        return {"mismatched_types": count, "correct_shape": correct_shape}
-
-    def start(self):
-        tracemalloc.start()
-        self.time_start = tracemalloc.get_traced_memory()[0]
-
-    def track_memory(self):
-        peak_memory = tracemalloc.get_traced_memory()[1] - self.time_start
-        print(f"Peak memory: {peak_memory}")
-        tracemalloc.stop()
 
 def write_file(dict):
     str = json.dump(dict)

From 78e9f926fc83645c2adaeabed778faa9674e8320 Mon Sep 17 00:00:00 2001
From: Anthony Cui <acui1@college.harvard.edu>
Date: Tue, 14 Nov 2023 16:37:09 -0500
Subject: [PATCH 4/5] Clean up S3 code

---
 ersilia/core/tracking.py | 63 ++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/ersilia/core/tracking.py b/ersilia/core/tracking.py
index d1a25b124..9888f8848 100644
--- a/ersilia/core/tracking.py
+++ b/ersilia/core/tracking.py
@@ -42,6 +42,35 @@ def close_persistent_file():
         )
         os.rename(PERSISTENT_FILE_PATH, new_file_path)
 
+def upload_to_s3(json_dict, bucket="t4sg-ersilia", object_name=None):
+    """Upload a file to an S3 bucket
+
+    :param json_dict: JSON object to upload
+    :param bucket: Bucket to upload to
+    :param object_name: S3 object name. If not specified then we generate a name based on the timestamp and model id.
+    :return: True if file was uploaded, else False
+    """
+
+    # If S3 object_name was not specified, use file_name
+    if object_name is None:
+        object_name = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + '-' + json_dict["model_id"]
+
+    # Dump JSON into a temporary file to upload
+    json_str = json.dumps(json_dict, indent=4)
+    tmp = tempfile.NamedTemporaryFile()
+
+    with open(tmp.name, 'w') as f:
+        f.write(json_str)
+        f.flush()
+
+        # Upload the file
+        s3_client = boto3.client('s3')
+        try:
+            s3_client.upload_file(tmp.name, bucket, f"{object_name}.json")
+        except ClientError as e:
+            logging.error(e)
+            return False
+    return True
 
 class RunTracker:
     """
@@ -167,37 +196,9 @@ def track(self, input, result, meta):
 
         json_dict["peak_memory_use"] = self.get_peak_memory()
 
-        json_object = json.dumps(json_dict, indent=4)
-
         # log results to persistent tracking file
+        json_object = json.dumps(json_dict, indent=4)
         write_persistent_file(json_object)
 
-
-def write_file(dict):
-    str = json.dump(dict)
-    tmp = tempfile.NamedTemporaryFile()
-
-    with open(tmp.name, 'w') as f:
-        f.write(str)
-
-def upload_file(file_name, bucket, object_name=None):
-    """Upload a file to an S3 bucket
-
-    :param file_name: File to upload
-    :param bucket: Bucket to upload to
-    :param object_name: S3 object name. If not specified then file_name is used
-    :return: True if file was uploaded, else False
-    """
-
-    # If S3 object_name was not specified, use file_name
-    if object_name is None:
-        object_name = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + '-' + os.path.basename(file_name)
-
-    # Upload the file
-    s3_client = boto3.client('s3')
-    try:
-        response = s3_client.upload_file(file_name, bucket, object_name)
-    except ClientError as e:
-        logging.error(e)
-        return False
-    return True
\ No newline at end of file
+        # Upload run stats to s3
+        upload_to_s3(json_dict)

From d1c2adbbee1192156f35e1ec3fe6e9e06a4ffd7e Mon Sep 17 00:00:00 2001
From: Anthony Cui <acui1@college.harvard.edu>
Date: Tue, 14 Nov 2023 16:37:49 -0500
Subject: [PATCH 5/5] Reformat upload_to_s3 (quotes, line wrapping, blank lines)

---
 ersilia/core/tracking.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/ersilia/core/tracking.py b/ersilia/core/tracking.py
index 9888f8848..5cb9d1e27 100644
--- a/ersilia/core/tracking.py
+++ b/ersilia/core/tracking.py
@@ -42,6 +42,7 @@ def close_persistent_file():
         )
         os.rename(PERSISTENT_FILE_PATH, new_file_path)
 
+
 def upload_to_s3(json_dict, bucket="t4sg-ersilia", object_name=None):
     """Upload a file to an S3 bucket
 
@@ -53,18 +54,20 @@ def upload_to_s3(json_dict, bucket="t4sg-ersilia", object_name=None):
 
     # If S3 object_name was not specified, use file_name
     if object_name is None:
-        object_name = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + '-' + json_dict["model_id"]
+        object_name = (
+            datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + "-" + json_dict["model_id"]
+        )
 
     # Dump JSON into a temporary file to upload
     json_str = json.dumps(json_dict, indent=4)
     tmp = tempfile.NamedTemporaryFile()
 
-    with open(tmp.name, 'w') as f:
+    with open(tmp.name, "w") as f:
         f.write(json_str)
         f.flush()
 
         # Upload the file
-        s3_client = boto3.client('s3')
+        s3_client = boto3.client("s3")
         try:
             s3_client.upload_file(tmp.name, bucket, f"{object_name}.json")
         except ClientError as e:
@@ -72,6 +75,7 @@ def upload_to_s3(json_dict, bucket="t4sg-ersilia", object_name=None):
             return False
     return True
 
+
 class RunTracker:
     """
     This class will be responsible for tracking model runs. It calculates the desired metadata based on a model's