diff --git a/ersilia/core/tracking.py b/ersilia/core/tracking.py
index eefd5c76d..f61464f15 100644
--- a/ersilia/core/tracking.py
+++ b/ersilia/core/tracking.py
@@ -14,7 +14,36 @@
 TEMP_FILE_LOGS = os.path.abspath("")
 
 
+def create_csv(output_df):
+    """
+    This function takes in the output dataframe from the model run and returns
+    a new temporary csv file that will later be passed to CDD vault. The CSV
+    file has two columns: the first column is the input molecules and the
+    second column is the ISO-formatted time of the run.
+
+    :param output_df: The output dataframe from the model run
+    :return: A new temporary csv file
+    """
+
+    new_df = output_df[['input']].copy()
+    current_time = datetime.now().isoformat()
+
+    new_df['time'] = current_time
+    csv_file = tempfile.NamedTemporaryFile(mode="w", suffix=".csv")
+    new_df.to_csv(csv_file.name, index=False)
+
+    return csv_file
+
+
 def log_files_metrics(file):
+    """
+    This function will log the number of errors and warnings in the log files.
+
+    :param file: The log file to be read
+    :return: None (writes to file)
+    """
+
+
     error_count = 0
     warning_count = 0
 
@@ -218,6 +247,15 @@ def get_file_sizes(self, input_df, output_df):
         }
 
     def check_types(self, resultDf, metadata):
+        """
+        This function is responsible for checking the types of the output dataframe against the expected types.
+        This includes checking the shape of the output dataframe (list vs single) and the types of each column.
+
+        :param resultDf: The output dataframe
+        :param metadata: The metadata dictionary
+        :return: A dictionary containing the number of mismatched types and a boolean for whether the shape is correct
+        """
+
         typeDict = {"float64": "Float", "int64": "Int"}
         count = 0
 
@@ -237,7 +275,7 @@ def check_types(self, resultDf, metadata):
         else:
             print("Output is correct shape.")
             correct_shape = True
-
+
         print("Output has", count, "mismatched types.\n")
 
         return {"mismatched_types": count, "correct_shape": correct_shape}
@@ -284,5 +322,7 @@ def track(self, input, result, meta):
         json_object = json.dumps(json_dict, indent=4)
         write_persistent_file(json_object)
 
+        create_csv(result_dataframe)
+
         # Upload run stats to s3
         upload_to_s3(json_dict)