[TF-TRT] Remote Upload Implemented

tensorflow · Aug 3, 2022 · f8c43a2 · f8c43a2
1 parent 0de3370
commit f8c43a2
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 16 deletions.
diff --git a/tftrt/benchmarking-python/benchmark_args.py b/tftrt/benchmarking-python/benchmark_args.py
@@ -424,8 +424,16 @@ def _validate_args(self, args):
                         "doesn't exist or is not a directory"
                     )
 
-        if args.upload_metrics_endpoint is not None:
-            raise NotImplementedError("This feature is not yet implemented.")
+        # yapf: disable
+        if (
+            args.upload_metrics_endpoint is not None and
+            args.experiment_name is None
+        ):
+            raise NotImplementedError(
+                "--experiment_name must be specified if "
+                "--upload_metrics_endpoint is set."
+            )
+        # yapf: enable
 
     def _post_process_args(self, args):
         if args.use_synthetic_data:

diff --git a/tftrt/benchmarking-python/benchmark_runner.py b/tftrt/benchmarking-python/benchmark_runner.py
@@ -9,6 +9,7 @@
 import json
 import logging as _logging
 import os
+import requests
 import sys
 import time
 
@@ -32,6 +33,7 @@
 from benchmark_logger import logging
 
 from benchmark_utils import DataAggregator
+from benchmark_utils import generate_json_metrics
 from benchmark_utils import print_dict
 from benchmark_utils import timed_section
 
@@ -140,19 +142,12 @@ def _export_runtime_metrics_to_json(self, metric_dict):
             if file_path is None:
                 return
 
-            metric_dict = {
-                # Creating a copy to avoid modifying the original
-                "results": copy.deepcopy(metric_dict),
-                "runtime_arguments": vars(self._args)
-            }
+            json_string = generate_json_metrics(
+                metrics=metric_dict,
+                args=vars(self._args),
+            )
 
             with open(file_path, 'w') as json_f:
-                json_string = json.dumps(
-                    metric_dict,
-                    default=lambda o: o.__dict__,
-                    sort_keys=True,
-                    indent=4
-                )
                 print(json_string, file=json_f)
 
         except Exception as e:
@@ -205,6 +200,36 @@ def _export_runtime_metrics_to_csv(self, metric_dict):
         except Exception as e:
             logging.error(f"An exception occured during export to CSV: {e}")
 
+    def _upload_metrics_to_endpoint(self, metric_dict):
+        """Upload the run metrics as JSON via HTTP PUT, if an endpoint is set.
+
+        Best effort: any failure is logged and never propagated to the caller.
+        """
+        try:
+            if self._args.upload_metrics_endpoint is None:
+                return
+
+            json_string = generate_json_metrics(
+                metrics=metric_dict,
+                args=vars(self._args),
+            )
+            headers = {"Content-Type": "application/json"}
+            # `json_string` is already serialized: send it as the body as-is.
+            response = requests.put(
+                self._args.upload_metrics_endpoint,
+                data=json_string,
+                headers=headers
+            )
+            response.raise_for_status()
+
+            logging.info(
+                "Metrics Uploaded to endpoint: "
+                f"`{self._args.upload_metrics_endpoint}` with experiment name: "
+                f"`{self._args.experiment_name}`."
+            )
+        except Exception as e:
+            logging.error(f"An exception occured during metrics upload: {e}")
+
     def _get_graph_func(self):
         """Retreives a frozen SavedModel and applies TF-TRT
         use_tftrt: bool, if true use TensorRT
@@ -587,9 +612,12 @@ def start_profiling():
                 if not self._args.use_synthetic_data:
                     data_aggregator.aggregate_data(y_pred, y)
 
-            if (not self._args.debug_performance and
-                    step_idx % self._args.display_every !=
-                    0):  # avoids double printing
+            # yapf: disable
+            if (
+                not self._args.debug_performance and
+                # avoids double printing
+                step_idx % self._args.display_every != 0
+            ):
                 log_step(
                     step_idx,
                     display_every=1,  # force print
@@ -602,6 +630,7 @@ def start_profiling():
                         dequeue_times[-self._args.display_every:]
                     ) * 1000
                 )
+            # yapf: enable
 
             if step_idx >= 100:
                 stop_profiling()
@@ -668,6 +697,7 @@ def timing_metrics(time_arr, log_prefix):
 
             self._export_runtime_metrics_to_json(metrics)
             self._export_runtime_metrics_to_csv(metrics)
+            self._upload_metrics_to_endpoint(metrics)
 
             def log_value(key, val):
                 if isinstance(val, (int, str)):

diff --git a/tftrt/benchmarking-python/benchmark_utils.py b/tftrt/benchmarking-python/benchmark_utils.py
@@ -2,6 +2,7 @@
 # Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # -*- coding: utf-8 -*-
 
+import json
 import time
 
 import numpy as np
@@ -114,6 +115,15 @@ def _format(tensor):
     return predictions, expected
 
 
+def generate_json_metrics(metrics, args):
+    """Return `metrics` and `args` as one key-sorted, indented JSON string."""
+    payload = {"results": metrics, "runtime_arguments": args}
+    # Objects json cannot encode natively fall back to their `__dict__`.
+    return json.dumps(
+        payload, sort_keys=True, indent=4, default=lambda obj: obj.__dict__
+    )
+
+
 class DataAggregator(object):
 
     def __init__(self, postprocess_model_outputs_fn, args):