etsap-TIMES · olejandro · Nov 24, 2023 · Nov 24, 2023
diff --git a/README.md b/README.md
@@ -43,6 +43,24 @@ Additionally, you can install a git pre-commit that will ensure that your change
 ```bash
 pre-commit install
 ```
+If you want to skip these pre-commit steps for a particular commit (for instance, if pyright reports issues but you still want to commit your changes to your branch), you can run:
+```bash
+git commit --no-verify
+```
+
+## Debugging Regressions
+
+If your change is causing regressions on one of the benchmarks, a useful way to debug and find the difference is to run the tool in verbose mode and compare the intermediate tables. For example, if your branch has regressions on Demo 1:
+```bash
+# First, on the `main` branch:
+times-excel-reader benchmarks/xlsx/DemoS_001 --output_dir benchmarks/out/DemoS_001-all --ground_truth_dir benchmarks/csv/DemoS_001-all --verbose > before 2>&1
+# Then, on your branch:
+git checkout my-branch-name
+times-excel-reader benchmarks/xlsx/DemoS_001 --output_dir benchmarks/out/DemoS_001-all --ground_truth_dir benchmarks/csv/DemoS_001-all --verbose > after 2>&1
+# And then compare the files `before` and `after`
+code -d before after
+```
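+VS Code will highlight the differences between the two files, which should correspond to any differences in the intermediate tables (the "transform ... took ... seconds" timing lines will also differ from run to run and can be ignored).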
 
 ## Running Benchmarks
 

diff --git a/times_reader/__main__.py b/times_reader/__main__.py
@@ -1,5 +1,4 @@
 import argparse
-from collections import defaultdict
 from concurrent.futures import ProcessPoolExecutor
 from pandas.core.frame import DataFrame
 import pandas as pd
@@ -19,6 +18,7 @@ def convert_xl_to_times(
     output_dir: str,
     config: datatypes.Config,
     use_pkl: bool,
+    verbose: bool = False,
     stop_after_read: bool = False,
 ) -> Dict[str, DataFrame]:
     pickle_file = "raw_tables.pkl"
@@ -107,14 +107,26 @@ def convert_xl_to_times(
         start_time = time.time()
         output = transform(config, input)
         end_time = time.time()
+        sep = "\n\n" + "=" * 80 + "\n" if verbose else ""
         print(
-            f"transform {transform.__code__.co_name} took {end_time-start_time:.2f} seconds"
+            f"{sep}transform {transform.__code__.co_name} took {end_time-start_time:.2f} seconds"
         )
+        if verbose:
+            if isinstance(output, list):
+                for table in sorted(
+                    output, key=lambda t: (t.tag, t.filename, t.sheetname, t.range)
+                ):
+                    print(table)
+            elif isinstance(output, dict):
+                for tag, df in output.items():
+                    df_str = df.to_csv(index=False, lineterminator="\n")
+                    print(f"{tag}\n{df_str}{df.shape}\n")
         input = output
+    assert isinstance(output, dict)
 
     print(
         f"Conversion complete, {len(output)} tables produced,"
-        f" {sum(df.shape[0] for tablename, df in output.items())} rows"
+        f" {sum(df.shape[0] for df in output.values())} rows"
     )
 
     return output
@@ -384,6 +396,12 @@ def main():
         help="Read xlsx files and stop after outputting raw_tables.txt",
     )
     args_parser.add_argument("--use_pkl", action="store_true")
+    args_parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Verbose mode: print tables after every transform",
+    )
     args = args_parser.parse_args()
 
     config = datatypes.Config("times_mapping.txt", "times-info.json", "veda-tags.json")
@@ -404,11 +422,18 @@ def main():
 
     if args.only_read:
         tables = convert_xl_to_times(
-            input_files, args.output_dir, config, args.use_pkl, stop_after_read=True
+            input_files,
+            args.output_dir,
+            config,
+            args.use_pkl,
+            verbose=args.verbose,
+            stop_after_read=True,
         )
         sys.exit(0)
 
-    tables = convert_xl_to_times(input_files, args.output_dir, config, args.use_pkl)
+    tables = convert_xl_to_times(
+        input_files, args.output_dir, config, args.use_pkl, verbose=args.verbose
+    )
 
     if args.dd:
         write_dd_files(tables, config, args.output_dir)

diff --git a/times_reader/datatypes.py b/times_reader/datatypes.py
@@ -4,7 +4,7 @@
 from itertools import chain
 import json
 import re
-from typing import Any, Dict, Iterable, List, Set, Tuple
+from typing import Dict, Iterable, List, Set, Tuple
 from enum import Enum
 from pandas.core.frame import DataFrame
 
@@ -107,6 +107,10 @@ def __eq__(self, o: object) -> bool:
             )
         )
 
+    def __str__(self) -> str:
+        df_str = self.dataframe.to_csv(index=False, lineterminator="\n")
+        return f"EmbeddedXlTable(tag={self.tag}, uc_sets={self.uc_sets}, sheetname={self.sheetname}, range={self.range}, filename={self.filename}, dataframe=\n{df_str}{self.dataframe.shape})"
+
 
 @dataclass
 class TimesXlMap: