diff --git a/.gitignore b/.gitignore
index 848dbcf..ceafcba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,4 @@ docs/api/
 .coverage
 /out.txt
 *.log
+/profile.*
diff --git a/pyproject.toml b/pyproject.toml
index 98acddb..2995c0c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,13 +51,13 @@ xl2times = "xl2times.__main__:main"
 # don't print runtime warnings
 filterwarnings = ["ignore::DeprecationWarning", "ignore::UserWarning", "ignore::FutureWarning"]
 # show output, print test coverage report
-addopts = '-s --durations=0 --durations-min=5.0 --tb=native --cov-report term --cov-report html --cov=xl2times --cov=utils'
+addopts = '-s --durations=0 --durations-min=5.0 --tb=native'
 
 [tool.poe.tasks]
 # Automation of common dev tasks etc.
 # Run with: `poe `, e,g. `poe lint` or `poe benchmark Ireland`.
 # See https://github.com/nat-n/poethepoet for details.
-benchmark = { cmd = "python utils/run_benchmarks.py benchmarks.yml --verbose --run", help = "Run a single benchmark. Usage: poe benchmark " }
+benchmark = { cmd = "python utils/run_benchmarks.py benchmarks.yml --run", help = "Run a single benchmark. Usage: poe benchmark " }
 benchmark_all = { shell = "python utils/run_benchmarks.py benchmarks.yml --verbose | tee out.txt", help = "Run the project", interpreter = "posix" }
-lint = { shell = "git add .pre-commit-config.yaml & pre-commit run", help = "Run pre-commit hooks", interpreter = "posix"}
-test = { cmd = "pytest", help = "Run unit tests with pytest" }
+lint = { shell = "git add .pre-commit-config.yaml & pre-commit run", help = "Run pre-commit hooks", interpreter = "posix" }
+test = { cmd = "pytest --cov-report term --cov-report html --cov=xl2times --cov=utils", help = "Run unit tests with pytest" }
diff --git a/tests/data/process_uc_wildcards_ireland_data.parquet b/tests/data/process_uc_wildcards_ireland_data.parquet
new file mode 100644
index 0000000..5efddc3
Binary files /dev/null and b/tests/data/process_uc_wildcards_ireland_data.parquet differ
diff --git a/tests/data/process_uc_wildcards_ireland_dict.pkl b/tests/data/process_uc_wildcards_ireland_dict.pkl
new file mode 100644
index 0000000..96a37fd
Binary files /dev/null and b/tests/data/process_uc_wildcards_ireland_dict.pkl differ
diff --git a/tests/test_transforms.py b/tests/test_transforms.py
index af77b6b..694c31f 100644
--- a/tests/test_transforms.py
+++ b/tests/test_transforms.py
@@ -2,12 +2,20 @@
 import pandas as pd
 
-from xl2times import transforms
+from xl2times import transforms, utils, datatypes
 from xl2times.transforms import (
     _process_comm_groups_vectorised,
     _count_comm_group_vectorised,
+    expand_rows,
+    get_matching_commodities,
+    get_matching_processes,
+    _match_uc_wildcards,
+    process_map,
+    commodity_map,
 )
 
+logger = utils.get_logger()
+
 pd.set_option(
     "display.max_rows",
     20,
@@ -22,7 +30,99 @@
 )
 
 
+def _match_uc_wildcards_old(
+    df: pd.DataFrame, dictionary: dict[str, pd.DataFrame]
+) -> pd.DataFrame:
+    """Old version of the process_uc_wildcards matching logic, for comparison with the new vectorised version.
+    TODO remove this function once validated.
+ """ + + def make_str(df): + if df is not None and len(df) != 0: + list_from_df = df.iloc[:, 0].unique() + return ",".join(list_from_df) + else: + return None + + df["process"] = df.apply( + lambda row: make_str(get_matching_processes(row, dictionary)), axis=1 + ) + df["commodity"] = df.apply( + lambda row: make_str(get_matching_commodities(row, dictionary)), axis=1 + ) + + query_columns = transforms.process_map.keys() | transforms.commodity_map.keys() + cols_to_drop = [col for col in df.columns if col in query_columns] + + df = expand_rows( + query_columns, + datatypes.EmbeddedXlTable( + tag="", + uc_sets={}, + sheetname="", + range="", + filename="", + dataframe=df.drop(columns=cols_to_drop), + ), + ).dataframe + return df + + class TestTransforms: + def test_uc_wildcards(self): + """ + Tests logic that matches wildcards in the process_uc_wildcards transform . + + Results on Ireland model: + Old method took 0:00:08.42 seconds + New method took 0:00:00.18 seconds, speedup: 46.5x + """ + import pickle + + df_in = pd.read_parquet("tests/data/process_uc_wildcards_ireland_data.parquet") + with open("tests/data/process_uc_wildcards_ireland_dict.pkl", "rb") as f: + dictionary = pickle.load(f) + df = df_in.copy() + + t0 = datetime.now() + + # optimised functions + df_new = _match_uc_wildcards( + df, process_map, dictionary, get_matching_processes, "process" + ) + df_new = _match_uc_wildcards( + df_new, commodity_map, dictionary, get_matching_commodities, "commodity" + ) + + t1 = datetime.now() + + # Unoptimised function + df_old = _match_uc_wildcards_old(df, dictionary) + + t2 = datetime.now() + + logger.info(f"Old method took {t2 - t1} seconds") + logger.info( + f"New method took {t1 - t0} seconds, speedup: {((t2 - t1) / (t1 - t0)):.1f}x" + ) + + # unit tests + assert df_new is not None and not df_new.empty + assert ( + df_new.shape[0] >= df_in.shape[0] + ), "should have more rows after processing uc_wildcards" + assert ( + df_new.shape[1] < df_in.shape[1] + ), "should have fewer columns after processing uc_wildcards" + assert "process" in df_new.columns, "should have added process column" + assert "commodity" in df_new.columns, "should have added commodity column" + + # consistency checks with old method + assert len(set(df_new.columns).symmetric_difference(set(df_old.columns))) == 0 + assert df_new.fillna(-1).equals( + df_old.fillna(-1) + ), "Dataframes should be equal (ignoring Nones and NaNs)" + def test_generate_commodity_groups(self): """ Tests that the _count_comm_group_vectorised function works as expected. 
@@ -64,4 +164,5 @@ def test_default_pcg_vectorised(self):
 
 
 if __name__ == "__main__":
-    TestTransforms().test_default_pcg_vectorised()
+    # TestTransforms().test_default_pcg_vectorised()
+    TestTransforms().test_uc_wildcards()
diff --git a/utils/run_benchmarks.py b/utils/run_benchmarks.py
index 8d9205f..33cc790 100644
--- a/utils/run_benchmarks.py
+++ b/utils/run_benchmarks.py
@@ -14,10 +14,11 @@
 import git
 import pandas as pd
 import yaml
-from loguru import logger
 from tabulate import tabulate
 
+from dd_to_csv import main
 from xl2times import utils
+from xl2times.__main__ import parse_args, run
 from xl2times.utils import max_workers
 
 logger = utils.get_logger()
@@ -146,7 +147,8 @@ def run_benchmark(
     # First convert ground truth DD to csv
     if not skip_csv:
         shutil.rmtree(csv_folder, ignore_errors=True)
-        if os.name != "nt":
+        if not debug:
+            # run as subprocess if not in --debug mode
             res = subprocess.run(
                 [
                     "python",
@@ -157,6 +159,7 @@
                 stdout=subprocess.PIPE,
                 stderr=subprocess.STDOUT,
                 text=True,
+                shell=True if os.name == "nt" else False,
             )
             if res.returncode != 0:
                 # Remove partial outputs
@@ -166,9 +169,12 @@
                 sys.exit(5)
         else:
             # If debug option is set, run as a function call to allow stepping with a debugger.
-            from dd_to_csv import main
-
-            main([dd_folder, csv_folder])
+            try:
+                main([dd_folder, csv_folder])
+            except Exception:
+                logger.exception(f"dd_to_csv failed on {benchmark['name']}")
+                shutil.rmtree(csv_folder, ignore_errors=True)
+                sys.exit(5)
 
     elif not path.exists(csv_folder):
         logger.error(f"--skip_csv is true but {csv_folder} does not exist")
@@ -189,22 +195,12 @@
     else:
         args.append(xl_folder)
     start = time.time()
-    res = None
-    if not debug:
-        res = subprocess.run(
-            ["xl2times"] + args,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-        )
-    else:
-        # If debug option is set, run as a function call to allow stepping with a debugger.
-        from xl2times.__main__ import run, parse_args
-
-        summary = run(parse_args(args))
-
-        # pack the results into a namedtuple pretending to be a return value from a subprocess call (as above).
-        res = namedtuple("stdout", ["stdout", "stderr", "returncode"])(summary, "", 0)
+
+    # Call the conversion function directly
+    summary = run(parse_args(args))
+
+    # pack the results into a namedtuple pretending to be a return value from a subprocess call (as above).
+    res = namedtuple("stdout", ["stdout", "stderr", "returncode"])(summary, "", 0)
 
     runtime = time.time() - start
@@ -255,8 +251,13 @@
         debug=debug,
     )
 
-    with ProcessPoolExecutor(max_workers=max_workers) as executor:
-        results = list(executor.map(run_a_benchmark, benchmarks))
+    if debug:
+        # bypass process pool and call benchmarks directly if --debug is set.
+        results = [run_a_benchmark(b) for b in benchmarks]
+    else:
+        with ProcessPoolExecutor(max_workers=max_workers) as executor:
+            results = list(executor.map(run_a_benchmark, benchmarks))
+
     logger.info("\n\n" + tabulate(results, headers, floatfmt=".1f") + "\n")
 
     if skip_regression:
@@ -302,9 +303,10 @@
         )
         sys.exit(8)
 
-    # Re-run benchmarks on main
+    # Re-run benchmarks on main - check it out and pull
     repo.heads.main.checkout()
-    logger.info("Running benchmarks on main", end="", flush=True)
+    origin.pull("main")  # if main already exists, make sure it's up to date
+    logger.info("Running benchmarks on main")
     run_a_benchmark = partial(
         run_benchmark,
         benchmarks_folder=benchmarks_folder,
@@ -441,7 +443,8 @@
         "--debug",
         action="store_true",
         default=False,
-        help="Run each benchmark as a function call to allow a debugger to stop at breakpoints in benchmark runs.",
+        help="Run each benchmark as a direct function call (disables subprocesses) to allow a debugger to stop at breakpoints "
+        "in benchmark runs.",
     )
 
     args = args_parser.parse_args()
@@ -449,11 +452,11 @@
     benchmarks_folder = spec["benchmarks_folder"]
     benchmark_names = [b["name"] for b in spec["benchmarks"]]
     if len(set(benchmark_names)) != len(benchmark_names):
-        logger.error(f"Found duplicate name in benchmarks YAML file")
+        logger.error("Found duplicate name in benchmarks YAML file")
         sys.exit(11)
 
     if args.dd and args.times_dir is None:
-        logger.error(f"--times_dir is required when using --dd")
+        logger.error("--times_dir is required when using --dd")
         sys.exit(12)
 
     if args.run is not None:
diff --git a/xl2times/__main__.py b/xl2times/__main__.py
index 0653773..e502768 100644
--- a/xl2times/__main__.py
+++ b/xl2times/__main__.py
@@ -1,5 +1,7 @@
 import argparse
 from concurrent.futures import ProcessPoolExecutor
+from datetime import datetime
+
 from pandas.core.frame import DataFrame
 import pandas as pd
 import pickle
@@ -27,9 +29,10 @@ def convert_xl_to_times(
     stop_after_read: bool = False,
 ) -> Dict[str, DataFrame]:
     pickle_file = "raw_tables.pkl"
+    t0 = datetime.now()
     if use_pkl and os.path.isfile(pickle_file):
         raw_tables = pickle.load(open(pickle_file, "rb"))
-        logger.warning(f"Using pickled data not xlsx")
+        logger.warning("Using pickled data not xlsx")
     else:
         raw_tables = []
@@ -40,12 +43,12 @@
                     raw_tables.extend(result)
         else:
             for f in input_files:
-                result = excel.extract_tables(f)
+                result = excel.extract_tables(str(Path(f).absolute()))
                 raw_tables.extend(result)
         pickle.dump(raw_tables, open(pickle_file, "wb"))
     logger.info(
         f"Extracted {len(raw_tables)} tables,"
-        f" {sum(table.dataframe.shape[0] for table in raw_tables)} rows"
+        f" {sum(table.dataframe.shape[0] for table in raw_tables)} rows in {datetime.now() - t0}"
     )
 
     if stop_after_read:
@@ -248,7 +251,7 @@ def produce_times_tables(
     result = {}
     used_tables = set()
     for mapping in config.times_xl_maps:
-        if not mapping.xl_name in input:
+        if mapping.xl_name not in input:
             logger.warning(
                 f"Cannot produce table {mapping.times_name} because"
                 f" {mapping.xl_name} does not exist"
@@ -281,7 +284,7 @@
             # Excel columns can be duplicated into multiple Times columns
            for times_col, xl_col in mapping.col_map.items():
                 df[times_col] = df[xl_col]
-            cols_to_drop = [x for x in df.columns if not x in mapping.times_cols]
+            cols_to_drop = [x for x in df.columns if x not in mapping.times_cols]
             df.drop(columns=cols_to_drop, inplace=True)
             df.drop_duplicates(inplace=True)
             df.reset_index(drop=True, inplace=True)
@@ -392,7 +395,7 @@ def dump_tables(tables: List, filename: str) -> List:
     return tables
 
 
-def run(args) -> str | None:
+def run(args: argparse.Namespace) -> str | None:
     """
     Runs the xl2times conversion.
     Args:
diff --git a/xl2times/excel.py b/xl2times/excel.py
index b9c4597..befde29 100644
--- a/xl2times/excel.py
+++ b/xl2times/excel.py
@@ -4,7 +4,6 @@
 from typing import Dict, List
 import time
 from pandas.core.frame import DataFrame
-import pandas as pd
 import numpy
 import re
 from . import datatypes
@@ -43,8 +42,8 @@ def extract_tables(filename: str) -> List[datatypes.EmbeddedXlTable]:
                 if len(parts) == 2:
                     uc_sets[parts[0].strip()] = parts[1].strip()
                 else:
-                    logger.info(
-                        f"WARNING: Malformed UC_SET in {sheet.title}, {filename}"
+                    logger.warning(
+                        f"Malformed UC_SET in {sheet.title}, {filename}"
                     )
             else:
                 col_index = df.columns.get_loc(colname)
diff --git a/xl2times/transforms.py b/xl2times/transforms.py
index 67ed257..2bac644 100644
--- a/xl2times/transforms.py
+++ b/xl2times/transforms.py
@@ -6,12 +6,14 @@
 from functools import reduce
 from itertools import groupby
 from pathlib import Path
+from typing import Callable
 from typing import Dict, List, Set
 
 import pandas as pd
 from loguru import logger
 from more_itertools import locate, one
 from pandas.core.frame import DataFrame
+from tqdm import tqdm
 
 from . import datatypes
 from . import utils
@@ -31,6 +33,20 @@
     "PRC_VINT": "vintage",
 }
 
+process_map = {
+    "pset_pn": "processes_by_name",
+    "pset_pd": "processes_by_desc",
+    "pset_set": "processes_by_sets",
+    "pset_ci": "processes_by_comm_in",
+    "pset_co": "processes_by_comm_out",
+}
+
+commodity_map = {
+    "cset_cn": "commodities_by_name",
+    "cset_cd": "commodities_by_desc",
+    "cset_set": "commodities_by_sets",
+}
+
 
 def remove_comment_rows(
     config: datatypes.Config,
@@ -292,27 +308,32 @@ def merge_tables(
 
     for key, value in groupby(sorted(tables, key=lambda t: t.tag), lambda t: t.tag):
         group = list(value)
-        if not all(
-            set(t.dataframe.columns) == set(group[0].dataframe.columns) for t in group
-        ):
-            cols = [(",".join(g.dataframe.columns), g) for g in group]
-            logger.warning(
-                f"Cannot merge tables with tag {key} as their columns are not identical"
-            )
-            for c, table in cols:
-                logger.info(
-                    f"  {c} from {table.range}, {table.sheetname}, {table.filename}"
-                )
-        else:
-            df = pd.concat([table.dataframe for table in group], ignore_index=True)
-
-            match key:
-                case datatypes.Tag.fi_comm:
-                    model.commodities = df
-                case datatypes.Tag.fi_process:
-                    model.processes = df
-                case _:
-                    result[key] = df
+
+        if len(group) == 0:
+            continue
+
+        df = pd.concat([table.dataframe for table in group], ignore_index=True)
+        result[key] = df
+
+        # VEDA appears to support merging tables where some columns are optional, e.g. ctslvl and ctype from ~FI_COMM.
+        # So just print detailed warning if we find tables with fewer columns than the concat'ed table.
+        concat_cols = set(df.columns)
+        missing_cols = [concat_cols - set(t.dataframe.columns) for t in group]
+
+        if any([len(m) for m in missing_cols]):
+            err = f"WARNING: Possible merge error for table: '{key}'! Merged table has more columns than individual table(s), see details below:"
+            for table in group:
+                err += f"\n\tColumns: {list(table.dataframe.columns)} from {table.range}, {table.sheetname}, {table.filename}"
+            logger.warning(err)
+
+        match key:
+            case datatypes.Tag.fi_comm:
+                model.commodities = df
+            case datatypes.Tag.fi_process:
+                model.processes = df
+            case _:
+                result[key] = df
+
     return result
@@ -1991,7 +2012,7 @@ def process_transform_availability(
     return result
 
 
-def filter_by_pattern(df, pattern):
+def filter_by_pattern(df: pd.DataFrame, pattern: str) -> pd.DataFrame:
     # Duplicates can be created when a process has multiple commodities that match the pattern
     df = df.filter(regex=utils.create_regexp(pattern), axis="index").drop_duplicates()
     exclude = df.filter(regex=utils.create_negative_regexp(pattern), axis="index").index
@@ -2004,35 +2025,28 @@ def intersect(acc, df):
     return acc.merge(df)
 
 
-def get_matching_processes(row, dictionary):
+def get_matching_processes(row: pd.Series, topology: Dict[str, DataFrame]) -> pd.Series:
     matching_processes = None
-    for col, key in [
-        ("pset_pn", "processes_by_name"),
-        ("pset_pd", "processes_by_desc"),
-        ("pset_set", "processes_by_sets"),
-        ("pset_ci", "processes_by_comm_in"),
-        ("pset_co", "processes_by_comm_out"),
-    ]:
+    for col, key in process_map.items():
         if col in row.index and row[col] is not None:
-            matching_processes = intersect(
-                matching_processes, filter_by_pattern(dictionary[key], row[col].upper())
-            )
+            proc_set = topology[key]
+            pattern = row[col].upper()
+            filtered = filter_by_pattern(proc_set, pattern)
+            matching_processes = intersect(matching_processes, filtered)
+
     if matching_processes is not None and any(matching_processes.duplicated()):
         raise ValueError("duplicated")
+
     return matching_processes
 
 
-def get_matching_commodities(row, dictionary):
+def get_matching_commodities(row: pd.Series, topology: Dict[str, DataFrame]):
     matching_commodities = None
-    for col, key in [
-        ("cset_cn", "commodities_by_name"),
-        ("cset_cd", "commodities_by_desc"),
-        ("cset_set", "commodities_by_sets"),
-    ]:
+    for col, key in commodity_map.items():
         if col in row.index and row[col] is not None:
             matching_commodities = intersect(
                 matching_commodities,
-                filter_by_pattern(dictionary[key], row[col].upper()),
+                filter_by_pattern(topology[key], row[col].upper()),
             )
     return matching_commodities
@@ -2103,40 +2117,18 @@ def process_uc_wildcards(
 ) -> Dict[str, DataFrame]:
     tag = datatypes.Tag.uc_t
 
-    def make_str(df):
-        if df is not None and len(df) != 0:
-            list_from_df = df.iloc[:, 0].unique()
-            return ",".join(list_from_df)
-        else:
-            return None
-
-    if tag in tables:
+    if tag in tqdm(tables, desc="Processing uc_wildcards on tables"):
         start_time = time.time()
         df = tables[tag]
-        query_columns = config.query_columns[tag]
         dictionary = generate_topology_dictionary(tables, model)
 
-        df["process"] = df.apply(
-            lambda row: make_str(get_matching_processes(row, dictionary)), axis=1
+        df = _match_uc_wildcards(
+            df, process_map, dictionary, get_matching_processes, "process"
         )
-        df["commodity"] = df.apply(
-            lambda row: make_str(get_matching_commodities(row, dictionary)), axis=1
+        df = _match_uc_wildcards(
+            df, commodity_map, dictionary, get_matching_commodities, "commodity"
        )
 
-        cols_to_drop = [col for col in df.columns if col in query_columns]
-
-        df = expand_rows(
-            query_columns,
-            datatypes.EmbeddedXlTable(
-                tag="",
-                uc_sets={},
-                sheetname="",
-                range="",
-                filename="",
-                dataframe=df.drop(columns=cols_to_drop),
-            ),
-        ).dataframe
-
         tables[tag] = df
 
         logger.info(
@@ -2146,6 +2138,66 @@ def make_str(df):
     return tables
 
 
+def _match_uc_wildcards(
+    df: pd.DataFrame,
+    process_map: dict[str, str],
+    dictionary: dict[str, pd.DataFrame],
+    matcher: Callable,
+    result_col: str,
+) -> pd.DataFrame:
+    """
+    Match wildcards in the given table using the given process map and dictionary.
+
+    Args:
+        df: Table to match wildcards in.
+        process_map: Mapping of column names to process sets.
+        dictionary: Dictionary of process sets to match against.
+        matcher: Matching function to use, e.g. get_matching_processes or get_matching_commodities.
+        result_col: Name of the column to store the matched results in.
+
+    Returns:
+        The table with the wildcard columns removed and the results of the wildcard matches added as a column named `result_col`.
+    """
+    proc_cols = list(process_map.keys())
+
+    # drop duplicate sets of wildcard columns to save repeated (slow) regex matching. This makes things much faster.
+    unique_filters = df[proc_cols].drop_duplicates().dropna(axis="rows", how="all")
+
+    # match all the wildcard columns against the dictionary names
+    matches = unique_filters.apply(lambda row: matcher(row, dictionary), axis=1)
+
+    # we occasionally get a DataFrame back from the matchers. Convert these to Series.
+    matches = (
+        matches.iloc[:, 0].to_list()
+        if isinstance(matches, pd.DataFrame)
+        else matches.to_list()
+    )
+    matches = [
+        df.iloc[:, 0].to_list() if df is not None and len(df) != 0 else None
+        for df in matches
+    ]
+    matches = pd.DataFrame({result_col: matches})
+
+    # then join the wildcard cols to their list of matched names so we can join them back into the table df.
+    filter_matches = unique_filters.reset_index(drop=True).merge(
+        matches, left_index=True, right_index=True
+    )
+
+    # Finally we merge the matches back into the original table. This join re-duplicates the duplicate filters dropped above for speed.
+    # And we explode any matches to multiple names to give a long-format table.
+    df = (
+        df.merge(filter_matches, left_on=proc_cols, right_on=proc_cols, how="left")
+        .explode(result_col)
+        .reset_index(drop=True)
+        .drop(columns=proc_cols)
+    )
+
+    # replace NaNs in result_col with None for consistency with older logic
+    df[result_col] = df[result_col].where(df[result_col].notna(), None)
+
+    return df
+
+
 def process_wildcards(
     config: datatypes.Config,
     tables: Dict[str, DataFrame],
@@ -2170,7 +2222,7 @@ def match_wildcards(
             matching_commodities is None or len(matching_commodities) == 0
         ):  # TODO is this necessary? Try without?
             # TODO debug these
-            logger.warning(f"a row matched no processes or commodities")
+            logger.warning("a row matched no processes or commodities")
             return None
         return matching_processes, matching_commodities
@@ -2214,7 +2266,11 @@ def eval_and_update(
         # TFM_UPD: expand wildcards in each row, query FI_T to find matching rows,
         # evaluate the update formula, and add new rows to FI_T
         # TODO perf: collect all updates and go through FI_T only once?
-        for _, row in updates.iterrows():
+        for _, row in tqdm(
+            updates.iterrows(),
+            total=len(updates),
+            desc=f"Processing wildcard for {datatypes.Tag.tfm_upd}",
+        ):
            if row["value"] is None:  # TODO is this really needed?
                continue
            match = match_wildcards(row)
@@ -2237,7 +2293,11 @@ def eval_and_update(
         new_tables = []
 
         # TFM_INS: expand each row by wildcards, then add to FI_T
-        for _, row in updates.iterrows():
+        for _, row in tqdm(
+            updates.iterrows(),
+            total=len(updates),
+            desc=f"Processing wildcard for {datatypes.Tag.tfm_ins}",
+        ):
             match = match_wildcards(row)
             # TODO perf: add matched procs/comms into column and use explode?
             new_rows = pd.DataFrame([row.filter(table.columns)])
@@ -2257,10 +2317,14 @@ def eval_and_update(
 
         # TFM_INS-TXT: expand row by wildcards, query FI_PROC/COMM for matching rows,
         # evaluate the update formula, and inplace update the rows
-        for _, row in updates.iterrows():
+        for _, row in tqdm(
+            updates.iterrows(),
+            total=len(updates),
+            desc=f"Processing wildcard for {datatypes.Tag.tfm_ins_txt}",
+        ):
             match = match_wildcards(row)
             if match is None:
-                logger.warning(f"TFM_INS-TXT row matched neither commodity nor process")
+                logger.warning("TFM_INS-TXT row matched neither commodity nor process")
                 continue
             processes, commodities = match
             if commodities is not None:
@@ -2282,7 +2346,11 @@ def eval_and_update(
         table = tables[datatypes.Tag.fi_t]
         new_tables = []
 
-        for _, row in updates.iterrows():
+        for _, row in tqdm(
+            updates.iterrows(),
+            total=len(updates),
+            desc=f"Processing wildcard for {datatypes.Tag.tfm_mig}",
+        ):
             match = match_wildcards(row)
             processes, commodities = match if match is not None else (None, None)
             # TODO should we also query on limtype?
diff --git a/xl2times/utils.py b/xl2times/utils.py
index 0628e38..fcc10eb 100644
--- a/xl2times/utils.py
+++ b/xl2times/utils.py
@@ -2,6 +2,7 @@
     annotations,
 )  # see https://loguru.readthedocs.io/en/stable/api/type_hints.html#module-autodoc_stub_file.loguru
 
+import functools
 import os
 import re
 import sys
@@ -192,6 +193,7 @@ def remove_positive_patterns(pattern):
     return ",".join([word[1:] for word in pattern.split(",") if word[0] == "-"])
 
 
+@functools.lru_cache(maxsize=int(1e6))
 def create_regexp(pattern):
     # exclude negative patterns
     if has_negative_patterns(pattern):
@@ -205,6 +207,7 @@
     return re.compile(pattern)
 
 
+@functools.lru_cache(maxsize=int(1e6))
 def create_negative_regexp(pattern):
     pattern = remove_positive_patterns(pattern)
     if len(pattern) == 0:
@@ -262,7 +265,7 @@ def get_logger(log_name: str = default_log_name, log_dir: str = ".") -> loguru.L
             "level": "DEBUG",
             "colorize": False,
             "serialize": False,
-            "diagnose": False,
+            "diagnose": True,
             "rotation": "20 MB",
             "compression": "zip",
         },