diff --git a/.gitignore b/.gitignore
index 848dbcf..ceafcba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,4 @@ docs/api/
 .coverage
 /out.txt
 *.log
+/profile.*
diff --git a/pyproject.toml b/pyproject.toml
index 98acddb..2995c0c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,13 +51,13 @@ xl2times = "xl2times.__main__:main"
 # don't print runtime warnings
 filterwarnings = ["ignore::DeprecationWarning", "ignore::UserWarning", "ignore::FutureWarning"]
 # show output, print test coverage report
-addopts = '-s --durations=0 --durations-min=5.0 --tb=native --cov-report term --cov-report html --cov=xl2times --cov=utils'
+addopts = '-s --durations=0 --durations-min=5.0 --tb=native'
 
 [tool.poe.tasks]
 # Automation of common dev tasks etc.
 # Run with: `poe `, e,g. `poe lint` or `poe benchmark Ireland`.
 # See https://github.com/nat-n/poethepoet for details.
-benchmark = { cmd = "python utils/run_benchmarks.py benchmarks.yml --verbose --run", help = "Run a single benchmark. Usage: poe benchmark " }
+benchmark = { cmd = "python utils/run_benchmarks.py benchmarks.yml --run", help = "Run a single benchmark. Usage: poe benchmark " }
 benchmark_all = { shell = "python utils/run_benchmarks.py benchmarks.yml --verbose | tee out.txt", help = "Run the project", interpreter = "posix" }
-lint = { shell = "git add .pre-commit-config.yaml & pre-commit run", help = "Run pre-commit hooks", interpreter = "posix"}
-test = { cmd = "pytest", help = "Run unit tests with pytest" }
+lint = { shell = "git add .pre-commit-config.yaml & pre-commit run", help = "Run pre-commit hooks", interpreter = "posix" }
+test = { cmd = "pytest --cov-report term --cov-report html --cov=xl2times --cov=utils", help = "Run unit tests with pytest" }
diff --git a/tests/data/process_uc_wildcards_ireland_data.parquet b/tests/data/process_uc_wildcards_ireland_data.parquet
new file mode 100644
index 0000000..5efddc3
Binary files /dev/null and b/tests/data/process_uc_wildcards_ireland_data.parquet differ
diff --git a/tests/data/process_uc_wildcards_ireland_dict.pkl b/tests/data/process_uc_wildcards_ireland_dict.pkl
new file mode 100644
index 0000000..96a37fd
Binary files /dev/null and b/tests/data/process_uc_wildcards_ireland_dict.pkl differ
diff --git a/tests/test_transforms.py b/tests/test_transforms.py
index af77b6b..694c31f 100644
--- a/tests/test_transforms.py
+++ b/tests/test_transforms.py
@@ -2,12 +2,20 @@
 import pandas as pd
 
-from xl2times import transforms
+from xl2times import transforms, utils, datatypes
 from xl2times.transforms import (
     _process_comm_groups_vectorised,
     _count_comm_group_vectorised,
+    expand_rows,
+    get_matching_commodities,
+    get_matching_processes,
+    _match_uc_wildcards,
+    process_map,
+    commodity_map,
 )
 
+logger = utils.get_logger()
+
 pd.set_option(
     "display.max_rows",
     20,
@@ -22,7 +30,99 @@
 )
 
 
+def _match_uc_wildcards_old(
+    df: pd.DataFrame, dictionary: dict[str, pd.DataFrame]
+) -> pd.DataFrame:
+    """Old version of the process_uc_wildcards matching logic, for comparison with the new vectorised version.
+    TODO remove this function once validated.
+ """ + + def make_str(df): + if df is not None and len(df) != 0: + list_from_df = df.iloc[:, 0].unique() + return ",".join(list_from_df) + else: + return None + + df["process"] = df.apply( + lambda row: make_str(get_matching_processes(row, dictionary)), axis=1 + ) + df["commodity"] = df.apply( + lambda row: make_str(get_matching_commodities(row, dictionary)), axis=1 + ) + + query_columns = transforms.process_map.keys() | transforms.commodity_map.keys() + cols_to_drop = [col for col in df.columns if col in query_columns] + + df = expand_rows( + query_columns, + datatypes.EmbeddedXlTable( + tag="", + uc_sets={}, + sheetname="", + range="", + filename="", + dataframe=df.drop(columns=cols_to_drop), + ), + ).dataframe + return df + + class TestTransforms: + def test_uc_wildcards(self): + """ + Tests logic that matches wildcards in the process_uc_wildcards transform . + + Results on Ireland model: + Old method took 0:00:08.42 seconds + New method took 0:00:00.18 seconds, speedup: 46.5x + """ + import pickle + + df_in = pd.read_parquet("tests/data/process_uc_wildcards_ireland_data.parquet") + with open("tests/data/process_uc_wildcards_ireland_dict.pkl", "rb") as f: + dictionary = pickle.load(f) + df = df_in.copy() + + t0 = datetime.now() + + # optimised functions + df_new = _match_uc_wildcards( + df, process_map, dictionary, get_matching_processes, "process" + ) + df_new = _match_uc_wildcards( + df_new, commodity_map, dictionary, get_matching_commodities, "commodity" + ) + + t1 = datetime.now() + + # Unoptimised function + df_old = _match_uc_wildcards_old(df, dictionary) + + t2 = datetime.now() + + logger.info(f"Old method took {t2 - t1} seconds") + logger.info( + f"New method took {t1 - t0} seconds, speedup: {((t2 - t1) / (t1 - t0)):.1f}x" + ) + + # unit tests + assert df_new is not None and not df_new.empty + assert ( + df_new.shape[0] >= df_in.shape[0] + ), "should have more rows after processing uc_wildcards" + assert ( + df_new.shape[1] < df_in.shape[1] + ), "should have fewer columns after processing uc_wildcards" + assert "process" in df_new.columns, "should have added process column" + assert "commodity" in df_new.columns, "should have added commodity column" + + # consistency checks with old method + assert len(set(df_new.columns).symmetric_difference(set(df_old.columns))) == 0 + assert df_new.fillna(-1).equals( + df_old.fillna(-1) + ), "Dataframes should be equal (ignoring Nones and NaNs)" + def test_generate_commodity_groups(self): """ Tests that the _count_comm_group_vectorised function works as expected. 
@@ -64,4 +164,5 @@ def test_default_pcg_vectorised(self):
 
 
 if __name__ == "__main__":
-    TestTransforms().test_default_pcg_vectorised()
+    # TestTransforms().test_default_pcg_vectorised()
+    TestTransforms().test_uc_wildcards()
diff --git a/utils/run_benchmarks.py b/utils/run_benchmarks.py
index 8d9205f..33cc790 100644
--- a/utils/run_benchmarks.py
+++ b/utils/run_benchmarks.py
@@ -14,10 +14,11 @@
 import git
 import pandas as pd
 import yaml
-from loguru import logger
 from tabulate import tabulate
 
+from dd_to_csv import main
 from xl2times import utils
+from xl2times.__main__ import parse_args, run
 from xl2times.utils import max_workers
 
 logger = utils.get_logger()
@@ -146,7 +147,8 @@ def run_benchmark(
     # First convert ground truth DD to csv
     if not skip_csv:
         shutil.rmtree(csv_folder, ignore_errors=True)
-        if os.name != "nt":
+        if not debug:
+            # run as subprocess if not in --debug mode
             res = subprocess.run(
                 [
                     "python",
@@ -157,6 +159,7 @@
                 stdout=subprocess.PIPE,
                 stderr=subprocess.STDOUT,
                 text=True,
+                shell=True if os.name == "nt" else False,
             )
             if res.returncode != 0:
                 # Remove partial outputs
@@ -166,9 +169,12 @@
                 sys.exit(5)
         else:
             # If debug option is set, run as a function call to allow stepping with a debugger.
-            from dd_to_csv import main
-
-            main([dd_folder, csv_folder])
+            try:
+                main([dd_folder, csv_folder])
+            except Exception:
+                logger.exception(f"dd_to_csv failed on {benchmark['name']}")
+                shutil.rmtree(csv_folder, ignore_errors=True)
+                sys.exit(5)
 
     elif not path.exists(csv_folder):
         logger.error(f"--skip_csv is true but {csv_folder} does not exist")
@@ -189,22 +195,12 @@
     else:
         args.append(xl_folder)
     start = time.time()
-    res = None
-    if not debug:
-        res = subprocess.run(
-            ["xl2times"] + args,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            text=True,
-        )
-    else:
-        # If debug option is set, run as a function call to allow stepping with a debugger.
-        from xl2times.__main__ import run, parse_args
-
-        summary = run(parse_args(args))
-
-        # pack the results into a namedtuple pretending to be a return value from a subprocess call (as above).
-        res = namedtuple("stdout", ["stdout", "stderr", "returncode"])(summary, "", 0)
+
+    # Call the conversion function directly
+    summary = run(parse_args(args))
+
+    # pack the results into a namedtuple pretending to be a return value from a subprocess call (as above).
+    res = namedtuple("stdout", ["stdout", "stderr", "returncode"])(summary, "", 0)
 
     runtime = time.time() - start
@@ -255,8 +251,13 @@
         debug=debug,
     )
 
-    with ProcessPoolExecutor(max_workers=max_workers) as executor:
-        results = list(executor.map(run_a_benchmark, benchmarks))
+    if debug:
+        # bypass process pool and call benchmarks directly if --debug is set.
+        results = [run_a_benchmark(b) for b in benchmarks]
+    else:
+        with ProcessPoolExecutor(max_workers=max_workers) as executor:
+            results = list(executor.map(run_a_benchmark, benchmarks))
+
     logger.info("\n\n" + tabulate(results, headers, floatfmt=".1f") + "\n")
 
     if skip_regression:
@@ -302,9 +303,10 @@
         )
         sys.exit(8)
 
-    # Re-run benchmarks on main
+    # Re-run benchmarks on main - check it out and pull
     repo.heads.main.checkout()
-    logger.info("Running benchmarks on main", end="", flush=True)
+    origin.pull("main")  # if main already exists, make sure it's up to date
+    logger.info("Running benchmarks on main")
     run_a_benchmark = partial(
         run_benchmark,
         benchmarks_folder=benchmarks_folder,
@@ -441,7 +443,8 @@
         "--debug",
         action="store_true",
         default=False,
-        help="Run each benchmark as a function call to allow a debugger to stop at breakpoints in benchmark runs.",
+        help="Run each benchmark as a direct function call (disables subprocesses) to allow a debugger to stop at breakpoints "
+        "in benchmark runs.",
     )
 
     args = args_parser.parse_args()
@@ -449,11 +452,11 @@
     benchmarks_folder = spec["benchmarks_folder"]
     benchmark_names = [b["name"] for b in spec["benchmarks"]]
     if len(set(benchmark_names)) != len(benchmark_names):
-        logger.error(f"Found duplicate name in benchmarks YAML file")
+        logger.error("Found duplicate name in benchmarks YAML file")
         sys.exit(11)
 
     if args.dd and args.times_dir is None:
-        logger.error(f"--times_dir is required when using --dd")
+        logger.error("--times_dir is required when using --dd")
         sys.exit(12)
 
     if args.run is not None:
diff --git a/xl2times/__main__.py b/xl2times/__main__.py
index 0653773..e502768 100644
--- a/xl2times/__main__.py
+++ b/xl2times/__main__.py
@@ -1,5 +1,7 @@
 import argparse
 from concurrent.futures import ProcessPoolExecutor
+from datetime import datetime
+
 from pandas.core.frame import DataFrame
 import pandas as pd
 import pickle
@@ -27,9 +29,10 @@ def convert_xl_to_times(
     stop_after_read: bool = False,
 ) -> Dict[str, DataFrame]:
     pickle_file = "raw_tables.pkl"
+    t0 = datetime.now()
     if use_pkl and os.path.isfile(pickle_file):
         raw_tables = pickle.load(open(pickle_file, "rb"))
-        logger.warning(f"Using pickled data not xlsx")
+        logger.warning("Using pickled data not xlsx")
     else:
         raw_tables = []
@@ -40,12 +43,12 @@
                     raw_tables.extend(result)
         else:
             for f in input_files:
-                result = excel.extract_tables(f)
+                result = excel.extract_tables(str(Path(f).absolute()))
                 raw_tables.extend(result)
         pickle.dump(raw_tables, open(pickle_file, "wb"))
     logger.info(
         f"Extracted {len(raw_tables)} tables,"
-        f" {sum(table.dataframe.shape[0] for table in raw_tables)} rows"
+        f" {sum(table.dataframe.shape[0] for table in raw_tables)} rows in {datetime.now() - t0}"
     )
 
     if stop_after_read:
@@ -248,7 +251,7 @@ def produce_times_tables(
     result = {}
     used_tables = set()
     for mapping in config.times_xl_maps:
-        if not mapping.xl_name in input:
+        if mapping.xl_name not in input:
             logger.warning(
                 f"Cannot produce table {mapping.times_name} because"
                 f" {mapping.xl_name} does not exist"
@@ -281,7 +284,7 @@
             # Excel columns can be duplicated into multiple Times columns
            for times_col, xl_col in mapping.col_map.items():
                 df[times_col] = df[xl_col]
-            cols_to_drop = [x for x in df.columns if not x in mapping.times_cols]
+            cols_to_drop = [x for x in df.columns if x not in mapping.times_cols]
             df.drop(columns=cols_to_drop, inplace=True)
             df.drop_duplicates(inplace=True)
             df.reset_index(drop=True, inplace=True)
@@ -392,7 +395,7 @@ def dump_tables(tables: List, filename: str) -> List:
     return tables
 
 
-def run(args) -> str | None:
+def run(args: argparse.Namespace) -> str | None:
     """
     Runs the xl2times conversion.
     Args:
diff --git a/xl2times/excel.py b/xl2times/excel.py
index b9c4597..befde29 100644
--- a/xl2times/excel.py
+++ b/xl2times/excel.py
@@ -4,7 +4,6 @@
 from typing import Dict, List
 import time
 from pandas.core.frame import DataFrame
-import pandas as pd
 import numpy
 import re
 from . import datatypes
@@ -43,8 +42,8 @@ def extract_tables(filename: str) -> List[datatypes.EmbeddedXlTable]:
                 if len(parts) == 2:
                     uc_sets[parts[0].strip()] = parts[1].strip()
                 else:
-                    logger.info(
-                        f"WARNING: Malformed UC_SET in {sheet.title}, {filename}"
+                    logger.warning(
+                        f"Malformed UC_SET in {sheet.title}, {filename}"
                     )
             else:
                 col_index = df.columns.get_loc(colname)
diff --git a/xl2times/transforms.py b/xl2times/transforms.py
index 67ed257..2bac644 100644
--- a/xl2times/transforms.py
+++ b/xl2times/transforms.py
@@ -6,12 +6,14 @@
 from functools import reduce
 from itertools import groupby
 from pathlib import Path
+from typing import Callable
 from typing import Dict, List, Set
 
 import pandas as pd
 from loguru import logger
 from more_itertools import locate, one
 from pandas.core.frame import DataFrame
+from tqdm import tqdm
 
 from . import datatypes
 from . import utils
@@ -31,6 +33,20 @@
     "PRC_VINT": "vintage",
 }
 
+process_map = {
+    "pset_pn": "processes_by_name",
+    "pset_pd": "processes_by_desc",
+    "pset_set": "processes_by_sets",
+    "pset_ci": "processes_by_comm_in",
+    "pset_co": "processes_by_comm_out",
+}
+
+commodity_map = {
+    "cset_cn": "commodities_by_name",
+    "cset_cd": "commodities_by_desc",
+    "cset_set": "commodities_by_sets",
+}
+
 
 def remove_comment_rows(
     config: datatypes.Config,
@@ -292,27 +308,32 @@ def merge_tables(
 
     for key, value in groupby(sorted(tables, key=lambda t: t.tag), lambda t: t.tag):
         group = list(value)
-        if not all(
-            set(t.dataframe.columns) == set(group[0].dataframe.columns) for t in group
-        ):
-            cols = [(",".join(g.dataframe.columns), g) for g in group]
-            logger.warning(
-                f"Cannot merge tables with tag {key} as their columns are not identical"
-            )
-            for c, table in cols:
-                logger.info(
-                    f"  {c} from {table.range}, {table.sheetname}, {table.filename}"
-                )
-        else:
-            df = pd.concat([table.dataframe for table in group], ignore_index=True)
-
-            match key:
-                case datatypes.Tag.fi_comm:
-                    model.commodities = df
-                case datatypes.Tag.fi_process:
-                    model.processes = df
-                case _:
-                    result[key] = df
+
+        if len(group) == 0:
+            continue
+
+        df = pd.concat([table.dataframe for table in group], ignore_index=True)
+        result[key] = df
+
+        # VEDA appears to support merging tables where some columns are optional, e.g. ctslvl and ctype from ~FI_COMM.
+        # So just print detailed warning if we find tables with fewer columns than the concat'ed table.
+        concat_cols = set(df.columns)
+        missing_cols = [concat_cols - set(t.dataframe.columns) for t in group]
+
+        if any([len(m) for m in missing_cols]):
+            err = f"WARNING: Possible merge error for table: '{key}'! Merged table has more columns than individual table(s), see details below:"
+            for table in group:
+                err += f"\n\tColumns: {list(table.dataframe.columns)} from {table.range}, {table.sheetname}, {table.filename}"
+            logger.warning(err)
+
+        match key:
+            case datatypes.Tag.fi_comm:
+                model.commodities = df
+            case datatypes.Tag.fi_process:
+                model.processes = df
+            case _:
+                result[key] = df
+
     return result
@@ -1991,7 +2012,7 @@ def process_transform_availability(
     return result
 
 
-def filter_by_pattern(df, pattern):
+def filter_by_pattern(df: pd.DataFrame, pattern: str) -> pd.DataFrame:
     # Duplicates can be created when a process has multiple commodities that match the pattern
     df = df.filter(regex=utils.create_regexp(pattern), axis="index").drop_duplicates()
     exclude = df.filter(regex=utils.create_negative_regexp(pattern), axis="index").index
@@ -2004,35 +2025,28 @@ def intersect(acc, df):
     return acc.merge(df)
 
 
-def get_matching_processes(row, dictionary):
+def get_matching_processes(row: pd.Series, topology: Dict[str, DataFrame]) -> pd.Series:
     matching_processes = None
-    for col, key in [
-        ("pset_pn", "processes_by_name"),
-        ("pset_pd", "processes_by_desc"),
-        ("pset_set", "processes_by_sets"),
-        ("pset_ci", "processes_by_comm_in"),
-        ("pset_co", "processes_by_comm_out"),
-    ]:
+    for col, key in process_map.items():
         if col in row.index and row[col] is not None:
-            matching_processes = intersect(
-                matching_processes, filter_by_pattern(dictionary[key], row[col].upper())
-            )
+            proc_set = topology[key]
+            pattern = row[col].upper()
+            filtered = filter_by_pattern(proc_set, pattern)
+            matching_processes = intersect(matching_processes, filtered)
+
     if matching_processes is not None and any(matching_processes.duplicated()):
         raise ValueError("duplicated")
+
     return matching_processes
 
 
-def get_matching_commodities(row, dictionary):
+def get_matching_commodities(row: pd.Series, topology: Dict[str, DataFrame]):
     matching_commodities = None
-    for col, key in [
-        ("cset_cn", "commodities_by_name"),
-        ("cset_cd", "commodities_by_desc"),
-        ("cset_set", "commodities_by_sets"),
-    ]:
+    for col, key in commodity_map.items():
         if col in row.index and row[col] is not None:
             matching_commodities = intersect(
                 matching_commodities,
-                filter_by_pattern(dictionary[key], row[col].upper()),
+                filter_by_pattern(topology[key], row[col].upper()),
             )
     return matching_commodities
@@ -2103,40 +2117,18 @@ def process_uc_wildcards(
 ) -> Dict[str, DataFrame]:
     tag = datatypes.Tag.uc_t
 
-    def make_str(df):
-        if df is not None and len(df) != 0:
-            list_from_df = df.iloc[:, 0].unique()
-            return ",".join(list_from_df)
-        else:
-            return None
-
-    if tag in tables:
+    if tag in tqdm(tables, desc="Processing uc_wildcards on tables"):
         start_time = time.time()
         df = tables[tag]
-        query_columns = config.query_columns[tag]
         dictionary = generate_topology_dictionary(tables, model)
 
-        df["process"] = df.apply(
-            lambda row: make_str(get_matching_processes(row, dictionary)), axis=1
+        df = _match_uc_wildcards(
+            df, process_map, dictionary, get_matching_processes, "process"
         )
-        df["commodity"] = df.apply(
-            lambda row: make_str(get_matching_commodities(row, dictionary)), axis=1
+        df = _match_uc_wildcards(
+            df, commodity_map, dictionary, get_matching_commodities, "commodity"
        )
 
-        cols_to_drop = [col for col in df.columns if col in query_columns]
-
-        df = expand_rows(
-            query_columns,
-            datatypes.EmbeddedXlTable(
-                tag="",
-                uc_sets={},
-                sheetname="",
-                range="",
-                filename="",
-                dataframe=df.drop(columns=cols_to_drop),
-            ),
-        ).dataframe
-
         tables[tag] = df
 
         logger.info(
@@ -2146,6 +2138,66 @@ def make_str(df):
     return tables
 
 
+def _match_uc_wildcards(
+    df: pd.DataFrame,
+    process_map: dict[str, str],
+    dictionary: dict[str, pd.DataFrame],
+    matcher: Callable,
+    result_col: str,
+) -> pd.DataFrame:
+    """
+    Match wildcards in the given table using the given process map and dictionary.
+
+    Args:
+        df: Table to match wildcards in.
+        process_map: Mapping of column names to process sets.
+        dictionary: Dictionary of process sets to match against.
+        matcher: Matching function to use, e.g. get_matching_processes or get_matching_commodities.
+        result_col: Name of the column to store the matched results in.
+
+    Returns:
+        The table with the wildcard columns removed and the results of the wildcard matches added as a column named `result_col`.
+    """
+    proc_cols = list(process_map.keys())
+
+    # drop duplicate sets of wildcard columns to save repeated (slow) regex matching. This makes things much faster.
+    unique_filters = df[proc_cols].drop_duplicates().dropna(axis="rows", how="all")
+
+    # match all the wildcard columns against the dictionary names
+    matches = unique_filters.apply(lambda row: matcher(row, dictionary), axis=1)
+
+    # we occasionally get a DataFrame back from the matchers. Convert these to Series.
+    matches = (
+        matches.iloc[:, 0].to_list()
+        if isinstance(matches, pd.DataFrame)
+        else matches.to_list()
+    )
+    matches = [
+        df.iloc[:, 0].to_list() if df is not None and len(df) != 0 else None
+        for df in matches
+    ]
+    matches = pd.DataFrame({result_col: matches})
+
+    # then join the wildcard cols to their list of matched names so we can join them back into the table df.
+    filter_matches = unique_filters.reset_index(drop=True).merge(
+        matches, left_index=True, right_index=True
+    )
+
+    # Finally we merge the matches back into the original table. This join re-duplicates the duplicate filters dropped above for speed.
+    # And we explode any matches to multiple names to give a long-format table.
+    df = (
+        df.merge(filter_matches, left_on=proc_cols, right_on=proc_cols, how="left")
+        .explode(result_col)
+        .reset_index(drop=True)
+        .drop(columns=proc_cols)
+    )
+
+    # replace NaNs in result_col with None for consistency with older logic
+    df[result_col] = df[result_col].where(df[result_col].notna(), None)
+
+    return df
+
+
 def process_wildcards(
     config: datatypes.Config,
     tables: Dict[str, DataFrame],
@@ -2170,7 +2222,7 @@ def match_wildcards(
             matching_commodities is None or len(matching_commodities) == 0
         ):  # TODO is this necessary? Try without?
             # TODO debug these
-            logger.warning(f"a row matched no processes or commodities")
+            logger.warning("a row matched no processes or commodities")
             return None
         return matching_processes, matching_commodities
@@ -2214,7 +2266,11 @@ def eval_and_update(
         # TFM_UPD: expand wildcards in each row, query FI_T to find matching rows,
         # evaluate the update formula, and add new rows to FI_T
         # TODO perf: collect all updates and go through FI_T only once?
-        for _, row in updates.iterrows():
+        for _, row in tqdm(
+            updates.iterrows(),
+            total=len(updates),
+            desc=f"Processing wildcard for {datatypes.Tag.tfm_upd}",
+        ):
            if row["value"] is None:  # TODO is this really needed?
                continue
            match = match_wildcards(row)
@@ -2237,7 +2293,11 @@ def eval_and_update(
         new_tables = []
 
         # TFM_INS: expand each row by wildcards, then add to FI_T
-        for _, row in updates.iterrows():
+        for _, row in tqdm(
+            updates.iterrows(),
+            total=len(updates),
+            desc=f"Processing wildcard for {datatypes.Tag.tfm_ins}",
+        ):
             match = match_wildcards(row)
             # TODO perf: add matched procs/comms into column and use explode?
             new_rows = pd.DataFrame([row.filter(table.columns)])
@@ -2257,10 +2317,14 @@ def eval_and_update(
 
         # TFM_INS-TXT: expand row by wildcards, query FI_PROC/COMM for matching rows,
         # evaluate the update formula, and inplace update the rows
-        for _, row in updates.iterrows():
+        for _, row in tqdm(
+            updates.iterrows(),
+            total=len(updates),
+            desc=f"Processing wildcard for {datatypes.Tag.tfm_ins_txt}",
+        ):
             match = match_wildcards(row)
             if match is None:
-                logger.warning(f"TFM_INS-TXT row matched neither commodity nor process")
+                logger.warning("TFM_INS-TXT row matched neither commodity nor process")
                 continue
             processes, commodities = match
             if commodities is not None:
@@ -2282,7 +2346,11 @@ def eval_and_update(
         table = tables[datatypes.Tag.fi_t]
         new_tables = []
 
-        for _, row in updates.iterrows():
+        for _, row in tqdm(
+            updates.iterrows(),
+            total=len(updates),
+            desc=f"Processing wildcard for {datatypes.Tag.tfm_mig}",
+        ):
             match = match_wildcards(row)
             processes, commodities = match if match is not None else (None, None)
             # TODO should we also query on limtype?
diff --git a/xl2times/utils.py b/xl2times/utils.py
index 0628e38..fcc10eb 100644
--- a/xl2times/utils.py
+++ b/xl2times/utils.py
@@ -2,6 +2,7 @@
     annotations,
 )  # see https://loguru.readthedocs.io/en/stable/api/type_hints.html#module-autodoc_stub_file.loguru
 
+import functools
 import os
 import re
 import sys
@@ -192,6 +193,7 @@ def remove_positive_patterns(pattern):
     return ",".join([word[1:] for word in pattern.split(",") if word[0] == "-"])
 
 
+@functools.lru_cache(maxsize=int(1e6))
 def create_regexp(pattern):
     # exclude negative patterns
     if has_negative_patterns(pattern):
@@ -205,6 +207,7 @@
     return re.compile(pattern)
 
 
+@functools.lru_cache(maxsize=int(1e6))
 def create_negative_regexp(pattern):
     pattern = remove_positive_patterns(pattern)
     if len(pattern) == 0:
@@ -262,7 +265,7 @@ def get_logger(log_name: str = default_log_name, log_dir: str = ".") -> loguru.L
             "level": "DEBUG",
             "colorize": False,
             "serialize": False,
-            "diagnose": False,
+            "diagnose": True,
             "rotation": "20 MB",
             "compression": "zip",
         },