From 9e3e5a3a2cb2197d4014c4a1b7089da6522fb269 Mon Sep 17 00:00:00 2001 From: Olexandr Balyk Date: Wed, 29 Nov 2023 08:32:20 -0500 Subject: [PATCH] Expand config with veda defaults for attributes (#143) This PR closes #55. --------- Co-authored-by: Siddharth Krishna --- times_reader/__main__.py | 7 +- times_reader/config/veda-attr-defaults.json | 1064 +++++++++++++++++++ times_reader/datatypes.py | 49 +- times_reader/transforms.py | 316 +----- 4 files changed, 1130 insertions(+), 306 deletions(-) create mode 100644 times_reader/config/veda-attr-defaults.json diff --git a/times_reader/__main__.py b/times_reader/__main__.py index c5cbb03..741199d 100644 --- a/times_reader/__main__.py +++ b/times_reader/__main__.py @@ -404,7 +404,12 @@ def main(): ) args = args_parser.parse_args() - config = datatypes.Config("times_mapping.txt", "times-info.json", "veda-tags.json") + config = datatypes.Config( + "times_mapping.txt", + "times-info.json", + "veda-tags.json", + "veda-attr-defaults.json", + ) if not isinstance(args.input, list) or len(args.input) < 1: print(f"ERROR: expected at least 1 input. 
Got {args.input}") diff --git a/times_reader/config/veda-attr-defaults.json b/times_reader/config/veda-attr-defaults.json new file mode 100644 index 0000000..7e215e5 --- /dev/null +++ b/times_reader/config/veda-attr-defaults.json @@ -0,0 +1,1064 @@ +{ + "ACT2FLO": { + "times-attribute": "PRC_ACTFLO" + }, + "ACTBND": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + }, + "times-attribute": "ACT_BND" + }, + "ACTCOST": { + "times-attribute": "ACT_COST" + }, + "ACTFLO": { + "times-attribute": "PRC_ACTFLO" + }, + "ACT_BND": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "ACT_CSTRMP": { + "defaults": { + "limtype": "UP" + } + }, + "ACT_CSTSD": { + "defaults": { + "limtype": "UP" + } + }, + "ACT_CSTUP": { + "defaults": { + "ts-level": "DAYNITE" + } + }, + "ACT_CUM": { + "defaults": { + "limtype": "UP" + } + }, + "ACT_EFF": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "ACT_FLO": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "ACT_LOSPL": { + "defaults": { + "limtype": "FX" + } + }, + "ACT_LOSSD": { + "defaults": { + "limtype": "UP" + } + }, + "ACT_SDTIME": { + "defaults": { + "limtype": "UP" + } + }, + "ACT_TIME": { + "defaults": { + "limtype": "UP" + } + }, + "ACT_UPS": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "AF": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + }, + "times-attribute": "NCAP_AF" + }, + "AFA": { + "defaults": { + "limtype": "UP" + }, + "times-attribute": "NCAP_AFA" + }, + "AFAC": { + "times-attribute": "NCAP_AFAC" + }, + "AFC": { + "defaults": { + "ts-level": "ANNUAL" + }, + "times-attribute": "NCAP_AFC" + }, + "AFCS": { + "times-attribute": "NCAP_AFCS" + }, + "AFM": { + "times-attribute": "NCAP_AFM" + }, + "AFS": { + "times-attribute": "NCAP_AFS" + }, + "AFSX": { + "times-attribute": "NCAP_AFSX" + }, + "AFX": { + "times-attribute": "NCAP_AFX" + }, + "ARAF": { + "times-attribute": "NCAP_AFA" + }, + "AVAILABILITY": { + "times-attribute": "NCAP_AF" + }, + "BNDACT": { 
+ "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + }, + "times-attribute": "ACT_BND" + }, + "BNDCAP": { + "times-attribute": "CAP_BND" + }, + "BNDCST": { + "times-attribute": "REG_BNDCST" + }, + "BNDINV": { + "times-attribute": "NCAP_BND" + }, + "BS_BNDPRS": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "BS_DELTA": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "BS_DEMDET": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "BS_MAINT": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "BS_OMEGA": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "BS_RMAX": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "BS_SHARE": { + "defaults": { + "limtype": "UP" + } + }, + "BS_SIGMA": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "BS_STIME": { + "defaults": { + "limtype": "LO" + } + }, + "CAGG": { + "times-attribute": "COM_AGG" + }, + "CAP2ACT": { + "times-attribute": "PRC_CAPACT" + }, + "CAPUNIT": { + "times-attribute": "PRC_CAPACT" + }, + "CAP_BND": { + "defaults": { + "limtype": "UP" + } + }, + "CBNDNET": { + "times-attribute": "COM_BNDNET" + }, + "CBNDPRD": { + "times-attribute": "COM_BNDPRD" + }, + "CBPRICE": { + "times-attribute": "COM_BPRICE" + }, + "CCSTNET": { + "times-attribute": "COM_CSTNET" + }, + "CCSTPRD": { + "times-attribute": "COM_CSTPRD" + }, + "CEFF": { + "defaults": { + "commodity": [ + "commodity-in", + "commodity-out" + ], + "ts-level": "ANNUAL" + }, + "times-attribute": "ACT_EFF" + }, + "CEFF-I": { + "defaults": { + "commodity": [ + "commodity-in" + ], + "ts-level": "ANNUAL" + }, + "times-attribute": "ACT_EFF" + }, + "CEFF-O": { + "defaults": { + "commodity": [ + "commodity-out" + ], + "ts-level": "ANNUAL" + }, + "times-attribute": "ACT_EFF" + }, + "CEFFICIENCY": { + "defaults": { + "commodity": [ + "commodity-in", + "commodity-out" + ], + "ts-level": "ANNUAL" + }, + "times-attribute": "ACT_EFF" + }, + "CEH": { + "times-attribute": "VDA_CEH" + }, + "CELAST": { + "times-attribute": "COM_ELAST" + }, + 
"CF": { + "times-attribute": "NCAP_AF" + }, + "CHPR": { + "defaults": { + "limtype": "FX" + }, + "times-attribute": "NCAP_CHPR" + }, + "CLAG": { + "times-attribute": "NCAP_CLAG" + }, + "CLED": { + "times-attribute": "NCAP_CLED" + }, + "COM_BNDNET": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "COM_BNDPRD": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "COM_BPRICE": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "COM_CSTBAL": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "COM_CSTNET": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "COM_CSTPRD": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "COM_CUMNET": { + "defaults": { + "limtype": "UP" + } + }, + "COM_CUMPRD": { + "defaults": { + "limtype": "UP" + } + }, + "COM_ELAST": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "COM_IE": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "COM_SUBNET": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "COM_SUBPRD": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "COM_TAXNET": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "COM_TAXPRD": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "COST": { + "defaults": { + "ts-level": "ANNUAL" + }, + "times-attribute": "IRE_PRICE" + }, + "CPX": { + "times-attribute": "NCAP_CPX" + }, + "CSTEP": { + "times-attribute": "COM_STEP" + }, + "CSUBNET": { + "times-attribute": "COM_SUBNET" + }, + "CSUBPRD": { + "times-attribute": "COM_SUBPRD" + }, + "CTAXNET": { + "times-attribute": "COM_TAXNET" + }, + "CTAXPRD": { + "times-attribute": "COM_TAXPRD" + }, + "CUM": { + "defaults": { + "limtype": "UP" + }, + "times-attribute": "ACT_CUM" + }, + "CUMCST": { + "times-attribute": "REG_CUMCST" + }, + "CURREX": { + "times-attribute": "G_CHNGMONY" + }, + "CVOC": { + "times-attribute": "COM_VOC" + }, + "DAMCOST": { + "times-attribute": "DAM_COST" + }, + "DBQTY": { + "times-attribute": "DAM_BQTY" + }, + "DCOST": { + "times-attribute": "NCAP_DCOST" + }, + "DELAST": { + 
"times-attribute": "DAM_ELAST" + }, + "DELIF": { + "times-attribute": "NCAP_DELIF" + }, + "DELIV": { + "defaults": { + "commodity": [ + "commodity-in" + ], + "ts-level": "ANNUAL" + }, + "times-attribute": "FLO_DELIV" + }, + "DEMAND": { + "times-attribute": "COM_PROJ" + }, + "DISCOUNT": { + "times-attribute": "G_DRATE" + }, + "DISCRATE": { + "times-attribute": "NCAP_DRATE" + }, + "DLAG": { + "times-attribute": "NCAP_DLAG" + }, + "DLAGC": { + "times-attribute": "NCAP_DLAGC" + }, + "DLIFE": { + "times-attribute": "NCAP_DLIFE" + }, + "DRATE": { + "times-attribute": "NCAP_DRATE" + }, + "DSTEP": { + "times-attribute": "DAM_STEP" + }, + "DVOC": { + "times-attribute": "DAM_VOC" + }, + "EFF": { + "defaults": { + "ts-level": "ANNUAL" + }, + "times-attribute": "ACT_EFF" + }, + "EFFICIENCY": { + "defaults": { + "ts-level": "ANNUAL" + }, + "times-attribute": "ACT_EFF" + }, + "ELASTX": { + "times-attribute": "COM_ELASTX" + }, + "ELIFE": { + "times-attribute": "NCAP_ELIFE" + }, + "EMCB": { + "times-attribute": "VDA_EMCB" + }, + "ENVACT": { + "defaults": { + "commodity": [ + "commodity-out", + "commodity-in" + ], + "ts-level": "ANNUAL" + }, + "times-attribute": "FLO_EMIS" + }, + "ENVBND": { + "times-attribute": "COM_BNDNET" + }, + "ENVCAP": { + "times-attribute": "NCAP_COM" + }, + "ENVCOST": { + "times-attribute": "COM_CSTNET" + }, + "ENVCUM": { + "times-attribute": "COM_CUMNET" + }, + "ENV_ACT": { + "defaults": { + "commodity": [ + "commodity-out", + "commodity-in" + ], + "ts-level": "ANNUAL" + }, + "times-attribute": "FLO_EMIS" + }, + "ENV_CAP": { + "times-attribute": "NCAP_COM" + }, + "ENV_COST": { + "times-attribute": "COM_CSTNET" + }, + "ENV_CUMMAX": { + "times-attribute": "COM_CUMNET" + }, + "FCOST": { + "times-attribute": "FLO_COST" + }, + "FEMIS": { + "defaults": { + "commodity": [ + "commodity-out", + "commodity-in" + ], + "ts-level": "ANNUAL" + }, + "times-attribute": "FLO_EMIS" + }, + "FIXOM": { + "times-attribute": "NCAP_FOM" + }, + "FLOP": { + "times-attribute": 
"VDA_FLOP" + }, + "FLOSHAR": { + "defaults": { + "commodity": [ + "commodity-in", + "commodity-out" + ], + "limtype": "FX", + "ts-level": "ANNUAL" + }, + "times-attribute": "FLO_SHAR" + }, + "FLO_BND": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "FLO_COST": { + "defaults": { + "commodity": [ + "commodity-in", + "commodity-out" + ], + "ts-level": "ANNUAL" + } + }, + "FLO_CUM": { + "defaults": { + "limtype": "UP" + } + }, + "FLO_DELIV": { + "defaults": { + "commodity": [ + "commodity-in" + ], + "ts-level": "ANNUAL" + } + }, + "FLO_EFF": { + "defaults": { + "commodity": [ + "commodity-out", + "commodity-in" + ], + "ts-level": "ANNUAL" + } + }, + "FLO_EMIS": { + "defaults": { + "commodity": [ + "commodity-out", + "commodity-in" + ], + "ts-level": "ANNUAL" + } + }, + "FLO_FR": { + "defaults": { + "limtype": "UP" + } + }, + "FLO_FUNC": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "FLO_MARK": { + "defaults": { + "commodity": [ + "commodity-in", + "commodity-out" + ], + "limtype": "UP" + } + }, + "FLO_SHAR": { + "defaults": { + "commodity": [ + "commodity-in", + "commodity-out" + ], + "limtype": "FX", + "ts-level": "ANNUAL" + } + }, + "FLO_SUB": { + "defaults": { + "commodity": [ + "commodity-out", + "commodity-in" + ], + "ts-level": "ANNUAL" + } + }, + "FLO_TAX": { + "defaults": { + "commodity": [ + "commodity-out", + "commodity-in" + ], + "ts-level": "ANNUAL" + } + }, + "FMSHARE": { + "times-attribute": "FLO_MARK" + }, + "FOMM": { + "times-attribute": "NCAP_FOMM" + }, + "FOMX": { + "times-attribute": "NCAP_FOMX" + }, + "FRACTION": { + "times-attribute": "COM_IE" + }, + "FSUB": { + "times-attribute": "NCAP_FSUB" + }, + "FSUBM": { + "times-attribute": "NCAP_FSUBM" + }, + "FSUBX": { + "times-attribute": "NCAP_FSUBX" + }, + "FTAXM": { + "times-attribute": "NCAP_FTAXM" + }, + "FTAXX": { + "times-attribute": "NCAP_FTAXX" + }, + "GR_DEMFR": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "GR_VARGEN": { + "defaults": { + "limtype": "LO" + } 
+ }, + "G_YRFR": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "HRESERV": { + "times-attribute": "COM_PKRSV" + }, + "IBOND": { + "times-attribute": "NCAP_BND" + }, + "IFLOSUM": { + "times-attribute": "IRE_FLOSUM" + }, + "ILED": { + "times-attribute": "NCAP_ILED" + }, + "INVCOST": { + "times-attribute": "NCAP_COST" + }, + "IRE_BND": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "IRE_FLOSUM": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "IRE_PRICE": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "IRE_XBND": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "IRE_XXBND": { + "times-attribute": "IRE_XBND" + }, + "ISPCT": { + "times-attribute": "NCAP_ISPCT" + }, + "ITAX": { + "times-attribute": "NCAP_ITAX" + }, + "LIFE": { + "times-attribute": "NCAP_TLIFE" + }, + "NCAP_AF": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "NCAP_AFA": { + "defaults": { + "limtype": "UP" + } + }, + "NCAP_AFAC": { + "defaults": { + "limtype": "UP" + } + }, + "NCAP_AFC": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "NCAP_AFCS": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "NCAP_AFS": { + "defaults": { + "limtype": "UP" + } + }, + "NCAP_AFSX": { + "defaults": { + "limtype": "UP" + } + }, + "NCAP_BND": { + "defaults": { + "limtype": "UP" + } + }, + "NCAP_CHPR": { + "defaults": { + "limtype": "FX" + } + }, + "NCAP_PKCNT": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "OCOM": { + "times-attribute": "NCAP_OCOM" + }, + "OLIFE": { + "times-attribute": "NCAP_OLIFE" + }, + "PASTI": { + "times-attribute": "NCAP_PASTI" + }, + "PASTY": { + "times-attribute": "NCAP_PASTY" + }, + "PEAK": { + "defaults": { + "ts-level": "ANNUAL" + }, + "times-attribute": "NCAP_PKCNT" + }, + "PKCNT": { + "times-attribute": "NCAP_PKCNT" + }, + "PKCOI": { + "times-attribute": "FLO_PKCOI" + }, + "PKFLX": { + "times-attribute": "COM_PKFLX" + }, + "PRC_FOFF": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "PRC_MARK": { 
+ "defaults": { + "limtype": "UP" + } + }, + "RCAP_BND": { + "defaults": { + "limtype": "LO" + } + }, + "REG_BDNCAP": { + "defaults": { + "limtype": "FX" + } + }, + "REG_BNDCST": { + "defaults": { + "limtype": "UP" + } + }, + "REG_CUMCST": { + "defaults": { + "limtype": "UP" + } + }, + "RESID": { + "times-attribute": "PRC_RESID" + }, + "RFRIR": { + "times-attribute": "G_RFRIR" + }, + "SDAMCOST": { + "times-attribute": "S_DAM_COST" + }, + "SHARE": { + "defaults": { + "commodity": [ + "commodity-in", + "commodity-out" + ], + "limtype": "FX", + "ts-level": "ANNUAL" + }, + "times-attribute": "FLO_SHAR" + }, + "SHARE-I": { + "defaults": { + "commodity": [ + "commodity-in" + ], + "limtype": "FX", + "ts-level": "ANNUAL" + }, + "times-attribute": "FLO_SHAR" + }, + "SHARE-O": { + "defaults": { + "commodity": [ + "commodity-out" + ], + "limtype": "FX", + "ts-level": "ANNUAL" + }, + "times-attribute": "FLO_SHAR" + }, + "SRAF": { + "times-attribute": "NCAP_AFS" + }, + "START": { + "times-attribute": "NCAP_START" + }, + "STGIN_BND": { + "defaults": { + "commodity": [ + "commodity-in" + ], + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "STGOUT_BND": { + "defaults": { + "commodity": [ + "commodity-out" + ], + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "STG_CHRG": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "STG_LOSS": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "STG_SIFT": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "STOCK": { + "times-attribute": "PRC_RESID" + }, + "S_CAP_BND": { + "defaults": { + "limtype": "UP" + } + }, + "S_CHRG": { + "times-attribute": "STG_CHRG" + }, + "S_COM_CUMNET": { + "defaults": { + "limtype": "UP" + } + }, + "S_COM_CUMPRD": { + "defaults": { + "limtype": "UP" + } + }, + "S_DCOST": { + "times-attribute": "S_DAM_COST" + }, + "S_EFF": { + "times-attribute": "STG_EFF" + }, + "S_FLO_CUM": { + "defaults": { + "limtype": "UP" + } + }, + "S_INBND": { + "times-attribute": "STGIN_BND" + }, + "S_LOSS": { + "times-attribute": 
"STG_LOSS" + }, + "S_OUTBND": { + "times-attribute": "STGOUT_BND" + }, + "S_UC_RHS": { + "defaults": { + "limtype": "UP" + } + }, + "S_UC_RHSR": { + "defaults": { + "limtype": "UP" + } + }, + "S_UC_RHSRT": { + "defaults": { + "limtype": "UP" + } + }, + "S_UC_RHSRTS": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "S_UC_RHSTS": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "TFTAX": { + "times-attribute": "NCAP_FTAX" + }, + "TISUB": { + "times-attribute": "NCAP_ISUB" + }, + "TSCVT": { + "times-attribute": "IRE_TSCVT" + }, + "TS_CYCLE": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "UC_ACT": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "UC_COMCON": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "UC_COMNET": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "UC_COMPRD": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "UC_DYNBND": { + "defaults": { + "limtype": "UP" + } + }, + "UC_FLO": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "UC_IRE": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "UC_RHS": { + "defaults": { + "limtype": "UP" + } + }, + "UC_RHSR": { + "defaults": { + "limtype": "UP" + } + }, + "UC_RHSRT": { + "defaults": { + "limtype": "UP" + } + }, + "UC_RHSRTS": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "UC_RHST": { + "defaults": { + "limtype": "UP" + } + }, + "UC_RHSTS": { + "defaults": { + "limtype": "UP", + "ts-level": "ANNUAL" + } + }, + "UTILIZATION": { + "times-attribute": "NCAP_AF" + }, + "VAROM": { + "times-attribute": "ACT_COST" + }, + "VDA_EFF": { + "times-attribute": "ACT_EFF" + }, + "VDA_FLOP": { + "defaults": { + "ts-level": "ANNUAL" + } + }, + "YRFR": { + "times-attribute": "G_YRFR" + } +} diff --git a/times_reader/datatypes.py b/times_reader/datatypes.py index fc7fed2..b58e9d7 100644 --- a/times_reader/datatypes.py +++ b/times_reader/datatypes.py @@ -148,13 +148,23 @@ class Config: all_attributes: Set[str] # For each tag, this 
dictionary maps each column alias to the normalized name column_aliases: Dict[Tag, Dict[str, str]] + veda_attr_defaults: Dict[str, Dict[str, list]] - def __init__(self, mapping_file: str, times_info_file: str, veda_tags_file: str): + def __init__( + self, + mapping_file: str, + times_info_file: str, + veda_tags_file: str, + veda_attr_defaults_file: str, + ): self.times_xl_maps = Config._read_mappings(mapping_file) self.dd_table_order, self.all_attributes = Config._process_times_info( times_info_file ) self.column_aliases = Config._read_veda_tags_info(veda_tags_file) + self.veda_attr_defaults = Config._read_veda_attr_defaults( + veda_attr_defaults_file + ) @staticmethod def _process_times_info(times_info_file: str) -> Tuple[Iterable[str], Set[str]]: @@ -270,3 +280,40 @@ def _read_veda_tags_info(veda_tags_file: str) -> Dict[Tag, Dict[str, str]]: for alias in aliases: column_aliases[tag_name][alias] = name return column_aliases + + @staticmethod + def _read_veda_attr_defaults( + veda_attr_defaults_file: str, + ) -> Dict[str, Dict[str, list]]: + # Read veda_attr_defaults_file + with resources.open_text("times_reader.config", veda_attr_defaults_file) as f: + defaults = json.load(f) + + veda_attr_defaults = { + "aliases": defaultdict(list), + "commodity": {}, + "limtype": {"FX": [], "LO": [], "UP": []}, + "tslvl": {"DAYNITE": [], "ANNUAL": []}, + } + + for attr, attr_info in defaults.items(): + # Populate aliases by attribute dictionary + if "times-attribute" in attr_info: + times_attr = attr_info["times-attribute"] + veda_attr_defaults["aliases"][times_attr].append(attr) + + if "defaults" in attr_info: + attr_defaults = attr_info["defaults"] + + if "commodity" in attr_defaults: + veda_attr_defaults["commodity"][attr] = attr_defaults["commodity"] + + if "limtype" in attr_defaults: + limtype = attr_defaults["limtype"] + veda_attr_defaults["limtype"][limtype].append(attr) + + if "ts-level" in attr_defaults: + tslvl = attr_defaults["ts-level"] +
veda_attr_defaults["tslvl"][tslvl].append(attr) + + return veda_attr_defaults diff --git a/times_reader/transforms.py b/times_reader/transforms.py index 4100d02..f4ef4a6 100644 --- a/times_reader/transforms.py +++ b/times_reader/transforms.py @@ -29,118 +29,6 @@ cset + io for cset in csets_ordered_for_pcg for io in ["I", "O"] ] -# Specify a list of aliases per TIMES attribute -aliases_by_attr = { - "ACT_BND": ["ACTBND", "BNDACT"], - "ACT_COST": ["ACTCOST", "VAROM"], - "ACT_CUM": ["CUM"], - "ACT_EFF": [ - "CEFF", - "CEFFICIENCY", - "CEFF-I", - "CEFF-O", - "EFF", - "EFFICIENCY", - "VDA_EFF", - ], - "CAP_BND": ["BNDCAP"], - "COM_AGG": ["CAGG"], - "COM_BNDNET": ["ENVBND", "CBNDNET"], - "COM_BNDPRD": ["CBNDPRD"], - "COM_BPRICE": ["CBPRICE"], - "COM_CSTNET": ["ENV_COST", "ENVCOST", "CCSTNET"], - "COM_CSTPRD": ["CCSTPRD"], - "COM_CUMNET": ["ENV_CUMMAX", "ENVCUM"], - "COM_ELAST": ["CELAST"], - "COM_ELASTX": ["ELASTX"], - "COM_IE": ["FRACTION"], - "COM_PKFLX": ["PKFLX"], - "COM_PKRSV": ["HRESERV"], - "COM_PROJ": ["DEMAND"], - "COM_STEP": ["CSTEP"], - "COM_SUBNET": ["CSUBNET"], - "COM_SUBPRD": ["CSUBPRD"], - "COM_TAXNET": ["CTAXNET"], - "COM_TAXPRD": ["CTAXPRD"], - "COM_VOC": ["CVOC"], - "DAM_BQTY": ["DBQTY"], - "DAM_COST": ["DAMCOST"], - "DAM_ELAST": ["DELAST"], - "DAM_STEP": ["DSTEP"], - "DAM_VOC": ["DVOC"], - "FLO_COST": ["FCOST"], - "FLO_DELIV": ["DELIV"], - "FLO_EMIS": ["ENV_ACT", "ENVACT", "FEMIS"], - "FLO_MARK": ["FMSHARE"], - "FLO_PKCOI": ["PKCOI"], - "FLO_SHAR": ["FLOSHAR", "SHARE", "SHARE-I", "SHARE-O"], - "G_CHNGMONY": ["CURREX"], - "G_DRATE": ["DISCOUNT"], - "G_RFRIR": ["RFRIR"], - "G_YRFR": ["YRFR"], - "IRE_FLOSUM": ["IFLOSUM"], - "IRE_PRICE": ["COST"], - "IRE_TSCVT": ["TSCVT"], - "IRE_XBND": ["IRE_XXBND"], - "NCAP_AF": ["AF", "AVAILABILITY", "CF", "UTILIZATION"], - "NCAP_AFA": ["AFA", "ARAF"], - "NCAP_AFAC": ["AFAC"], - "NCAP_AFC": ["AFC"], - "NCAP_AFCS": ["AFCS"], - "NCAP_AFM": ["AFM"], - "NCAP_AFS": ["AFS", "SRAF"], - "NCAP_AFSX": ["AFSX"], - "NCAP_AFX": 
["AFX"], - "NCAP_BND": ["IBOND", "BNDINV"], - "NCAP_CHPR": ["CHPR"], - "NCAP_CLAG": ["CLAG"], - "NCAP_CLED": ["CLED"], - "NCAP_COM": ["ENV_CAP", "ENVCAP"], - "NCAP_COST": ["INVCOST"], - "NCAP_CPX": ["CPX"], - "NCAP_DCOST": ["DCOST"], - "NCAP_DELIF": ["DELIF"], - "NCAP_DLAG": ["DLAG"], - "NCAP_DLAGC": ["DLAGC"], - "NCAP_DLIFE": ["DLIFE"], - "NCAP_DRATE": ["DISCRATE", "DRATE"], - "NCAP_ELIFE": ["ELIFE"], - "NCAP_FOM": ["FIXOM"], - "NCAP_FOMM": ["FOMM"], - "NCAP_FOMX": ["FOMX"], - "NCAP_FSUB": ["FSUB"], - "NCAP_FSUBM": ["FSUBM"], - "NCAP_FSUBX": ["FSUBX"], - "NCAP_FTAX": ["TFTAX"], - "NCAP_FTAXM": ["FTAXM"], - "NCAP_FTAXX": ["FTAXX"], - "NCAP_ILED": ["ILED"], - "NCAP_ISPCT": ["ISPCT"], - "NCAP_ISUB": ["TISUB"], - "NCAP_ITAX": ["ITAX"], - "NCAP_OCOM": ["OCOM"], - "NCAP_OLIFE": ["OLIFE"], - "NCAP_PASTI": ["PASTI"], - "NCAP_PASTY": ["PASTY"], - "NCAP_PKCNT": ["PEAK", "PKCNT"], - "NCAP_START": ["START"], - "NCAP_TLIFE": ["LIFE"], - "PRC_ACTFLO": ["ACTFLO", "ACT2FLO"], - "PRC_CAPACT": ["CAPUNIT", "CAP2ACT"], - "PRC_RESID": ["RESID", "STOCK"], - "REG_BNDCST": ["BNDCST"], - "REG_CUMCST": ["CUMCST"], - "S_DAM_COST": ["S_DCOST", "SDAMCOST"], - "STG_CHRG": ["S_CHRG"], - "STG_EFF": ["S_EFF"], - "STG_LOSS": ["S_LOSS"], - "STGIN_BND": ["S_INBND"], - "STGOUT_BND": ["S_OUTBND"], - "VDA_CEH": ["CEH"], - "VDA_EMCB": ["EMCB"], - "VDA_FLOP": ["FLOP"], -} - attr_prop = { "COM_LIM": "limtype", "COM_TSL": "ctslvl", @@ -150,191 +38,6 @@ "PRC_VINT": "vintage", } -# Specify, in order of priority, what to use as CommName if CommName is empty -attr_com_def = { - "CEFF": ["commodity-in", "commodity-out"], # this one is a Veda alias - "CEFFICIENCY": [ - "commodity-in", - "commodity-out", - ], # this one is an alias of the above - "CEFF-I": ["commodity-in"], - "CEFF-O": ["commodity-out"], - "FLO_COST": ["commodity-in", "commodity-out"], - "FLO_DELIV": ["commodity-in"], - "DELIV": ["commodity-in"], - "FLO_EMIS": ["commodity-out", "commodity-in"], - "FEMIS": ["commodity-out", "commodity-in"], - 
"FLO_EFF": ["commodity-out", "commodity-in"], - "ENV_ACT": ["commodity-out", "commodity-in"], - "ENVACT": ["commodity-out", "commodity-in"], - "FLO_MARK": ["commodity-in", "commodity-out"], - "FLO_SHAR": ["commodity-in", "commodity-out"], - "FLOSHAR": ["commodity-in", "commodity-out"], - "SHARE": ["commodity-in", "commodity-out"], - "SHARE-I": ["commodity-in"], - "SHARE-O": ["commodity-out"], - "FLO_SUB": ["commodity-out", "commodity-in"], - "FLO_TAX": ["commodity-out", "commodity-in"], - "STGIN_BND": ["commodity-in"], - "STGOUT_BND": ["commodity-out"], -} - -attr_limtype_def = { - "FX": [ - "ACT_LOSPL", - "FLO_SHAR", - "FLOSHAR", - "SHARE", - "SHARE-I", - "SHARE-O", - "NCAP_CHPR", - "CHPR", - "REG_BDNCAP", - ], - "LO": ["BS_STIME", "GR_VARGEN", "RCAP_BND"], - "UP": [ - "ACT_BND", - "ACTBND", - "BNDACT", - "ACT_CSTRMP", - "ACT_CSTSD", - "ACT_CUM", - "CUM", - "ACT_LOSSD", - "ACT_SDTIME", - "ACT_TIME", - "ACT_UPS", - "BS_BNDPRS", - "BS_SHARE", - "CAP_BND", - "COM_BNDNET", - "COM_BNDPRD", - "COM_CUMNET", - "COM_CUMPRD", - "FLO_BND", - "FLO_CUM", - "FLO_FR", - "FLO_MARK", - "IRE_BND", - "IRE_XBND", - "NCAP_AF", - "AF", - "NCAP_AFA", - "AFA", - "NCAP_AFAC", - "NCAP_AFS", - "NCAP_AFSX", - "NCAP_BND", - "PRC_MARK", - "REG_BNDCST", - "REG_CUMCST", - "S_CAP_BND", - "S_COM_CUMNET", - "S_COM_CUMPRD", - "S_FLO_CUM", - "S_UC_RHS", - "S_UC_RHSR", - "S_UC_RHSRT", - "S_UC_RHSRTS", - "S_UC_RHSTS", - "STGIN_BND", - "STGOUT_BND", - "UC_DYNBND", - "UC_RHS", - "UC_RHSR", - "UC_RHSRT", - "UC_RHSRTS", - "UC_RHST", - "UC_RHSTS", - ], -} - -attr_timeslice_def = { - "DAYNITE": ["ACT_CSTUP"], - "ANNUAL": [ - "ACT_BND", - "ACTBND", - "BNDACT", - "ACT_EFF", - "CEFF", - "CEFF-O", - "CEFF-I", - "CEFFICIENCY", - "EFFICIENCY", - "EFF", - "ACT_FLO", - "ACT_UPS", - "BS_BNDPRS", - "BS_DELTA", - "BS_DEMDET", - "BS_MAINT", - "BS_OMEGA", - "BS_RMAX", - "BS_SIGMA", - "COM_BNDNET", - "COM_BNDPRD", - "COM_BPRICE", - "COM_CSTBAL", - "COM_CSTNET", - "COM_CSTPRD", - "COM_ELAST", - "COM_IE", - "COM_SUBNET", - 
"COM_SUBPRD", - "COM_TAXNET", - "COM_TAXPRD", - "FLO_BND", - "FLO_COST", - "FLO_DELIV", - "DELIV", - "FLO_EFF", - "FLO_EMIS", - "FEMIS", - "ENV_ACT", - "ENVACT", - "FLO_FUNC", - "FLO_SHAR", - "FLOSHAR", - "SHARE", - "SHARE-I", - "SHARE-O", - "FLO_SUB", - "FLO_TAX", - "G_YRFR", - "GR_DEMFR", - "IRE_BND", - "IRE_FLOSUM", - "IRE_PRICE", - "COST", - "IRE_XBND", - "NCAP_AF", - "AF", - "NCAP_AFC", - "AFC", - "NCAP_AFCS", - "NCAP_PKCNT", - "PEAK", - "PRC_FOFF", - "S_UC_RHSRTS", - "S_UC_RHSTS", - "STG_CHRG", - "STG_LOSS", - "STG_SIFT", - "STGIN_BND", - "STGOUT_BND", - "TS_CYCLE", - "UC_ACT", - "UC_COMCON", - "UC_COMNET", - "UC_COMPRD", - "UC_FLO", - "UC_IRE", - "UC_RHSRTS", - "UC_RHSTS", - "VDA_FLOP", - ], -} - def remove_comment_rows(table: datatypes.EmbeddedXlTable) -> datatypes.EmbeddedXlTable: """ @@ -977,19 +680,22 @@ def fill_in_missing_values_table(table): and len(df) > 0 ): isna = df[colname].isna() - for lim in attr_limtype_def.keys(): + for lim in config.veda_attr_defaults["limtype"].keys(): df.loc[ - isna & df["attribute"].str.upper().isin(attr_limtype_def[lim]), + isna + & df["attribute"] + .str.upper() + .isin(config.veda_attr_defaults["limtype"][lim]), colname, ] = lim elif colname == "timeslice" and len(df) > 0 and "attribute" in df.columns: isna = df[colname].isna() - for timeslice in attr_timeslice_def.keys(): + for timeslice in config.veda_attr_defaults["tslvl"].keys(): df.loc[ isna & df["attribute"] .str.upper() - .isin(attr_timeslice_def[timeslice]), + .isin(config.veda_attr_defaults["tslvl"][timeslice]), colname, ] = timeslice elif ( @@ -1215,12 +921,14 @@ def apply_fixups_table(table: datatypes.EmbeddedXlTable): # Populate CommName based on defaults i = ( - df["attribute"].str.upper().isin(attr_com_def.keys()) + df["attribute"] + .str.upper() + .isin(config.veda_attr_defaults["commodity"].keys()) & df["commodity"].isna() ) if len(df[i]) > 0: for attr in df[i]["attribute"].unique(): - for com_in_out in attr_com_def[attr.upper()]: + for com_in_out in 
config.veda_attr_defaults["commodity"][attr.upper()]: index = i & (df["attribute"] == attr) & (df["commodity"].isna()) if len(df[index]) > 0: df.loc[index, ["commodity"]] = df[index][com_in_out] @@ -2442,7 +2150,7 @@ def convert_aliases( ) -> Dict[str, DataFrame]: # Ensure TIMES names for all attributes replacement_dict = {} - for k, v in aliases_by_attr.items(): + for k, v in config.veda_attr_defaults["aliases"].items(): for alias in v: replacement_dict[alias] = k