def MASTR(
    raw=False,
    update=False,
    config=None,
):
    """
    Get the Marktstammdatenregister (MaStR) dataset.

    Provided by the German Federal Network Agency (Bundesnetzagentur / BNetzA)
    and contains data on Germany, Austria and Switzerland.

    Parameters
    ----------
    raw : bool, default False
        Whether to return the original dataset. The raw frame is the
        concatenation of the per-fuel-type CSV files found in the archive,
        restricted to the columns this function knows about, plus a
        ``Filesuffix`` column holding the fuel-type label of the source file.
    update : bool, default False
        Whether to update the data from the url.
    config : dict, default None
        Add custom specific configuration,
        e.g. powerplantmatching.config.get_config(target_countries='Italy'),
        defaults to powerplantmatching.config.get_config()

    Returns
    -------
    pandas.DataFrame
        The raw data if ``raw`` is True, otherwise the cleaned data: only
        units whose status is listed in ``config["MASTR"]["status"]``
        (default ``["In Betrieb"]``) and whose net capacity exceeds 1000 kW
        are kept, and capacities are converted from kW to MW.

    Raises
    ------
    ValueError
        If the archive contains none of the expected CSV files.
    """
    config = get_config() if config is None else config

    RENAME_COLUMNS = {
        "EinheitMastrNummer": "projectID",
        "NameKraftwerk": "Name",
        "Land": "Country",
        "Nettonennleistung": "Capacity",
        "Inbetriebnahmedatum": "DateIn",
        "DatumEndgueltigeStilllegung": "DateOut",
        "EinheitBetriebsstatus": "Status",
        "Laengengrad": "lon",
        "Breitengrad": "lat",
    }
    COUNTRY_MAP = {
        "Deutschland": "Germany",
        "Österreich": "Austria",
        "Schweiz": "Switzerland",
    }
    PARSE_COLUMNS = [
        "ArtDerWasserkraftanlage",
        "Biomasseart",
        "Filesuffix",
        "Energietraeger",
        "Hauptbrennstoff",
        "NameStromerzeugungseinheit",
    ]
    # Columns needed further below that are neither renamed nor parsed.
    EXTRA_COLUMNS = [
        "GeplantesInbetriebnahmedatum",
        "ThermischeNutzleistung",
        "KwkMastrNummer",
    ]
    # Loop-invariant: build the lookup set once instead of once per file.
    target_columns = set(EXTRA_COLUMNS) | set(PARSE_COLUMNS) | set(RENAME_COLUMNS)

    fn = get_raw_file("MASTR", update=update, config=config)
    file_suffixes = {
        "Bioenergy": "biomass.csv",
        "Combustion": "combustion.csv",
        "Nuclear": "nuclear.csv",
        "Hydro": "hydro.csv",
        "Wind": "wind.csv",
        "Solar": "solar.csv",
    }
    data_frames = []
    with ZipFile(fn, "r") as archive:
        members = archive.namelist()
        for fueltype, suffix in file_suffixes.items():
            # Only the first archive member matching the suffix is used.
            member = next((m for m in members if m.endswith(suffix)), None)
            if member is None:
                continue
            # A callable ``usecols`` keeps only the wanted columns that are
            # actually present, so the file is opened once and the handle is
            # closed deterministically.
            with archive.open(member) as handle:
                frame = pd.read_csv(
                    handle,
                    usecols=lambda column: column in target_columns,
                )
            data_frames.append(frame.assign(Filesuffix=fueltype))

    if not data_frames:
        raise ValueError(
            f"No MaStR CSV files ({', '.join(file_suffixes.values())}) "
            f"found in archive '{fn}'."
        )
    df = pd.concat(data_frames).reset_index(drop=True)

    if raw:
        return df

    # Referenced via ``@status_list`` inside the query string below.
    status_list = config["MASTR"].get("status", ["In Betrieb"])  # noqa: F841
    capacity_threshold_kw = 1000

    df = (
        df.rename(columns=RENAME_COLUMNS)
        .query("Status in @status_list")
        .loc[lambda d: d.Capacity > capacity_threshold_kw]
        .assign(
            projectID=lambda d: "MASTR-" + d.projectID,
            Country=lambda d: d.Country.map(COUNTRY_MAP),
            Capacity=lambda d: d.Capacity / 1e3,  # kW to MW
            DateIn=lambda d: pd.to_datetime(d.DateIn).dt.year,
            DateOut=lambda d: pd.to_datetime(d.DateOut).dt.year,
        )
        .assign(
            # Fall back to the planned commissioning year where no actual
            # commissioning date is available.
            DateIn=lambda d: d["DateIn"].combine_first(
                pd.to_datetime(d["GeplantesInbetriebnahmedatum"]).dt.year
            ),
        )
        .pipe(
            gather_specifications,
            config=config,
            parse_columns=PARSE_COLUMNS,
        )
        .assign(
            # Units with a CHP register number or a thermal useful output
            # are marked as CHP; all others keep their parsed Set.
            Set=lambda d: d["Set"].where(
                d["KwkMastrNummer"].isna() & d["ThermischeNutzleistung"].isna(),
                "CHP",
            ),
        )
        .pipe(clean_name)
        .pipe(set_column_name, "MASTR")
        .pipe(config_filter, config)
    )

    return df
---------------------------------------------------------------------------- # # Data Structure Config # # ---------------------------------------------------------------------------- # @@ -269,8 +279,8 @@ target_fueltypes: # given by the list. An empty string results in a regex expression containing only the key. # Parsed of representatives at the top may be overwritten by representatives further below. Other: ".*" - Solid Biomass: [biological, bioenergy, agricultural, wood, biomass] - Biogas: [biogas] + Solid Biomass: [biological, bioenergy, agricultural, wood, biomass, feste biomasse] + Biogas: [biogas, biomethan, gasförmige biomasse] Nuclear: [nuclear] Natural Gas: [ @@ -282,6 +292,8 @@ target_fueltypes: combined cycle, fossil gas, mixed fossil fuels, + erdgas, + andere gase, ] Hydro: [ @@ -293,13 +305,14 @@ target_fueltypes: hydro, hydroelectric, wasserkraft, + wasser, ] - Hard Coal: [coal, coke] - Lignite: [brown coal, lignite, peat] - Oil: [oil, diesel] + Hard Coal: [coal, coke, steinkohle] + Lignite: [brown coal, lignite, peat, braunkohle] + Oil: [oil, diesel, mineralölprodukte] Geothermal: "" Solar: "" - Waste: "" + Waste: ["abfall.*", "waste"] Wind: "" Battery: [Electro-chemical, battery] target_sets: @@ -328,12 +341,12 @@ target_technologies: # A list will be converted to a regex expression matching all words (case-insensitive) # given by the list. An empty string results in a regex expression containing only the key. # Parsed of representatives at the top may be overwritten by representatives further below. 
- CCGT: [ccgt, gas, natural gas] - OCGT: [ocgt] - Steam Turbine: [steam, turbine] - Combustion Engine: [combustion engine] - Run-Of-River: [run-off, run off, run of river, run-of-river, ror] - Pumped Storage: [pumped hydro, pumped] + CCGT: [ccgt, gas, natural gas, gasturbinen mit abhitzekessel] + OCGT: [ocgt, gasturbinen ohne abhitzekessel] + Steam Turbine: [steam, turbine, kondensationsmaschine, gegendruckmaschine, dampfmotor] + Combustion Engine: [combustion engine, verbrennungsmotor, stirlingmotor] + Run-Of-River: [run-off, run off, run of river, run-of-river, ror, laufwasseranlage] + Pumped Storage: [pumped hydro, pumped, speicherwasseranlage] Reservoir: "" Marine: "" Onshore: ""