Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Marktstammdatenregister (MaStR) #165

Merged
merged 9 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion powerplantmatching/cleaning.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,9 @@ def aggregate_units(
df = cliques(df, duplicates)
df = df.groupby("grouped").agg(props_for_groups)

df[str_cols] = df[str_cols].replace("", pd.NA)
# Downcasting in replace is deprecated
with pd.option_context("future.no_silent_downcasting", True):
df[str_cols] = df[str_cols].replace("", pd.NA).infer_objects(copy=False)

df = (
df.assign(
Expand Down
121 changes: 121 additions & 0 deletions powerplantmatching/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2144,6 +2144,127 @@ def GEM(raw=False, update=False, config=None):
return pd.concat(data, ignore_index=True)


def MASTR(
    raw=False,
    update=False,
    config=None,
):
    """
    Get the Marktstammdatenregister (MaStR) dataset.

    Provided by the German Federal Network Agency (Bundesnetzagentur / BNetzA)
    and contains data on Germany, Austria and Switzerland.

    Parameters
    ----------
    raw : bool, default False
        Whether to return the original dataset, i.e. the concatenated CSV
        tables before any renaming or filtering.
    update : bool, default False
        Whether to update the data from the url.
    config : dict, default None
        Add custom specific configuration,
        e.g. powerplantmatching.config.get_config(target_countries='Italy'),
        defaults to powerplantmatching.config.get_config()

    Returns
    -------
    pandas.DataFrame
        The concatenated raw tables if ``raw`` is True, otherwise the
        renamed, filtered and processed dataset.

    Raises
    ------
    ValueError
        If the downloaded archive contains none of the expected CSV files.
    """
    config = get_config() if config is None else config

    RENAME_COLUMNS = {
        "EinheitMastrNummer": "projectID",
        "NameKraftwerk": "Name",
        "Land": "Country",
        "Nettonennleistung": "Capacity",
        "Inbetriebnahmedatum": "DateIn",
        "DatumEndgueltigeStilllegung": "DateOut",
        "EinheitBetriebsstatus": "Status",
        "Laengengrad": "lon",
        "Breitengrad": "lat",
    }
    COUNTRY_MAP = {
        "Deutschland": "Germany",
        "Österreich": "Austria",
        "Schweiz": "Switzerland",
    }
    PARSE_COLUMNS = [
        "ArtDerWasserkraftanlage",
        "Biomasseart",
        "Filesuffix",
        "Energietraeger",
        "Hauptbrennstoff",
        "NameStromerzeugungseinheit",
    ]
    # Columns requested from every CSV. The list does not depend on the file
    # being read, so it is built once instead of once per archive member.
    target_columns = [
        "GeplantesInbetriebnahmedatum",
        "ThermischeNutzleistung",
        "KwkMastrNummer",
        *PARSE_COLUMNS,
        *RENAME_COLUMNS,
    ]

    fn = get_raw_file("MASTR", update=update, config=config)
    file_suffixes = {
        "Bioenergy": "biomass.csv",
        "Combustion": "combustion.csv",
        "Nuclear": "nuclear.csv",
        "Hydro": "hydro.csv",
        "Wind": "wind.csv",
        "Solar": "solar.csv",
    }
    data_frames = []
    with ZipFile(fn, "r") as file:
        member_names = file.namelist()
        for fueltype, suffix in file_suffixes.items():
            # Only the first archive member matching the suffix is read.
            name = next((n for n in member_names if n.endswith(suffix)), None)
            if name is None:
                continue
            # Read the header only, to learn which target columns exist in
            # this particular file; close the member handle right after.
            with file.open(name) as handle:
                available_columns = pd.read_csv(handle, nrows=0).columns
            usecols = available_columns.intersection(target_columns)
            with file.open(name) as handle:
                df = pd.read_csv(handle, usecols=usecols).assign(
                    Filesuffix=fueltype
                )
            data_frames.append(df)

    if not data_frames:
        # pd.concat would raise a ValueError with an opaque message here.
        raise ValueError(
            f"No MaStR CSV files matching {sorted(file_suffixes.values())} "
            f"found in archive '{fn}'."
        )
    df = pd.concat(data_frames).reset_index(drop=True)

    if raw:
        return df

    # Referenced via `@status_list` inside the query string below, which
    # linters cannot see, hence the noqa.
    status_list = config["MASTR"].get("status", ["In Betrieb"])  # noqa: F841
    capacity_threshold_kw = 1000

    df = (
        df.rename(columns=RENAME_COLUMNS)
        .query("Status in @status_list")
        .loc[lambda df: df.Capacity > capacity_threshold_kw]
        .assign(
            projectID=lambda df: "MASTR-" + df.projectID,
            Country=lambda df: df.Country.map(COUNTRY_MAP),
            Capacity=lambda df: df.Capacity / 1e3,  # kW to MW
            DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year,
            DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year,
        )
        .assign(
            # Fall back to the planned commissioning year where no actual
            # commissioning date is given.
            DateIn=lambda df: df["DateIn"].combine_first(
                pd.to_datetime(df["GeplantesInbetriebnahmedatum"]).dt.year
            ),
        )
        .pipe(
            gather_specifications,
            config=config,
            parse_columns=PARSE_COLUMNS,
        )
        .assign(
            # Keep the parsed Set only where neither a KWK number nor a
            # thermal output is recorded; every other unit is set to "CHP".
            Set=lambda df: df["Set"].where(
                df["KwkMastrNummer"].isna() & df["ThermischeNutzleistung"].isna(),
                "CHP",
            ),
        )
        .pipe(clean_name)
        .pipe(set_column_name, "MASTR")
        .pipe(config_filter, config)
    )

    return df


# deprecated alias for GGPT
@deprecated(
deprecated_in="0.5.5",
Expand Down
39 changes: 26 additions & 13 deletions powerplantmatching/package_data/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ matching_sources:
- BEYONDCOAL: Fueltype != 'Solar'
- WIKIPEDIA: Fueltype != 'Solar'
- GEM
- MASTR

# fully_included_sources, these sources are included even without match to the final dataset
fully_included_sources:
Expand All @@ -34,7 +35,8 @@ fully_included_sources:
- JRC: Country not in ['Switzerland', 'Albania', 'United Kingdom', 'Norway']
- OPSD: Country not in ['Switzerland', 'Italy', 'Spain', 'Norway', 'Austria']
- BEYONDCOAL
- GEM
- GEM: Country != 'Germany' or Fueltype == 'Solar'
- MASTR


parallel_duke_processes: false
Expand Down Expand Up @@ -202,6 +204,14 @@ GHPT:
status: ["operating", "retired", "construction"]
fn: Global-Hydropower-Tracker-April-2024.xlsx
url: https://tubcloud.tu-berlin.de/s/sEztyBLdJS5sNHY/download/Global-Hydropower-Tracker-April-2024.xlsx

MASTR:
net_capacity: true
reliability_score: 8
status: ["In Betrieb", "In Planung", "Endgültig stillgelegt"]
fn: bnetza_open_mastr_2023-08-08_B.zip
url: https://zenodo.org/records/8225106/files/bnetza_open_mastr_2023-08-08_B.zip

# ---------------------------------------------------------------------------- #
# Data Structure Config #
# ---------------------------------------------------------------------------- #
Expand Down Expand Up @@ -269,8 +279,8 @@ target_fueltypes:
# given by the list. An empty string results in a regex expression containing only the key.
# Parsing results of representatives at the top may be overwritten by representatives further below.
Other: ".*"
Solid Biomass: [biological, bioenergy, agricultural, wood, biomass]
Biogas: [biogas]
Solid Biomass: [biological, bioenergy, agricultural, wood, biomass, feste biomasse]
Biogas: [biogas, biomethan, gasförmige biomasse]
Nuclear: [nuclear]
Natural Gas:
[
Expand All @@ -282,6 +292,8 @@ target_fueltypes:
combined cycle,
fossil gas,
mixed fossil fuels,
erdgas,
andere gase,
]
Hydro:
[
Expand All @@ -293,13 +305,14 @@ target_fueltypes:
hydro,
hydroelectric,
wasserkraft,
wasser,
]
Hard Coal: [coal, coke]
Lignite: [brown coal, lignite, peat]
Oil: [oil, diesel]
Hard Coal: [coal, coke, steinkohle]
Lignite: [brown coal, lignite, peat, braunkohle]
Oil: [oil, diesel, mineralölprodukte]
Geothermal: ""
Solar: ""
Waste: ""
Waste: ["abfall.*", "waste"]
Wind: ""
Battery: [Electro-chemical, battery]
target_sets:
Expand Down Expand Up @@ -328,12 +341,12 @@ target_technologies:
# A list will be converted to a regex expression matching all words (case-insensitive)
# given by the list. An empty string results in a regex expression containing only the key.
# Parsing results of representatives at the top may be overwritten by representatives further below.
CCGT: [ccgt, gas, natural gas]
OCGT: [ocgt]
Steam Turbine: [steam, turbine]
Combustion Engine: [combustion engine]
Run-Of-River: [run-off, run off, run of river, run-of-river, ror]
Pumped Storage: [pumped hydro, pumped]
CCGT: [ccgt, gas, natural gas, gasturbinen mit abhitzekessel]
OCGT: [ocgt, gasturbinen ohne abhitzekessel]
Steam Turbine: [steam, turbine, kondensationsmaschine, gegendruckmaschine, dampfmotor]
Combustion Engine: [combustion engine, verbrennungsmotor, stirlingmotor]
Run-Of-River: [run-off, run off, run of river, run-of-river, ror, laufwasseranlage]
Pumped Storage: [pumped hydro, pumped, speicherwasseranlage]
Reservoir: ""
Marine: ""
Onshore: ""
Expand Down
Loading