Skip to content

Commit

Permalink
update GEM data to latest versions (Aug 2024)
Browse files Browse the repository at this point in the history
  • Loading branch information
fneum committed Aug 23, 2024
1 parent a15a43f commit aff8ebb
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 67 deletions.
5 changes: 3 additions & 2 deletions doc/release-notes.rst
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
Release Notes
==============

.. Upcoming Version
.. ----------------
Upcoming Version
----------------

* Update Global Energy Monitor data to latest versions as of 2024-08-23.

Version 0.5.16 (31.08.2024)
---------------------------
Expand Down
121 changes: 74 additions & 47 deletions powerplantmatching/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1598,22 +1598,24 @@ def GBPT(raw=False, update=False, config=None):
"""
config = get_config() if config is None else config
fn = get_raw_file("GBPT", update=update, config=config)
df = pd.read_csv(fn, thousands=",")
df = pd.read_excel(fn, sheet_name="Data")

if raw:
return df

RENAME_COLUMNS = {
"Project Name": "Name",
"Project name": "Name",
"Capacity (MW)": "Capacity",
"Operating Status": "Status",
"Operating status": "Status",
"Latitude": "lat",
"Longitude": "lon",
"Start year": "DateIn",
"Unit start year": "DateIn",
"Retired year": "DateOut",
"GEM phase ID": "projectID",
}

status_list = config["GBPT"].get("status", ["operating"])

df = df.rename(columns=RENAME_COLUMNS)
df_final = (
df.pipe(clean_name)
Expand All @@ -1626,7 +1628,7 @@ def GBPT(raw=False, update=False, config=None):
lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
)
.query("Status in ['operating','mothballed','construction']")
.query("Status in @status_list")
.pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
.assign(Fueltype="Bioenergy")
.assign(Technology="Steam Turbine")
Expand All @@ -1653,7 +1655,7 @@ def GNPT(raw=False, update=False, config=None):
"""
config = get_config() if config is None else config
fn = get_raw_file("GNPT", update=update, config=config)
df = pd.read_csv(fn, thousands=",")
df = pd.read_excel(fn, sheet_name="Data")

if raw:
return df
Expand All @@ -1664,10 +1666,13 @@ def GNPT(raw=False, update=False, config=None):
"Latitude": "lat",
"Longitude": "lon",
"Start Year": "DateIn",
"Retired Year": "DateOut",
"Retirement Year": "DateOut",
"Country/Area": "Country",
"GEM unit ID": "projectID",
}

status_list = config["GNPT"].get("status", ["operating"])

df = df.rename(columns=RENAME_COLUMNS)
df_final = (
df.pipe(clean_name)
Expand All @@ -1680,7 +1685,7 @@ def GNPT(raw=False, update=False, config=None):
lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
)
.query("Status in ['operating','mothballed','construction']")
.query("Status in @status_list")
.pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
.assign(Fueltype="Nuclear")
.assign(Technology="Steam Turbine")
Expand Down Expand Up @@ -1708,43 +1713,40 @@ def GCPT(raw=False, update=False, config=None):

config = get_config() if config is None else config
fn = get_raw_file("GCPT", update=update, config=config)
df = pd.read_csv(fn)
df = pd.read_excel(fn, sheet_name="Units")

if raw:
return df

RENAME_COLUMNS = {
"Plant": "Name",
"Plant name": "Name",
"Combustion technology": "Technology",
"Coal type": "Fueltype",
"Capacity (MW)": "Capacity",
"Latitude": "lat",
"Longitude": "lon",
"Year": "DateIn",
"RETIRED": "DateOut",
"Tracker ID": "projectID",
"Start year": "DateIn",
"Retired year": "DateOut",
"Country/Area": "Country",
"GEM unit/phase ID": "projectID",
}
fueltype_dict = {
"bituminous": "Hard Coal",
"lignite": "Lignite",
"unknown": "Hard Coal",
"sub-bit": "Hard Coal",
"bituminous/sub-bit": "Hard Coal",
"wstbituminous": "Hard Coal",
"unknown(ccs90)": "Hard Coal",
"hard": "Hard Coal",
"anth": "Hard Coal",
"lignite(ccs90)": "Lignite",
"sub-bit(ccs90)": "Hard Coal",
"lignite/sub-bit": "Lignite",
"wstcoal": "Hard Coal",
"bituminous(ccs90)": "Hard Coal",
"bituminous(ccs30)": "Hard Coal",
"lignite/bituminous": "Lignite",
"anth/bituminous": "Hard Coal",
"anth/culm": "Hard Coal",
"bituminous/wstbituminous": "Hard Coal",
"subbituminous": "Hard Coal",
"waste coal": "Hard Coal",
"anthracite": "Hard Coal",
"lignite with CCS": "Lignite",
"bituminous with CCS": "Hard Coal",
"subbituminous with CCS": "Hard Coal",
"unknown with CCS": "Hard Coal",
}

planned_retirement = df["Planned retirement"].apply(pd.to_numeric, errors="coerce")

status_list = config["GCPT"].get("status", ["operating"])

df = df.rename(columns=RENAME_COLUMNS)
df_final = (
df.pipe(clean_name)
Expand All @@ -1757,7 +1759,8 @@ def GCPT(raw=False, update=False, config=None):
lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
)
.query("Status in ['operating','mothballed','construction']")
.assign(DateOut=lambda x: x["DateOut"].combine_first(planned_retirement))
.query("Status in @status_list")
.pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
.pipe(lambda x: x.replace({"Fueltype": fueltype_dict}))
.pipe(lambda x: x.assign(Technology="Steam Turbine"))
Expand Down Expand Up @@ -1785,21 +1788,24 @@ def GGTPT(raw=False, update=False, config=None):
"""
config = get_config() if config is None else config
fn = get_raw_file("GGTPT", update=update, config=config)
df = pd.read_csv(fn)
df = pd.read_excel(fn, sheet_name="Data")

if raw:
return df

RENAME_COLUMNS = {
"Project Name": "Name",
"Unit Capacity (MW)": "Capacity",
"Capacity (MW)": "Capacity",
"Latitude": "lat",
"Longitude": "lon",
"Start year": "DateIn",
"Retired year": "DateOut",
"Country/Area": "Country",
"GEM unit ID": "projectID",
}

status_list = config["GGTPT"].get("status", ["operating"])

df = df.rename(columns=RENAME_COLUMNS)
df_final = (
df.pipe(clean_name)
Expand All @@ -1812,7 +1818,7 @@ def GGTPT(raw=False, update=False, config=None):
lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
)
.query("Status in ['operating','mothballed','construction']")
.query("Status in @status_list")
.pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
.assign(Fueltype="Geothermal")
.assign(Technology="Steam Turbine")
Expand All @@ -1839,7 +1845,7 @@ def GWPT(raw=False, update=False, config=None):
"""
config = get_config() if config is None else config
fn = get_raw_file("GWPT", update=update, config=config)
df = pd.read_csv(fn)
df = pd.read_excel(fn, sheet_name="Data")

RENAME_COLUMNS = {
"Project Name": "Name",
Expand All @@ -1850,15 +1856,18 @@ def GWPT(raw=False, update=False, config=None):
"Retired year": "DateOut",
"GEM phase ID": "projectID",
"Installation Type": "Technology",
"Country/Area": "Country",
}

technology_dict = {
"onshore": "Onshore",
"offshore hard mount": "Offshore",
"offshore floating": "Offshore",
"offshore mount unknown": "Offshore",
"Onshore": "Onshore",
"Offshore hard mount": "Offshore",
"Offshore floating": "Offshore",
"Offshore mount unknown": "Offshore",
}

status_list = config["GWPT"].get("status", ["operating"])

df = df.rename(columns=RENAME_COLUMNS)
df_final = (
df.pipe(clean_name)
Expand All @@ -1871,7 +1880,7 @@ def GWPT(raw=False, update=False, config=None):
lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
)
.query("Status in ['operating','mothballed','construction']")
.query("Status in @status_list")
.pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
.pipe(lambda x: x.replace({"Technology": technology_dict}))
.assign(Fueltype="Wind")
Expand Down Expand Up @@ -1899,7 +1908,7 @@ def GSPT(raw=False, update=False, config=None):

config = get_config() if config is None else config
fn = get_raw_file("GSPT", update=update, config=config)
df = pd.read_csv(fn)
df = pd.read_excel(fn, sheet_name="20 MW+")

if raw:
return df
Expand All @@ -1913,13 +1922,17 @@ def GSPT(raw=False, update=False, config=None):
"Retired year": "DateOut",
"Technology Type": "Technology",
"GEM phase ID": "projectID",
"Country/Area": "Country",
}

technology_dict = {
"PV": "PV",
"Solar Thermal": "CSP",
"Assumed PV": "PV",
}

status_list = config["GSPT"].get("status", ["operating"])

df = df.rename(columns=RENAME_COLUMNS)
df_final = (
df.pipe(clean_name)
Expand All @@ -1932,7 +1945,7 @@ def GSPT(raw=False, update=False, config=None):
lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
)
.query("Status in ['operating','mothballed','construction']")
.query("Status in @status_list")
.pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
.pipe(lambda x: x.replace({"Technology": technology_dict}))
.assign(Fueltype="Solar")
Expand All @@ -1959,30 +1972,33 @@ def GGPT(raw=False, update=False, config=None):
"""
config = get_config() if config is None else config
fn = get_raw_file("GGPT", update=update, config=config)
df = pd.read_csv(fn)
df = pd.read_excel(fn, sheet_name="Gas & Oil Units")

if raw:
return df

RENAME_COLUMNS = {
"Plant name": "Name",
"Fuel": "Fueltype",
"Capacity elec. (MW)": "Capacity",
"Capacity (MW)": "Capacity",
"Latitude": "lat",
"Longitude": "lon",
"Start year": "DateIn",
"Retired year": "DateOut",
"CHP": "Set",
"GEM location ID": "projectID",
"Fuel": "Fueltype",
}

technology_dict = {
"GT": "Steam Turbine",
"IC": "Steam Turbine",
"CC": "CCGT",
"GT/IC": "Steam Turbine",
"ICCC": "CCGT",
"ISCC": "CCGT",
"ST": "Steam Turbine",
"AFC": "CCGT",
}

set_dict = {
Expand All @@ -1991,6 +2007,9 @@ def GGPT(raw=False, update=False, config=None):
"not found": "PP",
}

status_list = config["GGPT"].get("status", ["operating"])
gas_fuels = ["NG", "LNG", "BU", "LFG", "BG", "BFG", "COG", "CM", "H", "OG"]

df = df.rename(columns=RENAME_COLUMNS)
df_final = (
df.pipe(clean_name)
Expand All @@ -2004,11 +2023,18 @@ def GGPT(raw=False, update=False, config=None):
lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
Capacity=lambda df: pd.to_numeric(df.Capacity, "coerce"),
Fueltype=df["Fueltype"].apply(
lambda s: (
"Natural Gas"
if any(sub in gas_fuels for sub in s.split("/"))
else "Oil"
)
),
)
.query("Status in ['operating','mothballed','construction']")
.query("Status in @status_list")
.pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
.pipe(lambda x: x.replace({"Technology": technology_dict}))
.pipe(lambda x: x.replace({"Set": set_dict}))
.pipe(lambda x: x.replace({"Set": set_dict}).fillna({"Set": "PP"}))
.assign(Fueltype="Natural Gas")
.pipe(config_filter, config)
)
Expand All @@ -2032,7 +2058,7 @@ def GHPT(raw=False, update=False, config=None):
"""
config = get_config() if config is None else config
fn = get_raw_file("GHPT", update=update, config=config)
df = pd.read_csv(fn)
df = pd.read_excel(fn, sheet_name="Data")

if raw:
return df
Expand All @@ -2055,6 +2081,7 @@ def GHPT(raw=False, update=False, config=None):
"conventional and pumped storage": "Pumped Storage",
"conventional and run-of-river": "Run-Of-River",
}
status_list = config["GHPT"].get("status", ["operating"])
df = df.rename(columns=RENAME_COLUMNS)
df_final = (
df.pipe(clean_name)
Expand All @@ -2067,7 +2094,7 @@ def GHPT(raw=False, update=False, config=None):
lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
)
.query("Status in ['operating','construction']")
.query("Status in @status_list")
.pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
.pipe(lambda x: x.replace({"Technology": technology_dict}))
.assign(Fueltype="Hydro")
Expand Down
Loading

0 comments on commit aff8ebb

Please sign in to comment.