Point source visualization and implementation into clustering algorit… #152

Merged: 14 commits, Aug 18, 2023
6 changes: 5 additions & 1 deletion config.yml
@@ -54,6 +54,10 @@ BufferDeg: 5
LandThreshold: 0.25
OffshoreEmisThreshold: 0

## Point source datasets
## Used for visualizations and state vector clustering
PointSourceDatasets: ["SRON"]

## Clustering Options
ReducedDimensionStateVector: false
ClusteringMethod: "kmeans"
@@ -179,4 +183,4 @@ PreviewDryRun: true
SpinupDryrun: true
ProductionDryRun: true
PosteriorDryRun: true
BCdryrun: true
BCdryrun: true
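
The new `PointSourceDatasets` key takes a list, so multiple datasets could in principle be enabled at once. A minimal sketch of how downstream code might read it, assuming only the keys visible in this diff:

```python
import yaml

# Minimal sketch, assuming only the keys shown in the diff above.
with open("config.yml") as f:
    config = yaml.safe_load(f)

datasets = config.get("PointSourceDatasets", [])  # e.g. ["SRON"]
if "SRON" in datasets:
    print("SRON point sources enabled for visualization and clustering")
```
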
4 changes: 4 additions & 0 deletions envs/Harvard-Cannon/config.harvard-cannon.yml
@@ -54,6 +54,10 @@ BufferDeg: 5
LandThreshold: 0.25
OffshoreEmisThreshold: 0

## Point source datasets
## Used for visualizations and state vector clustering
PointSourceDatasets: ["SRON"]

## Clustering Options
ReducedDimensionStateVector: false
ClusteringMethod: "kmeans"
3 changes: 2 additions & 1 deletion envs/Harvard-Cannon/imi_env.yml
@@ -22,4 +22,5 @@ dependencies:
- ipykernel=6.15.0
- jupyter=1.0.0
- bottleneck=1.3.5
- boto3=1.26.161
- bs4=4.12.2
- boto3=1.26.161
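
The environment gains `bs4` (BeautifulSoup) next to the existing `boto3` pin, which suggests the new point source code scrapes a listing somewhere. A toy sketch of the library pattern only, assuming `requests` (not shown in this env file) for the fetch; the URL and table layout are invented:

```python
import requests
from bs4 import BeautifulSoup

# Toy pattern only: the URL and table structure are hypothetical,
# not taken from this PR.
html = requests.get("https://example.org/plume-list").text
soup = BeautifulSoup(html, "html.parser")
for row in soup.find_all("tr"):
    cells = [td.get_text(strip=True) for td in row.find_all("td")]
    print(cells)  # e.g. lat, lon, detection date per plume
```
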
4 changes: 4 additions & 0 deletions resources/containers/container_config.yml
@@ -54,6 +54,10 @@ BufferDeg: 5
LandThreshold: 0.25
OffshoreEmisThreshold: 0

## Point source datasets
## Used for visualizations and state vector clustering
PointSourceDatasets: ["SRON"]

## Clustering Options
ReducedDimensionStateVector: false
ClusteringMethod: "kmeans"
2 changes: 2 additions & 0 deletions run_imi.sh
@@ -4,6 +4,7 @@
#SBATCH -n 1
#SBATCH -o "imi_output.log"


# This script will run the Integrated Methane Inversion (IMI) with GEOS-Chem.
# For documentation, see https://imi.readthedocs.io.
#
@@ -196,6 +197,7 @@ cd $InversionPath
cp $ConfigFile "${RunDirs}/config_${RunName}.yml"

# Upload output to S3 if specified
cd $InversionPath
python src/utilities/s3_upload.py $ConfigFile

exit 0
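
The added `cd $InversionPath` guarantees the relative path to `src/utilities/s3_upload.py` resolves no matter where earlier steps left the working directory. The upload script itself is not in this diff; a hedged sketch of what it might do with the pinned `boto3` (the bucket name and the config switch are placeholders):

```python
import sys
import yaml
import boto3

# Hedged sketch of src/utilities/s3_upload.py (not shown in this diff).
# The bucket name and the config key gating the upload are assumptions.
with open(sys.argv[1]) as f:
    config = yaml.safe_load(f)

if config.get("S3Upload"):  # hypothetical switch
    s3 = boto3.client("s3")
    s3.upload_file(
        "imi_output.log",
        "example-imi-bucket",
        f"{config['RunName']}/imi_output.log",
    )
```
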
210 changes: 106 additions & 104 deletions src/components/shell_variable_manifest.md

Large diffs are not rendered by default.

58 changes: 18 additions & 40 deletions src/components/statevector_component/aggregation.py
@@ -5,9 +5,8 @@
import yaml
import xarray as xr
import numpy as np
import pandas as pd
import yaml
import sys

from src.inversion_scripts.point_sources import get_point_source_coordinates
from src.inversion_scripts.imi_preview import (
estimate_averaging_kernel,
map_sensitivities_to_sv,
@@ -296,41 +295,6 @@ def generate_cluster_pairs(config, sensitivities):
return sorted(cluster_pairs, key=lambda x: x[0])


def read_coordinates(coord_var):
"""
Description:
Read coordinates either from a list of lists or a csv file
arguments:
coord_var [] or String : either a list of coordinates or a csv file
Returns: [[]] : list of [lat, lon] coordinates of floats
"""

# handle path to csv file containing coordinates
if isinstance(coord_var, str):
if not coord_var.endswith(".csv"):
raise Exception(
"ForcedNativeResolutionElements expects either a .csv file or a list of lists."
)
coords_df = pd.read_csv(coord_var)

# check if lat and lon columns are present
if not ("lat" in coords_df.columns and "lon" in coords_df.columns):
raise Exception(
"lat or lon columns are not present in the csv file."
+ " csv file must have lat and lon in header using lowercase."
)
# select lat and lon columns and convert to list of lists
return coords_df[["lat", "lon"]].values.tolist()

# handle list of lists
elif isinstance(coord_var, list):
return coord_var
else:
# Variable is neither a string nor a list
print("Warning: No ForcedNativeResolutionElements specified or invalid format.")
return None
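
The deleted `read_coordinates` helper is superseded by `get_point_source_coordinates`, imported above from `src/inversion_scripts/point_sources.py` but not included in this diff. A hypothetical sketch of what it presumably does, combining manual config entries with dataset-derived plumes:

```python
def fetch_sron_plumes(config):
    """Hypothetical fetcher for SRON plume detections; returns [[lat, lon]]."""
    return []  # placeholder: the real dataset access is not shown in this PR

def get_point_source_coordinates(config):
    """Return a list of [lat, lon] pairs to force to native resolution."""
    coords = []
    # Manually forced elements may still be listed in the config
    coords.extend(config.get("ForcedNativeResolutionElements") or [])
    # External plume datasets (e.g. SRON) contribute additional points
    if "SRON" in config.get("PointSourceDatasets", []):
        coords.extend(fetch_sron_plumes(config))
    return coords
```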


def force_native_res_pixels(config, clusters, sensitivities):
"""
Description:
@@ -343,10 +307,13 @@ def force_native_res_pixels(config, clusters, sensitivities):
cluster_pairs [(tuple)]: cluster pairings
Returns: [double] : updated sensitivities
"""
coords = read_coordinates(config["ForcedNativeResolutionElements"])
coords = get_point_source_coordinates(config)

if coords is None:
if len(coords) == 0:
# No forced pixels provided
print(
    "No forced native-resolution pixels specified in the config and none "
    f"found in the {config['PointSourceDatasets']} datasets."
)
return sensitivities

if config["Res"] == "0.25x0.3125":
@@ -356,6 +323,17 @@
lat_step = 0.5
lon_step = 0.625

for i, (lat, lon) in enumerate(coords):
    # Bin to the gridcell corner and write back, so duplicates within one
    # cell can be dropped below (reassigning the loop variables alone is a no-op)
    coords[i] = [np.floor(lat / lat_step) * lat_step, np.floor(lon / lon_step) * lon_step]

# Remove any duplicate coordinates within the same gridcell.
coords = sorted(set(map(tuple, coords)), reverse=True)
coords = [list(coordinate) for coordinate in coords]

if len(coords) > config["NumberOfElements"]:
coords = coords[0 : config["NumberOfElements"] - 1]

for lat, lon in coords:
binned_lon = np.floor(lon / lon_step) * lon_step
binned_lat = np.floor(lat / lat_step) * lat_step
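
The new block snaps every point source to its gridcell, deduplicates within cells, and caps the list at `NumberOfElements - 1`, presumably leaving at least one element for the aggregated remainder. A standalone sketch of the binning and dedup step, assuming the 0.25 x 0.3125 grid:

```python
import numpy as np

# Standalone sketch of the binning + dedup logic, assuming Res = 0.25x0.3125
lat_step, lon_step = 0.25, 0.3125
coords = [[31.74, 5.91], [31.70, 5.93], [45.00, -104.20]]  # illustrative points

# Snap each point to the lower-left corner of its gridcell
coords = [
    [np.floor(lat / lat_step) * lat_step, np.floor(lon / lon_step) * lon_step]
    for lat, lon in coords
]

# Points that fall in the same cell collapse to a single element
coords = [list(c) for c in sorted(set(map(tuple, coords)), reverse=True)]
print(coords)  # first two sources share a cell -> 2 unique cells remain
```
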
1 change: 0 additions & 1 deletion src/geoschem_run_scripts/ch4_run.template
@@ -1,5 +1,4 @@
#!/bin/bash

##SBATCH -N 1
##SBATCH --mail-type=END

1 change: 0 additions & 1 deletion src/geoschem_run_scripts/run_jacobian_simulations.sh
@@ -1,5 +1,4 @@
#!/bin/bash

#SBATCH -J {RunName}
#SBATCH -N 1

49 changes: 29 additions & 20 deletions src/inversion_scripts/imi_preview.py
@@ -1,24 +1,25 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

#SBATCH -N 1
# SBATCH -N 1

import os
import sys
import yaml
import time
import warnings
import datetime
import numpy as np
import xarray as xr
import pandas as pd
import matplotlib
import colorcet as cc
import cartopy.crs as ccrs

matplotlib.use("Agg")
import matplotlib.pyplot as plt
import yaml
import os
import datetime
import time
import warnings
import cartopy.crs as ccrs
import colorcet as cc
from joblib import Parallel, delayed
from src.inversion_scripts.point_sources import get_point_source_coordinates
from src.inversion_scripts.utils import (
sum_total_emissions,
count_obs_in_mask,
@@ -32,12 +33,13 @@
read_tropomi,
read_blended,
)
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)


def get_TROPOMI_data(file_path, BlendedTROPOMI, xlim, ylim, startdate_np64, enddate_np64):
def get_TROPOMI_data(
file_path, BlendedTROPOMI, xlim, ylim, startdate_np64, enddate_np64
):
"""
Returns a dict with the lat, lon, xch4, and albedo_swir observations
extracted from the given tropomi file. Filters are applied to remove
@@ -228,6 +230,7 @@ def imi_preview(
lat_bounds=None,
levels=21,
title="Prior emissions",
point_sources=get_point_source_coordinates(config),
cbar_label="Emissions (kg km$^{-2}$ h$^{-1}$)",
mask=mask,
only_ROI=False,
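
The new `point_sources` keyword feeds the coordinates into the existing prior-emissions plotting call; the plotting helper itself is outside this diff. A sketch of how such an overlay might be drawn (the function name and signature here are assumptions, not the PR's actual helper):

```python
import cartopy.crs as ccrs

# Assumed helper: the real plotting code that receives point_sources is
# not shown in this diff.
def overlay_point_sources(ax, point_sources):
    """Mark [lat, lon] point sources on a cartopy GeoAxes."""
    if point_sources:
        lats, lons = zip(*point_sources)
        ax.scatter(lons, lats, marker="x", color="red", s=40,
                   transform=ccrs.PlateCarree(), label="point sources")
```
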
@@ -255,6 +258,7 @@ def imi_preview(
mask=mask,
only_ROI=False,
)

plt.savefig(
os.path.join(preview_dir, "preview_observations.png"),
bbox_inches="tight",
@@ -326,12 +330,16 @@ def imi_preview(
bbox_inches="tight",
dpi=150,
)
expectedDOFS = np.round(sum(a),5)
expectedDOFS = np.round(sum(a), 5)
if expectedDOFS < config["DOFSThreshold"]:
print(f"\nExpected DOFS = {expectedDOFS} are less than DOFSThreshold = {config['DOFSThreshold']}. Exiting.\n")
print("Consider increasing the inversion period, increasing the prior error, or using another prior inventory.\n")
print(
f"\nExpected DOFS = {expectedDOFS} are less than DOFSThreshold = {config['DOFSThreshold']}. Exiting.\n"
)
print(
"Consider increasing the inversion period, increasing the prior error, or using another prior inventory.\n"
)
# if run with sbatch this ensures the exit code is not lost.
file = open(".error_status_file.txt", 'w')
file = open(".error_status_file.txt", "w")
file.write("Error Status: 1")
file.close()
sys.exit(1)
@@ -441,7 +449,9 @@ def estimate_averaging_kernel(

# Read in and filter tropomi observations (uses parallel processing)
observation_dicts = Parallel(n_jobs=-1)(
delayed(get_TROPOMI_data)(file_path, BlendedTROPOMI, xlim, ylim, startdate_np64, enddate_np64)
delayed(get_TROPOMI_data)(
file_path, BlendedTROPOMI, xlim, ylim, startdate_np64, enddate_np64
)
for file_path in tropomi_paths
)
# Remove any problematic observation dicts (e.g. corrupted data file)
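
`Parallel(n_jobs=-1)` fans the TROPOMI file reads across all available cores and returns results in input order. A toy illustration of the `joblib` pattern used here:

```python
from joblib import Parallel, delayed

# Toy illustration: n_jobs=-1 uses every core; results preserve input order.
def read_one(path):
    return {"path": path, "n_obs": 0}  # stand-in for get_TROPOMI_data

paths = ["f1.nc", "f2.nc", "f3.nc"]
dicts = Parallel(n_jobs=-1)(delayed(read_one)(p) for p in paths)
print([d["path"] for d in dicts])  # ['f1.nc', 'f2.nc', 'f3.nc']
```
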
@@ -493,12 +503,11 @@ def process(i):

# unpack list of tuples into individual lists
emissions, L, num_obs = [list(item) for item in zip(*result)]

if np.sum(num_obs) < 1:
sys.exit("Error: No observations found in region of interest")
outstring2 = f"Found {np.sum(num_obs)} observations in the region of interest"


# ----------------------------------
# Estimate information content
# ----------------------------------
@@ -507,7 +516,7 @@
emissions = np.array(emissions)
m = np.array(num_obs) # Number of observations per state vector element
L = np.array(L)

# If Kalman filter mode, count observations per inversion period
if config["KalmanMode"]:
startday_dt = datetime.datetime.strptime(startday, "%Y%m%d")
@@ -547,10 +556,10 @@ def process(i):
outstring3 = f"k = {np.round(k,5)} kg-1 m2 s"
outstring4 = f"a = {np.round(a,5)} \n"
outstring5 = f"expectedDOFS: {np.round(sum(a),5)}"

if config["KalmanMode"]:
outstring5 += " per inversion period"

print(outstring3)
print(outstring4)
print(outstring5)
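
As the surrounding code shows, the reported expected DOFS is just the sum (trace) of the averaging kernel diagonal `a`. A toy numeric check:

```python
import numpy as np

# Toy numbers only: expected DOFS = sum of the averaging kernel diagonal.
a = np.array([0.8, 0.5, 0.2])
print(f"expectedDOFS: {np.round(sum(a), 5)}")  # expectedDOFS: 1.5
```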