Point source visualization and implementation into clustering algorit… #152

Merged: 14 commits, Aug 18, 2023
6 changes: 5 additions & 1 deletion config.yml
@@ -54,6 +54,10 @@ BufferDeg: 5
LandThreshold: 0.25
OffshoreEmisThreshold: 0

## Point source datasets
## Used for visualizations and state vector clustering
PointSourceDatasets: ["SRON"]

## Clustering Options
ReducedDimensionStateVector: false
ClusteringMethod: "kmeans"
@@ -179,4 +183,4 @@ PreviewDryRun: true
SpinupDryrun: true
ProductionDryRun: true
PosteriorDryRun: true
BCdryrun: true
BCdryrun: true
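
The new `PointSourceDatasets` key takes a list, so multiple datasets could in principle be enabled at once. A minimal sketch of how downstream code might read it, assuming only the keys visible in this diff:

```python
import yaml

# Minimal sketch, assuming only the keys shown in the diff above.
with open("config.yml") as f:
    config = yaml.safe_load(f)

datasets = config.get("PointSourceDatasets", [])  # e.g. ["SRON"]
if "SRON" in datasets:
    print("SRON point sources enabled for visualization and clustering")
```
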
4 changes: 4 additions & 0 deletions envs/Harvard-Cannon/config.harvard-cannon.yml
@@ -54,6 +54,10 @@ BufferDeg: 5
LandThreshold: 0.25
OffshoreEmisThreshold: 0

## Point source datasets
## Used for visualizations and state vector clustering
PointSourceDatasets: ["SRON"]

## Clustering Options
ReducedDimensionStateVector: false
ClusteringMethod: "kmeans"
3 changes: 2 additions & 1 deletion envs/Harvard-Cannon/imi_env.yml
@@ -22,4 +22,5 @@ dependencies:
- ipykernel=6.15.0
- jupyter=1.0.0
- bottleneck=1.3.5
- boto3=1.26.161
- bs4=4.12.2
- boto3=1.26.161
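
The environment gains `bs4` (BeautifulSoup) next to the existing `boto3` pin, which suggests the new point source code scrapes a listing somewhere. A toy sketch of the library pattern only, assuming `requests` (not shown in this env file) for the fetch; the URL and table layout are invented:

```python
import requests
from bs4 import BeautifulSoup

# Toy pattern only: the URL and table structure are hypothetical,
# not taken from this PR.
html = requests.get("https://example.org/plume-list").text
soup = BeautifulSoup(html, "html.parser")
for row in soup.find_all("tr"):
    cells = [td.get_text(strip=True) for td in row.find_all("td")]
    print(cells)  # e.g. lat, lon, detection date per plume
```
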
4 changes: 4 additions & 0 deletions resources/containers/container_config.yml
@@ -54,6 +54,10 @@ BufferDeg: 5
LandThreshold: 0.25
OffshoreEmisThreshold: 0

## Point source datasets
## Used for visualizations and state vector clustering
PointSourceDatasets: ["SRON"]

## Clustering Options
ReducedDimensionStateVector: false
ClusteringMethod: "kmeans"
2 changes: 2 additions & 0 deletions run_imi.sh
@@ -4,6 +4,7 @@
#SBATCH -n 1
#SBATCH -o "imi_output.log"


# This script will run the Integrated Methane Inversion (IMI) with GEOS-Chem.
# For documentation, see https://imi.readthedocs.io.
#
@@ -196,6 +197,7 @@ cd $InversionPath
cp $ConfigFile "${RunDirs}/config_${RunName}.yml"

# Upload output to S3 if specified
cd $InversionPath
python src/utilities/s3_upload.py $ConfigFile

exit 0
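
The added `cd $InversionPath` guarantees the relative path to `src/utilities/s3_upload.py` resolves no matter where earlier steps left the working directory. The upload script itself is not in this diff; a hedged sketch of what it might do with the pinned `boto3` (the bucket name and the config switch are placeholders):

```python
import sys
import yaml
import boto3

# Hedged sketch of src/utilities/s3_upload.py (not shown in this diff).
# The bucket name and the config key gating the upload are assumptions.
with open(sys.argv[1]) as f:
    config = yaml.safe_load(f)

if config.get("S3Upload"):  # hypothetical switch
    s3 = boto3.client("s3")
    s3.upload_file(
        "imi_output.log",
        "example-imi-bucket",
        f"{config['RunName']}/imi_output.log",
    )
```
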
210 changes: 106 additions & 104 deletions src/components/shell_variable_manifest.md

Large diffs are not rendered by default.

58 changes: 18 additions & 40 deletions src/components/statevector_component/aggregation.py
@@ -5,9 +5,8 @@
import yaml
import xarray as xr
import numpy as np
import pandas as pd
import yaml
import sys

from src.inversion_scripts.point_sources import get_point_source_coordinates
from src.inversion_scripts.imi_preview import (
estimate_averaging_kernel,
map_sensitivities_to_sv,
@@ -296,41 +295,6 @@ def generate_cluster_pairs(config, sensitivities):
return sorted(cluster_pairs, key=lambda x: x[0])


def read_coordinates(coord_var):
"""
Description:
Read coordinates either from a list of lists or a csv file
arguments:
coord_var [] or String : either a list of coordinates or a csv file
Returns: [[]] : list of [lat, lon] coordinates of floats
"""

# handle path to csv file containing coordinates
if isinstance(coord_var, str):
if not coord_var.endswith(".csv"):
raise Exception(
"ForcedNativeResolutionElements expects either a .csv file or a list of lists."
)
coords_df = pd.read_csv(coord_var)

# check if lat and lon columns are present
if not ("lat" in coords_df.columns and "lon" in coords_df.columns):
raise Exception(
"lat or lon columns are not present in the csv file."
+ " csv file must have lat and lon in header using lowercase."
)
# select lat and lon columns and convert to list of lists
return coords_df[["lat", "lon"]].values.tolist()

# handle list of lists
elif isinstance(coord_var, list):
return coord_var
else:
# Variable is neither a string nor a list
print("Warning: No ForcedNativeResolutionElements specified or invalid format.")
return None
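
The deleted `read_coordinates` helper is superseded by `get_point_source_coordinates`, imported above from `src/inversion_scripts/point_sources.py` but not included in this diff. A hypothetical sketch of what it presumably does, combining manual config entries with dataset-derived plumes:

```python
def fetch_sron_plumes(config):
    """Hypothetical fetcher for SRON plume detections; returns [[lat, lon]]."""
    return []  # placeholder: the real dataset access is not shown in this PR

def get_point_source_coordinates(config):
    """Return a list of [lat, lon] pairs to force to native resolution."""
    coords = []
    # Manually forced elements may still be listed in the config
    coords.extend(config.get("ForcedNativeResolutionElements") or [])
    # External plume datasets (e.g. SRON) contribute additional points
    if "SRON" in config.get("PointSourceDatasets", []):
        coords.extend(fetch_sron_plumes(config))
    return coords
```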


def force_native_res_pixels(config, clusters, sensitivities):
"""
Description:
@@ -343,10 +307,13 @@ def force_native_res_pixels(config, clusters, sensitivities):
cluster_pairs [(tuple)]: cluster pairings
Returns: [double] : updated sensitivities
"""
coords = read_coordinates(config["ForcedNativeResolutionElements"])
coords = get_point_source_coordinates(config)

if coords is None:
if len(coords) == 0:
# No forced pixels provided
print(
    "No forced native-resolution pixels specified in the config and none "
    f"found in the {config['PointSourceDatasets']} datasets."
)
return sensitivities

if config["Res"] == "0.25x0.3125":
@@ -356,6 +323,17 @@
lat_step = 0.5
lon_step = 0.625

for i, (lat, lon) in enumerate(coords):
    # Bin to the gridcell corner and write back, so duplicates within one
    # cell can be dropped below (reassigning the loop variables alone is a no-op)
    coords[i] = [np.floor(lat / lat_step) * lat_step, np.floor(lon / lon_step) * lon_step]

# Remove any duplicate coordinates within the same gridcell.
coords = sorted(set(map(tuple, coords)), reverse=True)
coords = [list(coordinate) for coordinate in coords]

if len(coords) > config["NumberOfElements"]:
coords = coords[0 : config["NumberOfElements"] - 1]

for lat, lon in coords:
binned_lon = np.floor(lon / lon_step) * lon_step
binned_lat = np.floor(lat / lat_step) * lat_step
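
The new block snaps every point source to its gridcell, deduplicates within cells, and caps the list at `NumberOfElements - 1`, presumably leaving at least one element for the aggregated remainder. A standalone sketch of the binning and dedup step, assuming the 0.25 x 0.3125 grid:

```python
import numpy as np

# Standalone sketch of the binning + dedup logic, assuming Res = 0.25x0.3125
lat_step, lon_step = 0.25, 0.3125
coords = [[31.74, 5.91], [31.70, 5.93], [45.00, -104.20]]  # illustrative points

# Snap each point to the lower-left corner of its gridcell
coords = [
    [np.floor(lat / lat_step) * lat_step, np.floor(lon / lon_step) * lon_step]
    for lat, lon in coords
]

# Points that fall in the same cell collapse to a single element
coords = [list(c) for c in sorted(set(map(tuple, coords)), reverse=True)]
print(coords)  # first two sources share a cell -> 2 unique cells remain
```
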
1 change: 0 additions & 1 deletion src/geoschem_run_scripts/ch4_run.template
@@ -1,5 +1,4 @@
#!/bin/bash

##SBATCH -N 1
##SBATCH --mail-type=END

1 change: 0 additions & 1 deletion src/geoschem_run_scripts/run_jacobian_simulations.sh
@@ -1,5 +1,4 @@
#!/bin/bash

#SBATCH -J {RunName}
#SBATCH -N 1

49 changes: 29 additions & 20 deletions src/inversion_scripts/imi_preview.py
@@ -1,24 +1,25 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

#SBATCH -N 1
# SBATCH -N 1

import os
import sys
import yaml
import time
import warnings
import datetime
import numpy as np
import xarray as xr
import pandas as pd
import matplotlib
import colorcet as cc
import cartopy.crs as ccrs

matplotlib.use("Agg")
import matplotlib.pyplot as plt
import yaml
import os
import datetime
import time
import warnings
import cartopy.crs as ccrs
import colorcet as cc
from joblib import Parallel, delayed
from src.inversion_scripts.point_sources import get_point_source_coordinates
from src.inversion_scripts.utils import (
sum_total_emissions,
count_obs_in_mask,
@@ -32,12 +33,13 @@
read_tropomi,
read_blended,
)
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)


def get_TROPOMI_data(file_path, BlendedTROPOMI, xlim, ylim, startdate_np64, enddate_np64):
def get_TROPOMI_data(
file_path, BlendedTROPOMI, xlim, ylim, startdate_np64, enddate_np64
):
"""
Returns a dict with the lat, lon, xch4, and albedo_swir observations
extracted from the given tropomi file. Filters are applied to remove
@@ -228,6 +230,7 @@ def imi_preview(
lat_bounds=None,
levels=21,
title="Prior emissions",
point_sources=get_point_source_coordinates(config),
cbar_label="Emissions (kg km$^{-2}$ h$^{-1}$)",
mask=mask,
only_ROI=False,
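
The new `point_sources` keyword feeds the coordinates into the existing prior-emissions plotting call; the plotting helper itself is outside this diff. A sketch of how such an overlay might be drawn (the function name and signature here are assumptions, not the PR's actual helper):

```python
import cartopy.crs as ccrs

# Assumed helper: the real plotting code that receives point_sources is
# not shown in this diff.
def overlay_point_sources(ax, point_sources):
    """Mark [lat, lon] point sources on a cartopy GeoAxes."""
    if point_sources:
        lats, lons = zip(*point_sources)
        ax.scatter(lons, lats, marker="x", color="red", s=40,
                   transform=ccrs.PlateCarree(), label="point sources")
```
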
@@ -255,6 +258,7 @@ def imi_preview(
mask=mask,
only_ROI=False,
)

plt.savefig(
os.path.join(preview_dir, "preview_observations.png"),
bbox_inches="tight",
@@ -326,12 +330,16 @@ def imi_preview(
bbox_inches="tight",
dpi=150,
)
expectedDOFS = np.round(sum(a),5)
expectedDOFS = np.round(sum(a), 5)
if expectedDOFS < config["DOFSThreshold"]:
print(f"\nExpected DOFS = {expectedDOFS} are less than DOFSThreshold = {config['DOFSThreshold']}. Exiting.\n")
print("Consider increasing the inversion period, increasing the prior error, or using another prior inventory.\n")
print(
f"\nExpected DOFS = {expectedDOFS} are less than DOFSThreshold = {config['DOFSThreshold']}. Exiting.\n"
)
print(
"Consider increasing the inversion period, increasing the prior error, or using another prior inventory.\n"
)
# if run with sbatch this ensures the exit code is not lost.
file = open(".error_status_file.txt", 'w')
file = open(".error_status_file.txt", "w")
file.write("Error Status: 1")
file.close()
sys.exit(1)
@@ -441,7 +449,9 @@ def estimate_averaging_kernel(

# Read in and filter tropomi observations (uses parallel processing)
observation_dicts = Parallel(n_jobs=-1)(
delayed(get_TROPOMI_data)(file_path, BlendedTROPOMI, xlim, ylim, startdate_np64, enddate_np64)
delayed(get_TROPOMI_data)(
file_path, BlendedTROPOMI, xlim, ylim, startdate_np64, enddate_np64
)
for file_path in tropomi_paths
)
# Remove any problematic observation dicts (e.g. corrupted data file)
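
`Parallel(n_jobs=-1)` fans the TROPOMI file reads across all available cores and returns results in input order. A toy illustration of the `joblib` pattern used here:

```python
from joblib import Parallel, delayed

# Toy illustration: n_jobs=-1 uses every core; results preserve input order.
def read_one(path):
    return {"path": path, "n_obs": 0}  # stand-in for get_TROPOMI_data

paths = ["f1.nc", "f2.nc", "f3.nc"]
dicts = Parallel(n_jobs=-1)(delayed(read_one)(p) for p in paths)
print([d["path"] for d in dicts])  # ['f1.nc', 'f2.nc', 'f3.nc']
```
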
@@ -493,12 +503,11 @@ def process(i):

# unpack list of tuples into individual lists
emissions, L, num_obs = [list(item) for item in zip(*result)]

if np.sum(num_obs) < 1:
sys.exit("Error: No observations found in region of interest")
outstring2 = f"Found {np.sum(num_obs)} observations in the region of interest"


# ----------------------------------
# Estimate information content
# ----------------------------------
@@ -507,7 +516,7 @@
emissions = np.array(emissions)
m = np.array(num_obs) # Number of observations per state vector element
L = np.array(L)

# If Kalman filter mode, count observations per inversion period
if config["KalmanMode"]:
startday_dt = datetime.datetime.strptime(startday, "%Y%m%d")
@@ -547,10 +556,10 @@ def process(i):
outstring3 = f"k = {np.round(k,5)} kg-1 m2 s"
outstring4 = f"a = {np.round(a,5)} \n"
outstring5 = f"expectedDOFS: {np.round(sum(a),5)}"

if config["KalmanMode"]:
outstring5 += " per inversion period"

print(outstring3)
print(outstring4)
print(outstring5)
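
As the surrounding code shows, the reported expected DOFS is just the sum (trace) of the averaging kernel diagonal `a`. A toy numeric check:

```python
import numpy as np

# Toy numbers only: expected DOFS = sum of the averaging kernel diagonal.
a = np.array([0.8, 0.5, 0.2])
print(f"expectedDOFS: {np.round(sum(a), 5)}")  # expectedDOFS: 1.5
```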