From 279dfe353e52f72249906d53a3398866b3e0d722 Mon Sep 17 00:00:00 2001 From: ifscript <135848220+ifscript@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:35:25 +0200 Subject: [PATCH 1/6] Update data.py Added option to ignore missing directories due to satellite outages. --- goes2go/data.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/goes2go/data.py b/goes2go/data.py index 0c246ea..3399e02 100644 --- a/goes2go/data.py +++ b/goes2go/data.py @@ -114,7 +114,7 @@ def _check_param_inputs(**params): return satellite, product, domain -def _goes_file_df(satellite, product, start, end, bands=None, refresh=True): +def _goes_file_df(satellite, product, start, end, bands=None, refresh=True, ignore_missing=False): """Get list of requested GOES files as pandas.DataFrame. Parameters @@ -140,7 +140,15 @@ def _goes_file_df(satellite, product, start, end, bands=None, refresh=True): # ---------------------------- files = [] for DATE in DATES: - files += fs.ls(f"{satellite}/{product}/{DATE:%Y/%j/%H/}", refresh=refresh) + path = f"{satellite}/{product}/{DATE:%Y/%j/%H/}" + if ignore_missing : + try: + files += fs.ls(path, refresh=refresh) + except FileNotFoundError: + print(f"Ignored missing dir: {path}") + else: + files += fs.ls(path, refresh=refresh) + # Build a table of the files # -------------------------- From 078eafbef23b7ae86bcf2ee0ded844497ed96c7e Mon Sep 17 00:00:00 2001 From: ifscript <135848220+ifscript@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:40:55 +0200 Subject: [PATCH 2/6] Update NEW.py Added option to ignore FileNotFoundError due to satellite outages. --- goes2go/NEW.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/goes2go/NEW.py b/goes2go/NEW.py index a1f0e8e..a5ad755 100644 --- a/goes2go/NEW.py +++ b/goes2go/NEW.py @@ -257,7 +257,7 @@ def single_point_timerange(self, latitude, longitude, start=None, end=None, rece **kwargs, ) - def df(self, start, end, refresh=True): + def df(self, start, end, refresh=True, ignore_missing=False): """Get list of requested GOES files as pandas.DataFrame. Parameters @@ -267,6 +267,9 @@ def df(self, start, end, refresh=True): refresh : bool Refresh the s3fs.S3FileSystem object when files are listed. Default True will refresh and not use a cached list. + ignore_missing : bool + Ignore FileNotFoundError if there is missing data from + a satellite outage. """ return _goes_file_df( self.satellite, @@ -275,4 +278,5 @@ def df(self, start, end, refresh=True): end=end, bands=self.bands, refresh=refresh, + ignore_missing=ignore_missing, ) From 0abddfd18c631a5071589876ad87fd8aa726ee5e Mon Sep 17 00:00:00 2001 From: ifscript <135848220+ifscript@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:45:01 +0200 Subject: [PATCH 3/6] Update __init__.py Added support in config to ignore_missing for satellite outages. --- goes2go/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/goes2go/__init__.py b/goes2go/__init__.py index 813fe33..de50086 100644 --- a/goes2go/__init__.py +++ b/goes2go/__init__.py @@ -71,6 +71,7 @@ def _expand(self): overwrite = false max_cpus = 1 s3_refresh = true +ignore_missing = false verbose = true ["timerange"] From f54c5609eb7d2ebe43d39da37c5178a69776c715 Mon Sep 17 00:00:00 2001 From: ifscript <135848220+ifscript@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:54:05 +0200 Subject: [PATCH 4/6] Update data.py Added further support for ignore_missing. --- goes2go/data.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/goes2go/data.py b/goes2go/data.py index 3399e02..434747b 100644 --- a/goes2go/data.py +++ b/goes2go/data.py @@ -332,6 +332,7 @@ def goes_timerange( max_cpus=config["timerange"].get("max_cpus"), bands=None, s3_refresh=config["timerange"].get("s3_refresh"), + ignore_missing=config["timerange"].get("ignore_missing"), verbose=config["timerange"].get("verbose", True), ): """ @@ -420,7 +421,7 @@ def goes_timerange( start = datetime.utcnow() - recent end = datetime.utcnow() - df = _goes_file_df(satellite, product, start, end, bands=bands, refresh=s3_refresh) + df = _goes_file_df(satellite, product, start, end, bands=bands, refresh=s3_refresh, ignore_missing=ignore_missing) if download: _download(df, save_dir=save_dir, overwrite=overwrite, verbose=verbose) @@ -465,6 +466,7 @@ def goes_single_point_timerange( max_cpus=config["timerange"].get("max_cpus"), bands=None, s3_refresh=config["timerange"].get("s3_refresh"), + ignore_missing=config["timerange"].get("ignore_missing"), verbose=config["timerange"].get("verbose", True), ): """ @@ -557,7 +559,7 @@ def goes_single_point_timerange( start = datetime.utcnow() - recent end = datetime.utcnow() - df = _goes_file_df(satellite, product, start, end, bands=bands, refresh=s3_refresh) + df = _goes_file_df(satellite, product, start, end, bands=bands, refresh=s3_refresh, ignore_missing=ignore_missing) if download: _download(df, save_dir=save_dir, overwrite=overwrite, verbose=verbose) @@ -585,6 +587,7 @@ def goes_latest( save_dir=config["latest"].get("save_dir"), bands=None, s3_refresh=config["latest"].get("s3_refresh"), + ignore_missing=config["latest"].get("ignore_missing"), verbose=config["latest"].get("verbose", True), ): """ @@ -645,7 +648,7 @@ def goes_latest( start = datetime.utcnow() - timedelta(hours=1) end = datetime.utcnow() - df = _goes_file_df(satellite, product, start, end, bands=bands, refresh=s3_refresh) + df = _goes_file_df(satellite, product, start, end, bands=bands, refresh=s3_refresh, ignore_missing=ignore_missing) # Filter for specific mesoscale domain if domain is not None and domain.upper() in ["M1", "M2"]: @@ -677,6 +680,7 @@ def goes_nearesttime( save_dir=config["nearesttime"].get("save_dir"), bands=None, s3_refresh=config["nearesttime"].get("s3_refresh"), + ignore_missing=config["nearesttime"].get("ignore_missing"), verbose=config["nearesttime"].get("verbose", True), ): """ @@ -746,7 +750,7 @@ def goes_nearesttime( start = attime - within end = attime + within - df = _goes_file_df(satellite, product, start, end, bands=bands, refresh=s3_refresh) + df = _goes_file_df(satellite, product, start, end, bands=bands, refresh=s3_refresh, ignore_missing=ignore_missing) # return df, start, end, attime From aa127d89a11dc9b276356c148194ac3046d54838 Mon Sep 17 00:00:00 2001 From: ifscript <135848220+ifscript@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:55:42 +0200 Subject: [PATCH 5/6] Update __init__.py ignore_missing = true for timerange --- goes2go/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/goes2go/__init__.py b/goes2go/__init__.py index de50086..e74a345 100644 --- a/goes2go/__init__.py +++ b/goes2go/__init__.py @@ -76,6 +76,7 @@ def _expand(self): ["timerange"] s3_refresh = false +ignore_missing = true ["latest"] return_as = "xarray" From 62a041737797d16a7db7e17aef6566b0659ff3d5 Mon Sep 17 00:00:00 2001 From: ifscript <135848220+ifscript@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:59:03 +0200 Subject: [PATCH 6/6] Update data.py Minor readability improvement. --- goes2go/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goes2go/data.py b/goes2go/data.py index 434747b..5660156 100644 --- a/goes2go/data.py +++ b/goes2go/data.py @@ -141,7 +141,7 @@ def _goes_file_df(satellite, product, start, end, bands=None, refresh=True, igno files = [] for DATE in DATES: path = f"{satellite}/{product}/{DATE:%Y/%j/%H/}" - if ignore_missing : + if ignore_missing is True: try: files += fs.ls(path, refresh=refresh) except FileNotFoundError: