From 3d20b306e8bcf6bc910d670e774cd9151572b34b Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Sat, 19 Jan 2019 15:11:33 +0100 Subject: [PATCH 01/23] fiexd typo --- sentinel_api/sentinel_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index dc5f1a7..f946181 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -282,7 +282,7 @@ def download_all(self, download_dir=None): continue size = int(response.headers['Content-Length'].strip()) if size < 1000000: - print('The found scene is to small: %s (%s)' % (scene['title'], size)) + print('The found scene is too small: %s (%s)' % (scene['title'], size)) print(url) continue From c0469e567cbc85e3eb5357705cea4c499771d3d0 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Mon, 21 Jan 2019 11:03:24 +0100 Subject: [PATCH 02/23] made meths _is_valid, _parse_json _filter_overlap, _merge_scenes static --- sentinel_api/sentinel_api.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index dc5f1a7..38507b2 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -320,7 +320,8 @@ def download_all(self, download_dir=None): return {'success': downloaded, 'failed': downloaded_failed} - def _is_valid(self, zipfile, minsize=1000000): + @staticmethod + def _is_valid(zipfile, minsize=1000000): """ Test whether the downloaded zipfile is valid Args: @@ -396,7 +397,8 @@ def _search_request(self, url): print('Error: {}'.format(exc)) return [] - def _parse_json(self, obj): + @staticmethod + def _parse_json(obj): """Parse the JSON result from ESA Data Hub and create a dictionary for each scene Args: @@ -452,7 +454,8 @@ def _filter_existing(self, scenes): filtered.append(scene) return filtered - def _filter_overlap(self, scenes, wkt_geometry, min_overlap=0): + @staticmethod + def _filter_overlap(scenes, wkt_geometry, min_overlap=0): """Filter scenes based on the minimum overlap to the area of interest Args: @@ -479,7 +482,8 @@ def _filter_overlap(self, scenes, wkt_geometry, min_overlap=0): return filtered - def _merge_scenes(self, scenes1, scenes2): + @staticmethod + def _merge_scenes(scenes1, scenes2): """Merge scenes from two different lists using the 'id' keyword Args: From 8611b6058cc08e33d6bd9d9fc0ccc8d2d84a4fa2 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Mon, 21 Jan 2019 11:04:54 +0100 Subject: [PATCH 03/23] renamed variable 'path' to 'filename' for several methods --- sentinel_api/sentinel_api.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index 38507b2..7ade571 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -232,7 +232,7 @@ def write_results(self, file_type, filename, output=False): Args: file_type: Use 'wget' to write download bash file with wget software, 'json' to write the dictionary object to file, or 'url' to write a file with downloadable URLs - path: Path to file + filename: Path to file output: If True the written file will also be send to stdout (Default: False) """ @@ -504,26 +504,26 @@ def _merge_scenes(scenes1, scenes2): return scenes1 - def _write_json(self, path): + def _write_json(self, filename): """Write JSON representation of scenes list to file Args: - file: Path to file to write in + filename: Path to file to write in """ - with open(path, 'w') as outfile: + with 
open(filename, 'w') as outfile: json.dump(self.__scenes, outfile) return True - def _write_download_wget(self, path): + def _write_download_wget(self, filename): """Write bash file to download scene URLs based on wget software Please note: User authentication to ESA Data Hub (username, password) is being stored in plain text! Args: - file: Path to file to write in + filename: Path to file to write in """ - with open(path, 'w') as outfile: + with open(filename, 'w') as outfile: for scene in self.__scenes: outfile.write('wget -c -T120 --no-check-certificate --user=%s --password=%s -O %s%s.zip "%s"\n' % ( self.__esa_username, self.__esa_password, self.__download_dir, scene['title'], @@ -531,17 +531,17 @@ def _write_download_wget(self, path): )) return None - def _write_download_urls(self, path): + def _write_download_urls(self, filename): """Write URLs of scenes to text file Args: - file: Path to file to write in + filename: Path to file to write in """ - with open(path, 'w') as outfile: + with open(filename, 'w') as outfile: for scene in self.__scenes: outfile.write(scene['url'] + '\n') - return path + return filename ########################################################### From dfcea1f0292b19af057d62b31b27eda67a550a3a Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Mon, 21 Jan 2019 11:17:55 +0100 Subject: [PATCH 04/23] explicitly except AttributeError and shapely.errors.WKTReadingError instead of generic Exception, which is not possible in Python3.6 --- sentinel_api/sentinel_api.py | 142 ++++++++++++++++++----------------- 1 file changed, 72 insertions(+), 70 deletions(-) diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index 7ade571..f670d29 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -22,6 +22,8 @@ import requests from shapely.wkt import loads +from shapely.errors import WKTReadingError + from osgeo import ogr, osr import json import progressbar as pb @@ -31,21 +33,21 @@ class SentinelDownloader(object): """Class to search and download for Sentinel data""" - + __esa_username = None __esa_password = None __esa_api_url = None - + __geometries = [] __scenes = [] __download_dir = './' __data_dirs = [] - + def __init__(self, username, password, api_url='https://scihub.copernicus.eu/apihub/'): self.__esa_api_url = api_url self.__esa_username = username self.__esa_password = password - + def set_download_dir(self, download_dir): """Set directory for check against existing downloaded files and as directory where to download @@ -56,9 +58,9 @@ def set_download_dir(self, download_dir): print('Setting download directory to %s' % download_dir) if not os.path.exists(download_dir): os.makedirs(download_dir) - + self.__download_dir = download_dir - + def set_data_dir(self, data_dir): """Set directory for check against existing downloaded files; this can be repeated multiple times to create a list of data directories @@ -68,7 +70,7 @@ def set_data_dir(self, data_dir): """ print('Adding data directory {}'.format(data_dir)) self.__data_dirs.append(data_dir) - + def set_geometries(self, geometries): """Manually set one or more geometries for data search @@ -81,23 +83,23 @@ def set_geometries(self, geometries): # print(geometries) if isinstance(geometries, list): self.__geometries = geometries - + elif isinstance(geometries, str): self.__geometries = [geometries] - + else: raise Exception('geometries parameter needs to be a list or a string') - + # Test first geometry try: loads(self.__geometries[0]) - except Exception, e: + except 
(AttributeError, WKTReadingError) as e: raise Exception('The first geometry is not valid! Error: %s' % e) - + def get_geometries(self): """Return list of geometries""" return self.__geometries - + def load_sites(self, input_file, verbose=False): """Load features from input file and transform geometries to Lat/Lon (EPSG 4326) @@ -108,20 +110,20 @@ def load_sites(self, input_file, verbose=False): """ print('===========================================================') print('Loading sites from file %s' % input_file) - + if not os.path.exists(input_file): raise Exception('Input file does not exist: %s' % input_file) - + source = ogr.Open(input_file, 0) layer = source.GetLayer() - + in_ref = layer.GetSpatialRef() out_ref = osr.SpatialReference() out_ref.ImportFromEPSG(4326) - + coord_transform = osr.CoordinateTransformation(in_ref, out_ref) geometries = [] - + for feature in layer: geom = feature.GetGeometryRef() geom.Transform(coord_transform) @@ -129,10 +131,10 @@ def load_sites(self, input_file, verbose=False): if verbose: print(geom) geometries.append(geom) - + self.__geometries = geometries print('Found %s features' % len(geometries)) - + def search(self, platform, min_overlap=0, download_dir=None, start_date=None, end_date=None, date_type='beginPosition', **keywords): """Search in ESA Data Hub for scenes with given arguments @@ -162,10 +164,10 @@ def search(self, platform, min_overlap=0, download_dir=None, start_date=None, en print('Searching data for platform %s' % platform) if platform not in ['S1A*', 'S1B*', 'S2A*', 'S2B*', 'S3A*', 'S3B*']: raise Exception('platform parameter has to be S1A*, S1B*, S2A*, S2B*, S3A* or S3B*') - + if download_dir is not None: self.set_download_dir(download_dir) - + date_filtering = '' if start_date is not None or end_date is not None: if start_date is None: @@ -185,10 +187,10 @@ def search(self, platform, min_overlap=0, download_dir=None, start_date=None, en end_date = datetime.strptime(end_date + ' 23:59:59.999', '%Y-%m-%d %H:%M:%S.%f') \ .strftime('%Y-%m-%dT%H:%M:%S.%fZ') date_filtering = ' AND %s:[%s TO %s]' % (date_type, start_date, end_date) - + for geom in self.__geometries: print('===========================================================') - + index = 0 scenes = [] while True: @@ -202,30 +204,30 @@ def search(self, platform, min_overlap=0, download_dir=None, start_date=None, en print('=============================') if len(subscenes) < 100: break - + print('%s scenes after initial search' % len(scenes)) if len(scenes) > 0: scenes = self._filter_existing(scenes) scenes = self._filter_overlap(scenes, geom, min_overlap) print('%s scenes after filtering before merging' % len(scenes)) self.__scenes = self._merge_scenes(self.__scenes, scenes) - + print('===========================================================') print('%s total scenes after merging' % len(self.__scenes)) print('===========================================================') - + def get_scenes(self): """Return searched and filtered scenes""" return self.__scenes - + def print_scenes(self): """Print title of searched and filtered scenes""" - + def sorter(x): return re.findall('[0-9T]{15}', x)[0] - + titles = sorted([x['title'] for x in self.__scenes], key=sorter) print('\n'.join(titles)) - + def write_results(self, file_type, filename, output=False): """Write results to disk in different kind of formats @@ -242,11 +244,11 @@ def write_results(self, file_type, filename, output=False): self._write_json(filename) else: self._write_download_urls(filename) - + if output: with open(filename, 
'r') as infile: print(infile.read()) - + def download_all(self, download_dir=None): """Download all scenes @@ -260,17 +262,17 @@ def download_all(self, download_dir=None): """ if download_dir is None: download_dir = self.__download_dir - + downloaded = [] downloaded_failed = [] - + for scene in self.__scenes: url = scene['url'] filename = scene['title'] + '.zip' path = os.path.join(download_dir, filename) print('===========================================================') print('Download file path: %s' % path) - + try: response = requests.get(url, auth=(self.__esa_username, self.__esa_password), stream=True) except requests.exceptions.ConnectionError: @@ -285,14 +287,14 @@ def download_all(self, download_dir=None): print('The found scene is to small: %s (%s)' % (scene['title'], size)) print(url) continue - + print('Size of the scene: %s MB' % (size / 1024 / 1024)) # show in MegaBytes my_bytes = 0 widgets = ["Downloading: ", pb.Bar(marker="*", left="[", right=" "), pb.Percentage(), " ", pb.FileTransferSpeed(), "] ", " of {0}MB".format(str(round(size / 1024 / 1024, 2))[:4])] pbar = pb.ProgressBar(widgets=widgets, maxval=size).start() - + try: down = open(path, 'wb') for buf in response.iter_content(1024): @@ -306,20 +308,20 @@ def download_all(self, download_dir=None): print("\nKeyboard interruption, remove current download and exit execution of script") os.remove(path) sys.exit(0) - + # Check if file is valid print("Check if file is valid: ") valid = self._is_valid(path) - + if not valid: downloaded_failed.append(path) print('invalid file is being deleted.') os.remove(path) else: downloaded.append(path) - + return {'success': downloaded, 'failed': downloaded_failed} - + @staticmethod def _is_valid(zipfile, minsize=1000000): """ @@ -348,7 +350,7 @@ def _is_valid(zipfile, minsize=1000000): else: print('file seems to be valid.') return not corrupt - + def _format_url(self, startindex, wkt_geometry, platform, date_filtering, **keywords): """Format the search URL based on the arguments @@ -364,17 +366,17 @@ def _format_url(self, startindex, wkt_geometry, platform, date_filtering, **keyw """ geom = loads(wkt_geometry) bbox = geom.envelope - + query_area = ' AND (footprint:"Intersects(%s)")' % bbox filters = '' for kw in sorted(keywords.keys()): filters += ' AND (%s:%s)' % (kw, keywords[kw]) - + url = os.path.join(self.__esa_api_url, 'search?format=json&rows=100&start=%s&q=%s%s%s%s' % (startindex, platform, date_filtering, query_area, filters)) return url - + def _search_request(self, url): """Do the HTTP request to ESA Data Hub @@ -392,11 +394,11 @@ def _search_request(self, url): print(content.text) return [] return self._parse_json(content.json()) - + except requests.exceptions.RequestException as exc: print('Error: {}'.format(exc)) return [] - + @staticmethod def _parse_json(obj): """Parse the JSON result from ESA Data Hub and create a dictionary for each scene @@ -411,7 +413,7 @@ def _parse_json(obj): if 'entry' not in obj['feed']: print('No results for this feed') return [] - + scenes = obj['feed']['entry'] if not isinstance(scenes, list): scenes = [scenes] @@ -422,20 +424,20 @@ def _parse_json(obj): 'title': scene['title'], 'url': scene['link'][0]['href'] } - + for data in scene['str']: item[data['name']] = data['content'] - + for data in scene['date']: item[data['name']] = data['content'] - + for data in scene['int']: item[data['name']] = data['content'] - + scenes_dict.append(item) - + return scenes_dict - + def _filter_existing(self, scenes): """Filter scenes based on existing files in 
the define download directory and all further data directories @@ -453,7 +455,7 @@ def _filter_existing(self, scenes): if not any(exist): filtered.append(scene) return filtered - + @staticmethod def _filter_overlap(scenes, wkt_geometry, min_overlap=0): """Filter scenes based on the minimum overlap to the area of interest @@ -468,20 +470,20 @@ def _filter_overlap(scenes, wkt_geometry, min_overlap=0): """ site = loads(wkt_geometry) - + filtered = [] - + for scene in scenes: footprint = loads(scene['footprint']) intersect = site.intersection(footprint) overlap = intersect.area / site.area if overlap > min_overlap or ( - site.area / footprint.area > 1 and intersect.area / footprint.area > min_overlap): + site.area / footprint.area > 1 and intersect.area / footprint.area > min_overlap): scene['_script_overlap'] = overlap * 100 filtered.append(scene) - + return filtered - + @staticmethod def _merge_scenes(scenes1, scenes2): """Merge scenes from two different lists using the 'id' keyword @@ -497,13 +499,13 @@ def _merge_scenes(scenes1, scenes2): existing_ids = [] for scene in scenes1: existing_ids.append(scene['id']) - + for scene in scenes2: if not scene['id'] in existing_ids: scenes1.append(scene) - + return scenes1 - + def _write_json(self, filename): """Write JSON representation of scenes list to file @@ -514,7 +516,7 @@ def _write_json(self, filename): with open(filename, 'w') as outfile: json.dump(self.__scenes, outfile) return True - + def _write_download_wget(self, filename): """Write bash file to download scene URLs based on wget software Please note: User authentication to ESA Data Hub (username, password) is being stored in plain text! @@ -530,7 +532,7 @@ def _write_download_wget(self, filename): scene['url'].replace('$', '\$') )) return None - + def _write_download_urls(self, filename): """Write URLs of scenes to text file @@ -569,20 +571,20 @@ def main(username, password): s1.download_all() """ - + s1 = SentinelDownloader(username, password, api_url='https://scihub.copernicus.eu/apihub/') # s1.load_sites('wetlands_v8.shp') s1.set_geometries( 'POLYGON ((13.501756184061247 58.390759025092443,13.617310497771715 58.371827474899703,13.620921570075168 58.27891592167088,13.508978328668151 58.233319081414017,13.382590798047325 58.263723491583974,13.382590798047325 58.263723491583974,13.501756184061247 58.390759025092443))') s1.set_download_dir('./') # default is current directory - + # set additional directories which contain downloaded scenes. # A scene is only going to be downloaded if it does not yet exist in either of the data directories or the download directory. 
s1.set_data_dir('/path/to/datadir1') s1.set_data_dir('/path/to/datadir2') - + s1.search('S1A*', 0.8, productType='GRD', sensoroperationalmode='IW') s1.write_results(file_type='wget', filename='sentinel_api_download.sh') # use wget, urls or json as type s1.download_all() - + return s1 From e2d85134b242f1393dd7ca1d34fdb21106f25ab7 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Mon, 21 Jan 2019 17:05:50 +0100 Subject: [PATCH 05/23] [.gitignore] initial commit --- .gitignore | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0e41247 --- /dev/null +++ b/.gitignore @@ -0,0 +1,118 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +.idea/ +dev_* From 373e9aef4b1d8d08b858c1c7b536354fd63e2c41 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Mon, 21 Jan 2019 17:46:25 +0100 Subject: [PATCH 06/23] [SentinelDownloader._write_download_wget] fixed bugs - output directory and scene title were not separated - user and password need to be in brackets in case they contain special characters --- sentinel_api/sentinel_api.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index f670d29..a34900f 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -527,11 +527,11 @@ def _write_download_wget(self, filename): """ with open(filename, 'w') as outfile: for scene in self.__scenes: - outfile.write('wget -c -T120 --no-check-certificate --user=%s --password=%s -O %s%s.zip "%s"\n' % ( - self.__esa_username, self.__esa_password, self.__download_dir, scene['title'], - scene['url'].replace('$', '\$') - )) - return None + out = 'wget -c -T120 --no-check-certificate --user="{}" --password="{}" -O {}.zip "{}"\n'\ + .format(self.__esa_username, self.__esa_password, + os.path.join(self.__download_dir, scene['title']), scene['url'].replace('$', '\$')) + + outfile.write(out) def _write_download_urls(self, filename): """Write URLs of scenes to text file From 
26c3928ad39992a9e3107ccf06f307610365862b Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Tue, 26 Feb 2019 14:14:58 +0100 Subject: [PATCH 07/23] [setup] changed from installing progressbar==2.3 to progressbar2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6ea4cdd..e110a70 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ ], install_requires=['GDAL>=1.11.3', 'Shapely>=1.5.13', - 'progressbar==2.3', + 'progressbar2', 'requests>=2.8.1'], url='https://github.com/jonas-eberle/esa_sentinel.git', author='Jonas Eberle, John Truckenbrodt, Felix Cremer', From 9b7fadc51401ca71047fe33efe9a8f520e014a3c Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Mon, 5 Aug 2019 19:22:11 +0200 Subject: [PATCH 08/23] removed shapely dependency and use spatialist instead --- requirements.txt | 5 ++ sentinel_api/sentinel_api.py | 97 +++++++++++++++++------------------- 2 files changed, 52 insertions(+), 50 deletions(-) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c686838 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +spatialist +progressbar +zlib +requests +json \ No newline at end of file diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index 004a4bd..c4a9e2f 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -2,7 +2,6 @@ Sentinel Search & Download API Authors: Jonas Eberle , Felix Cremer , John Truckenbrodt -Libraries needed: Shapely, GDAL/OGR, JSON, Progressbar, Zipfile, Datetime, Requests Example usage: Please see the "main" function at the end of this file TODO: @@ -21,10 +20,11 @@ import sys import requests -from shapely.wkt import loads -from shapely.errors import WKTReadingError +from osgeo import ogr +ogr.UseExceptions() + +from spatialist.vector import Vector, wkt2vector, intersect -from osgeo import ogr, osr import json import progressbar as pb import zipfile as zf @@ -92,48 +92,32 @@ def set_geometries(self, geometries): # Test first geometry try: - loads(self.__geometries[0]) - except (AttributeError, WKTReadingError) as e: + vec = wkt2vector(self.__geometries[0], srs=4326) + except RuntimeError as e: raise Exception('The first geometry is not valid! 
Error: %s' % e) + finally: + vec = None def get_geometries(self): """Return list of geometries""" return self.__geometries - def load_sites(self, input_file, verbose=False): - """Load features from input file and transform geometries to Lat/Lon (EPSG 4326) + def load_sites(self, input_file): + """ + Load features from input file and transform geometries to Lat/Lon (EPSG 4326) Args: input_file: Path to file that can be read by OGR library - verbose: True if extracted geometries should be printed to console (default: False) """ print('===========================================================') print('Loading sites from file %s' % input_file) - if not os.path.exists(input_file): - raise Exception('Input file does not exist: %s' % input_file) - - source = ogr.Open(input_file, 0) - layer = source.GetLayer() - - in_ref = layer.GetSpatialRef() - out_ref = osr.SpatialReference() - out_ref.ImportFromEPSG(4326) + with Vector(input_file) as vec: + vec.reproject(4326) + self.__geometries = vec.convert2wkt() - coord_transform = osr.CoordinateTransformation(in_ref, out_ref) - geometries = [] - - for feature in layer: - geom = feature.GetGeometryRef() - geom.Transform(coord_transform) - geom = geom.ExportToWkt() - if verbose: - print(geom) - geometries.append(geom) - - self.__geometries = geometries - print('Found %s features' % len(geometries)) + print('Found %s features' % len(self.__geometries)) def search(self, platform, min_overlap=0, download_dir=None, start_date=None, end_date=None, date_type='beginPosition', **keywords): @@ -143,9 +127,9 @@ def search(self, platform, min_overlap=0, download_dir=None, start_date=None, en platform: Define which data to search for (either 'S1A*' for Sentinel-1A or 'S2A*' for Sentinel-2A) min_overlap: Define minimum overlap (0-1) between area of interest and scene footprint (Default: 0) download_dir: Define download directory to filter prior downloaded scenes (Default: None) - startDate: Define starting date of search (Default: None, all data) - endDate: Define ending date of search (Default: None, all data) - dataType: Define the type of the given dates (please select from 'beginPosition', 'endPosition', and + start_date: Define starting date of search (Default: None, all data) + end_date: Define ending date of search (Default: None, all data) + date_type: Define the type of the given dates (please select from 'beginPosition', 'endPosition', and 'ingestionDate') (Default: beginPosition) **keywords: Further OpenSearch arguments can be passed to the query according to the ESA Data Hub Handbook (please see https://scihub.copernicus.eu/twiki/do/view/SciHubUserGuide/3FullTextSearch#Search_Keywords) @@ -364,9 +348,9 @@ def _format_url(self, startindex, wkt_geometry, platform, date_filtering, **keyw url: String URL to search for this data """ - geom = loads(wkt_geometry) - bbox = geom.envelope - + with wkt2vector(wkt_geometry, srs=4326) as vec: + bbox = vec.bbox().convert2wkt()[0] + query_area = ' AND (footprint:"Intersects(%s)")' % bbox filters = '' for kw in sorted(keywords.keys()): @@ -377,6 +361,14 @@ def _format_url(self, startindex, wkt_geometry, platform, date_filtering, **keyw (startindex, platform, date_filtering, query_area, filters)) return url + @staticmethod + def multipolygon2list(wkt): + geom = ogr.CreateGeometryFromWkt(wkt) + if geom.GetGeometryName() == 'MULTIPOLYGON': + return [x.ExportToWkt() for x in geom] + else: + return [geom.ExportToWkt()] + def _search_request(self, url): """Do the HTTP request to ESA Data Hub @@ -393,7 +385,10 @@ def 
_search_request(self, url): print('Error: API returned unexpected response {}:'.format(content.status_code)) print(content.text) return [] - return self._parse_json(content.json()) + result = self._parse_json(content.json()) + for item in result: + item['footprint'] = self.multipolygon2list(item['footprint'])[0] + return result except requests.exceptions.RequestException as exc: print('Error: {}'.format(exc)) @@ -469,20 +464,22 @@ def _filter_overlap(scenes, wkt_geometry, min_overlap=0): Filtered list of scenes """ - site = loads(wkt_geometry) - filtered = [] - for scene in scenes: - footprint = loads(scene['footprint']) - intersect = site.intersection(footprint) - overlap = intersect.area / site.area - if overlap > min_overlap or ( - site.area / footprint.area > 1 and intersect.area / footprint.area > min_overlap): - scene['_script_overlap'] = overlap * 100 - filtered.append(scene) - - return filtered + with wkt2vector(wkt_geometry, srs=4326) as vec1: + site_area = vec1.getArea() + for scene in scenes: + with wkt2vector(scene['footprint'], srs=4326) as vec2: + footprint_area = vec2.getArea() + with intersect(vec1, vec2) as inter: + intersect_area = inter.getArea() + overlap = intersect_area / site_area + if overlap > min_overlap or ( + site_area / footprint_area > 1 and intersect_area / footprint_area > min_overlap): + scene['_script_overlap'] = overlap * 100 + filtered.append(scene) + + return filtered @staticmethod def _merge_scenes(scenes1, scenes2): From 2f2f107f5666100d672a5e2b6855e3507463a781 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Wed, 7 Aug 2019 13:49:23 +0200 Subject: [PATCH 09/23] simplified package dependency lists --- requirements.txt | 7 +++---- setup.py | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index c686838..7b998a4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ spatialist -progressbar -zlib -requests -json \ No newline at end of file +progressbar2 +requests>=2.8.1 +gdal>=1.11.3 diff --git a/setup.py b/setup.py index e110a70..9c1d8cb 100644 --- a/setup.py +++ b/setup.py @@ -6,10 +6,10 @@ version='0.5.2', description='ESA Sentinel Search & Download API', classifiers=[ - 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python', ], - install_requires=['GDAL>=1.11.3', - 'Shapely>=1.5.13', + install_requires=['gdal>=1.11.3', + 'spatialist', 'progressbar2', 'requests>=2.8.1'], url='https://github.com/jonas-eberle/esa_sentinel.git', From 1c90e38d28777246aef2083be91e7fd4429422f3 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Wed, 7 Aug 2019 14:05:57 +0200 Subject: [PATCH 10/23] [LICENSE] updated copyright --- LICENSE.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE.txt b/LICENSE.txt index 005c484..b104fb5 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2017, Jonas Eberle +# Copyright (c) 2017-2019, Jonas Eberle & John Truckenbrodt Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the From d69270841c3718a35fd9336ee9ad9f38cc5cfc8d Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Wed, 7 Aug 2019 16:28:06 +0200 Subject: [PATCH 11/23] [asf_template.py] initial commit --- sentinel_api/asf_template.py | 596 +++++++++++++++++++++++++++++++++++ 1 file changed, 596 insertions(+) create mode 100755 sentinel_api/asf_template.py diff 
--git a/sentinel_api/asf_template.py b/sentinel_api/asf_template.py new file mode 100755 index 0000000..1b12b89 --- /dev/null +++ b/sentinel_api/asf_template.py @@ -0,0 +1,596 @@ +#!/usr/bin/python + +# Usage: +# +# In a terminal/command line, cd to the directory where this file lives. Then... +# +# With embedded urls: ( download the hardcoded list of files in the 'files =' block below) +# +# python ./download-all-2019-08-07_12-39-51.py +# +# Download all files in a Metalink/CSV: (downloaded from ASF Vertex) +# +# python ./download-all-2019-08-07_12-39-51.py /path/to/downloads.metalink localmetalink.metalink localcsv.csv +# +# Compatibility: python >= 2.6.5, 2.7.5, 3.0 +# +# If downloading from a trusted source with invalid SSL Certs, use --insecure to ignore +# +# For more information on bulk downloads, navigate to: +# https://www.asf.alaska.edu/data-tools/bulk-download/ +# +# +# +# This script was generated by the Alaska Satellite Facility's bulk download service. +# For more information on the service, navigate to: +# http://bulk-download.asf.alaska.edu/help +# + +import sys, csv +import os, os.path +import tempfile, shutil +import re + +import base64 +import time +import getpass +import ssl +import signal + +import xml.etree.ElementTree as ET + +############# +# This next block is a bunch of Python 2/3 compatability + +try: + # Python 2.x Libs + from urllib2 import build_opener, install_opener, Request, urlopen, HTTPError + from urllib2 import URLError, HTTPSHandler, HTTPHandler, HTTPCookieProcessor + + from cookielib import MozillaCookieJar + from StringIO import StringIO + +except ImportError as e: + + # Python 3.x Libs + from urllib.request import build_opener, install_opener, Request, urlopen + from urllib.request import HTTPHandler, HTTPSHandler, HTTPCookieProcessor + from urllib.error import HTTPError, URLError + + from http.cookiejar import MozillaCookieJar + from io import StringIO + +### +# Global variables intended for cross-thread modification +abort = False + +### +# A routine that handles trapped signals +def signal_handler(sig, frame): + global abort + sys.stderr.output("\n > Caught Signal. Exiting!\n") + abort = True # necessary to cause the program to stop + raise SystemExit # this will only abort the thread that the ctrl+c was caught in + +class bulk_downloader: + def __init__(self): + # List of files to download + self.files = [ "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T045445_20190807T045510_017475_020DCD_A6EF.zip", + "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T043950_20190807T044019_017475_020DCA_CE96.zip" ] + + # Local stash of cookies so we don't always have to ask + self.cookie_jar_path = os.path.join( os.path.expanduser('~'), ".bulk_download_cookiejar.txt") + self.cookie_jar = None + + self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', + 'client': 'BO_n7nTIlMljdvU6kRRB3g', + 'redir': 'https://vertex-retired.daac.asf.alaska.edu/services/urs4_token_request'} + + # Make sure we can write it our current directory + if os.access(os.getcwd(), os.W_OK) is False: + print ("WARNING: Cannot write to current path! 
Check permissions for {0}".format(os.getcwd())) + exit(-1) + + # For SSL + self.context = {} + + # Check if user handed in a Metalink or CSV: + if len(sys.argv) > 0: + download_files = [] + input_files = [] + for arg in sys.argv[1:]: + if arg == '--insecure': + try: + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + self.context['context'] = ctx + except AttributeError: + # Python 2.6 won't complain about SSL Validation + pass + + elif arg.endswith('.metalink') or arg.endswith('.csv'): + if os.path.isfile( arg ): + input_files.append( arg ) + if arg.endswith('.metalink'): + new_files = self.process_metalink(arg) + else: + new_files = self.process_csv(arg) + if new_files is not None: + for file_url in (new_files): + download_files.append( file_url ) + else: + print (" > I cannot find the input file you specified: {0}".format(arg)) + else: + print (" > Command line argument '{0}' makes no sense, ignoring.".format(arg)) + + if len(input_files) > 0: + if len(download_files) > 0: + print (" > Processing {0} downloads from {1} input files. ".format(len(download_files), len(input_files))) + self.files = download_files + else: + print (" > I see you asked me to download files from {0} input files, but they had no downloads!".format(len(input_files))) + print (" > I'm super confused and exiting.") + exit(-1) + + # Make sure cookie_jar is good to go! + self.get_cookie() + + # summary + self.total_bytes = 0 + self.total_time = 0 + self.cnt = 0 + self.success = [] + self.failed = [] + self.skipped = [] + + + # Get and validate a cookie + def get_cookie(self): + if os.path.isfile(self.cookie_jar_path): + self.cookie_jar = MozillaCookieJar() + self.cookie_jar.load(self.cookie_jar_path) + + # make sure cookie is still valid + if self.check_cookie(): + print(" > Re-using previous cookie jar.") + return True + else: + print(" > Could not validate old cookie Jar") + + # We don't have a valid cookie, prompt user or creds + print ("No existing URS cookie found, please enter Earthdata username & password:") + print ("(Credentials will not be stored, saved or logged anywhere)") + + # Keep trying 'till user gets the right U:P + while self.check_cookie() is False: + self.get_new_cookie() + + return True + + # Validate cookie before we begin + def check_cookie(self): + + if self.cookie_jar is None: + print (" > Cookiejar is bunk: {0}".format(self.cookie_jar)) + return False + + # File we know is valid, used to validate cookie + file_check = 'https://urs.earthdata.nasa.gov/profile' + + # Apply custom Redirect Hanlder + opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) + install_opener(opener) + + # Attempt a HEAD request + request = Request(file_check) + request.get_method = lambda : 'HEAD' + try: + print (" > attempting to download {0}".format(file_check)) + response = urlopen(request, timeout=30) + resp_code = response.getcode() + # Make sure we're logged in + if not self.check_cookie_is_logged_in(self.cookie_jar): + return False + + # Save cookiejar + self.cookie_jar.save(self.cookie_jar_path) + + except HTTPError: + # If we ge this error, again, it likely means the user has not agreed to current EULA + print ("\nIMPORTANT: ") + print ("Your user appears to lack permissions to download data from the ASF Datapool.") + print ("\n\nNew users: you must first log into Vertex and accept the EULA. 
In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") + exit(-1) + + # This return codes indicate the USER has not been approved to download the data + if resp_code in (300, 301, 302, 303): + try: + redir_url = response.info().getheader('Location') + except AttributeError: + redir_url = response.getheader('Location') + + #Funky Test env: + if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): + print ("Cough, cough. It's dusty in this test env!") + return True + + print ("Redirect ({0}) occured, invalid cookie value!".format(resp_code)) + return False + + # These are successes! + if resp_code in (200, 307): + return True + + return False + + def get_new_cookie(self): + # Start by prompting user to input their credentials + + # Another Python2/3 workaround + try: + new_username = raw_input("Username: ") + except NameError: + new_username = input("Username: ") + new_password = getpass.getpass(prompt="Password (will not be displayed): ") + + # Build URS4 Cookie request + auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + '&redirect_uri=' + self.asf_urs4['redir'] + '&response_type=code&state=' + + try: + #python2 + user_pass = base64.b64encode (bytes(new_username+":"+new_password)) + except TypeError: + #python3 + user_pass = base64.b64encode (bytes(new_username+":"+new_password, "utf-8")) + user_pass = user_pass.decode("utf-8") + + # Authenticate against URS, grab all the cookies + self.cookie_jar = MozillaCookieJar() + opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) + request = Request(auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) + + # Watch out cookie rejection! + try: + response = opener.open(request) + except HTTPError as e: + if e.code == 401: + print (" > Username and Password combo was not successful. Please try again.") + return False + else: + # If an error happens here, the user most likely has not confirmed EULA. + print ("\nIMPORTANT: There was an error obtaining a download cookie!") + print ("Your user appears to lack permission to download data from the ASF Datapool.") + print ("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") + exit(-1) + except URLError as e: + print ("\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. ") + print ("Try cookie generation later.") + exit(-1) + + # Did we get a cookie? + if self.check_cookie_is_logged_in(self.cookie_jar): + #COOKIE SUCCESS! + self.cookie_jar.save(self.cookie_jar_path) + return True + + # if we aren't successful generating the cookie, nothing will work. Stop here! + print ("WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again.") + print ("Response was {0}.".format(response.getcode())) + print ("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") + exit(-1) + + # make sure we're logged into URS + def check_cookie_is_logged_in(self, cj): + for cookie in cj: + if cookie.name == 'urs_user_already_logged': + # Only get this cookie if we logged in successfully! 
+ return True + + return False + + + # Download the file + def download_file_with_cookiejar(self, url, file_count, total, recursion=False): + # see if we've already download this file and if it is that it is the correct size + download_file = os.path.basename(url).split('?')[0] + if os.path.isfile(download_file): + try: + request = Request(url) + request.get_method = lambda : 'HEAD' + response = urlopen(request, timeout=30) + remote_size = self.get_total_size(response) + # Check that we were able to derive a size. + if remote_size: + local_size = os.path.getsize(download_file) + if remote_size < (local_size+(local_size*.01)) and remote_size > (local_size-(local_size*.01)): + print (" > Download file {0} exists! \n > Skipping download of {1}. ".format(download_file, url)) + return None,None + #partial file size wasn't full file size, lets blow away the chunk and start again + print (" > Found {0} but it wasn't fully downloaded. Removing file and downloading again.".format(download_file)) + os.remove(download_file) + + except ssl.CertificateError as e: + print (" > ERROR: {0}".format(e)) + print (" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag") + return False,None + + except HTTPError as e: + if e.code == 401: + print (" > IMPORTANT: Your user may not have permission to download this type of data!") + else: + print (" > Unknown Error, Could not get file HEAD: {0}".format(e)) + + except URLError as e: + print ("URL Error (from HEAD): {0}, {1}".format( e.reason, url)) + if "ssl.c" in "{0}".format(e.reason): + print ("IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error.") + return False,None + + # attempt https connection + try: + request = Request(url) + response = urlopen(request, timeout=30) + + # Watch for redirect + if response.geturl() != url: + + # See if we were redirect BACK to URS for re-auth. + if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl(): + + if recursion: + print (" > Entering seemingly endless auth loop. Aborting. ") + return False, None + + # make this easier. If there is no app_type=401, add it + new_auth_url = response.geturl() + if "app_type" not in new_auth_url: + new_auth_url += "&app_type=401" + + print (" > While attempting to download {0}....".format(url)) + print (" > Need to obtain new cookie from {0}".format(new_auth_url)) + old_cookies = [cookie.name for cookie in self.cookie_jar] + opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) + request = Request(new_auth_url) + try: + response = opener.open(request) + for cookie in self.cookie_jar: + if cookie.name not in old_cookies: + print (" > Saved new cookie: {0}".format(cookie.name)) + + # A little hack to save session cookies + if cookie.discard: + cookie.expires = int(time.time()) + 60*60*24*30 + print (" > Saving session Cookie that should have been discarded! ") + + self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) + except HTTPError as e: + print ("HTTP Error: {0}, {1}".format( e.code, url)) + return False,None + + # Okay, now we have more cookies! Lets try again, recursively! 
+ print (" > Attempting download again with new cookies!") + return self.download_file_with_cookiejar(url, file_count, total, recursion=True) + + print (" > 'Temporary' Redirect download @ Remote archive:\n > {0}".format(response.geturl())) + + # seems to be working + print ("({0}/{1}) Downloading {2}".format(file_count, total, url)) + + # Open our local file for writing and build status bar + tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') + self.chunk_read(response, tf, report_hook=self.chunk_report) + + # Reset download status + sys.stdout.write('\n') + + tempfile_name = tf.name + tf.close() + + #handle errors + except HTTPError as e: + print ("HTTP Error: {0}, {1}".format( e.code, url)) + + if e.code == 401: + print (" > IMPORTANT: Your user does not have permission to download this type of data!") + + if e.code == 403: + print (" > Got a 403 Error trying to download this file. ") + print (" > You MAY need to log in this app and agree to a EULA. ") + + return False,None + + except URLError as e: + print ("URL Error (from GET): {0}, {1}, {2}".format(e, e.reason, url)) + if "ssl.c" in "{0}".format(e.reason): + print ("IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error.") + return False,None + + except ssl.CertificateError as e: + print (" > ERROR: {0}".format(e)) + print (" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag") + return False,None + + # Return the file size + shutil.copy(tempfile_name, download_file) + os.remove(tempfile_name) + file_size = self.get_total_size(response) + actual_size = os.path.getsize(download_file) + if file_size is None: + # We were unable to calculate file size. + file_size = actual_size + return actual_size,file_size + + def get_redirect_url_from_error(self, error): + find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") + print ("error file was: {}".format(error)) + redirect_url = find_redirect.search(error) + if redirect_url: + print("Found: {0}".format(redirect_url.group(0))) + return (redirect_url.group(0)) + + return None + + + # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook + def chunk_report(self, bytes_so_far, file_size): + if file_size is not None: + percent = float(bytes_so_far) / file_size + percent = round(percent*100, 2) + sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % + (bytes_so_far, file_size, percent)) + else: + # We couldn't figure out the size. + sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) + + # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook + def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): + file_size = self.get_total_size(response) + bytes_so_far = 0 + + while 1: + try: + chunk = response.read(chunk_size) + except: + sys.stdout.write("\n > There was an error reading data. 
\n") + break + + try: + local_file.write(chunk) + except TypeError: + local_file.write(chunk.decode(local_file.encoding)) + bytes_so_far += len(chunk) + + if not chunk: + break + + if report_hook: + report_hook(bytes_so_far, file_size) + + return bytes_so_far + + def get_total_size(self, response): + try: + file_size = response.info().getheader('Content-Length').strip() + except AttributeError: + try: + file_size = response.getheader('Content-Length').strip() + except AttributeError: + print ("> Problem getting size") + return None + + return int(file_size) + + + # Get download urls from a metalink file + def process_metalink(self, ml_file): + print ("Processing metalink file: {0}".format(ml_file)) + with open(ml_file, 'r') as ml: + xml = ml.read() + + # Hack to remove annoying namespace + it = ET.iterparse(StringIO(xml)) + for _, el in it: + if '}' in el.tag: + el.tag = el.tag.split('}', 1)[1] # strip all namespaces + root = it.root + + dl_urls = [] + ml_files = root.find('files') + for dl in ml_files: + dl_urls.append(dl.find('resources').find('url').text) + + if len(dl_urls) > 0: + return dl_urls + else: + return None + + # Get download urls from a csv file + def process_csv(self, csv_file): + print ("Processing csv file: {0}".format(csv_file)) + + dl_urls = [] + with open(csv_file, 'r') as csvf: + try: + csvr = csv.DictReader(csvf) + for row in csvr: + dl_urls.append(row['URL']) + except csv.Error as e: + print ("WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) + return None + except KeyError as e: + print ("WARNING: Could not find URL column in file %s. Skipping." % (csv_file)) + + if len(dl_urls) > 0: + return dl_urls + else: + return None + + # Download all the files in the list + def download_files(self): + for file_name in self.files: + + # make sure we haven't ctrl+c'd or some other abort trap + if abort == True: + raise SystemExit + + # download counter + self.cnt += 1 + + # set a timer + start = time.time() + + # run download + size,total_size = self.download_file_with_cookiejar(file_name, self.cnt, len(self.files)) + + # calculte rate + end = time.time() + + # stats: + if size is None: + self.skipped.append(file_name) + # Check to see that the download didn't error and is the correct size + elif size is not False and (total_size < (size+(size*.01)) and total_size > (size-(size*.01))): + # Download was good! 
+ elapsed = end - start + elapsed = 1.0 if elapsed < 1 else elapsed + rate = (size/1024**2)/elapsed + + print ("Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec".format(size, elapsed, rate)) + + # add up metrics + self.total_bytes += size + self.total_time += elapsed + self.success.append( {'file':file_name, 'size':size } ) + + else: + print ("There was a problem downloading {0}".format(file_name)) + self.failed.append(file_name) + + def print_summary(self): + # Print summary: + print ("\n\nDownload Summary ") + print ("--------------------------------------------------------------------------------") + print (" Successes: {0} files, {1} bytes ".format(len(self.success), self.total_bytes)) + for success_file in self.success: + print (" - {0} {1:.2f}MB".format(success_file['file'],(success_file['size']/1024.0**2))) + if len(self.failed) > 0: + print (" Failures: {0} files".format(len(self.failed))) + for failed_file in self.failed: + print (" - {0}".format(failed_file)) + if len(self.skipped) > 0: + print (" Skipped: {0} files".format(len(self.skipped))) + for skipped_file in self.skipped: + print (" - {0}".format(skipped_file)) + if len(self.success) > 0: + print (" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes/1024.0**2)/self.total_time)) + print ("--------------------------------------------------------------------------------") + + +if __name__ == "__main__": + # Setup a signal trap for SIGINT (Ctrl+C) + signal.signal(signal.SIGINT, signal_handler) + + downloader = bulk_downloader() + downloader.download_files() + downloader.print_summary() From db8b615b41c4b05b0b0668e31f60a921f8b04ff8 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Wed, 7 Aug 2019 16:30:18 +0200 Subject: [PATCH 12/23] [asf_template.py] general code appearance improvements --- sentinel_api/asf_template.py | 888 ++++++++++++++++++----------------- 1 file changed, 451 insertions(+), 437 deletions(-) diff --git a/sentinel_api/asf_template.py b/sentinel_api/asf_template.py index 1b12b89..b00543c 100755 --- a/sentinel_api/asf_template.py +++ b/sentinel_api/asf_template.py @@ -26,9 +26,12 @@ # http://bulk-download.asf.alaska.edu/help # -import sys, csv -import os, os.path -import tempfile, shutil +import os +import os.path +import csv +import sys +import tempfile +import shutil import re import base64 @@ -43,57 +46,60 @@ # This next block is a bunch of Python 2/3 compatability try: - # Python 2.x Libs - from urllib2 import build_opener, install_opener, Request, urlopen, HTTPError - from urllib2 import URLError, HTTPSHandler, HTTPHandler, HTTPCookieProcessor - - from cookielib import MozillaCookieJar - from StringIO import StringIO + # Python 2.x Libs + from urllib2 import build_opener, install_opener, Request, urlopen, HTTPError + from urllib2 import URLError, HTTPSHandler, HTTPHandler, HTTPCookieProcessor + + from cookielib import MozillaCookieJar + from StringIO import StringIO except ImportError as e: - - # Python 3.x Libs - from urllib.request import build_opener, install_opener, Request, urlopen - from urllib.request import HTTPHandler, HTTPSHandler, HTTPCookieProcessor - from urllib.error import HTTPError, URLError - - from http.cookiejar import MozillaCookieJar - from io import StringIO + + # Python 3.x Libs + from urllib.request import build_opener, install_opener, Request, urlopen + from urllib.request import HTTPHandler, HTTPSHandler, HTTPCookieProcessor + from urllib.error import HTTPError, URLError + + from http.cookiejar import MozillaCookieJar + from io import StringIO ### # Global 
variables intended for cross-thread modification abort = False + ### # A routine that handles trapped signals def signal_handler(sig, frame): global abort sys.stderr.output("\n > Caught Signal. Exiting!\n") - abort = True # necessary to cause the program to stop + abort = True # necessary to cause the program to stop raise SystemExit # this will only abort the thread that the ctrl+c was caught in + class bulk_downloader: def __init__(self): # List of files to download - self.files = [ "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T045445_20190807T045510_017475_020DCD_A6EF.zip", - "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T043950_20190807T044019_017475_020DCA_CE96.zip" ] - + self.files = [ + "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T045445_20190807T045510_017475_020DCD_A6EF.zip", + "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T043950_20190807T044019_017475_020DCA_CE96.zip"] + # Local stash of cookies so we don't always have to ask - self.cookie_jar_path = os.path.join( os.path.expanduser('~'), ".bulk_download_cookiejar.txt") + self.cookie_jar_path = os.path.join(os.path.expanduser('~'), ".bulk_download_cookiejar.txt") self.cookie_jar = None - - self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', - 'client': 'BO_n7nTIlMljdvU6kRRB3g', - 'redir': 'https://vertex-retired.daac.asf.alaska.edu/services/urs4_token_request'} - + + self.asf_urs4 = {'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', + 'client': 'BO_n7nTIlMljdvU6kRRB3g', + 'redir': 'https://vertex-retired.daac.asf.alaska.edu/services/urs4_token_request'} + # Make sure we can write it our current directory if os.access(os.getcwd(), os.W_OK) is False: - print ("WARNING: Cannot write to current path! Check permissions for {0}".format(os.getcwd())) + print("WARNING: Cannot write to current path! Check permissions for {0}".format(os.getcwd())) exit(-1) - + # For SSL self.context = {} - + # Check if user handed in a Metalink or CSV: if len(sys.argv) > 0: download_files = [] @@ -108,489 +114,497 @@ def __init__(self): except AttributeError: # Python 2.6 won't complain about SSL Validation pass - + elif arg.endswith('.metalink') or arg.endswith('.csv'): - if os.path.isfile( arg ): - input_files.append( arg ) + if os.path.isfile(arg): + input_files.append(arg) if arg.endswith('.metalink'): new_files = self.process_metalink(arg) else: new_files = self.process_csv(arg) if new_files is not None: for file_url in (new_files): - download_files.append( file_url ) + download_files.append(file_url) else: - print (" > I cannot find the input file you specified: {0}".format(arg)) + print(" > I cannot find the input file you specified: {0}".format(arg)) else: - print (" > Command line argument '{0}' makes no sense, ignoring.".format(arg)) - + print(" > Command line argument '{0}' makes no sense, ignoring.".format(arg)) + if len(input_files) > 0: if len(download_files) > 0: - print (" > Processing {0} downloads from {1} input files. ".format(len(download_files), len(input_files))) + print(" > Processing {0} downloads from {1} input files. 
".format(len(download_files), + len(input_files))) self.files = download_files else: - print (" > I see you asked me to download files from {0} input files, but they had no downloads!".format(len(input_files))) - print (" > I'm super confused and exiting.") + print( + " > I see you asked me to download files from {0} input files, but they had no downloads!".format( + len(input_files))) + print(" > I'm super confused and exiting.") exit(-1) - + # Make sure cookie_jar is good to go! self.get_cookie() - - # summary + + # summary self.total_bytes = 0 self.total_time = 0 self.cnt = 0 self.success = [] self.failed = [] self.skipped = [] - - + # Get and validate a cookie def get_cookie(self): - if os.path.isfile(self.cookie_jar_path): - self.cookie_jar = MozillaCookieJar() - self.cookie_jar.load(self.cookie_jar_path) - - # make sure cookie is still valid - if self.check_cookie(): - print(" > Re-using previous cookie jar.") - return True - else: - print(" > Could not validate old cookie Jar") - - # We don't have a valid cookie, prompt user or creds - print ("No existing URS cookie found, please enter Earthdata username & password:") - print ("(Credentials will not be stored, saved or logged anywhere)") - - # Keep trying 'till user gets the right U:P - while self.check_cookie() is False: - self.get_new_cookie() - - return True - + if os.path.isfile(self.cookie_jar_path): + self.cookie_jar = MozillaCookieJar() + self.cookie_jar.load(self.cookie_jar_path) + + # make sure cookie is still valid + if self.check_cookie(): + print(" > Re-using previous cookie jar.") + return True + else: + print(" > Could not validate old cookie Jar") + + # We don't have a valid cookie, prompt user or creds + print("No existing URS cookie found, please enter Earthdata username & password:") + print("(Credentials will not be stored, saved or logged anywhere)") + + # Keep trying 'till user gets the right U:P + while self.check_cookie() is False: + self.get_new_cookie() + + return True + # Validate cookie before we begin def check_cookie(self): - - if self.cookie_jar is None: - print (" > Cookiejar is bunk: {0}".format(self.cookie_jar)) - return False - - # File we know is valid, used to validate cookie - file_check = 'https://urs.earthdata.nasa.gov/profile' - - # Apply custom Redirect Hanlder - opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) - install_opener(opener) - - # Attempt a HEAD request - request = Request(file_check) - request.get_method = lambda : 'HEAD' - try: - print (" > attempting to download {0}".format(file_check)) - response = urlopen(request, timeout=30) - resp_code = response.getcode() - # Make sure we're logged in - if not self.check_cookie_is_logged_in(self.cookie_jar): - return False - - # Save cookiejar - self.cookie_jar.save(self.cookie_jar_path) - - except HTTPError: - # If we ge this error, again, it likely means the user has not agreed to current EULA - print ("\nIMPORTANT: ") - print ("Your user appears to lack permissions to download data from the ASF Datapool.") - print ("\n\nNew users: you must first log into Vertex and accept the EULA. 
In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") - exit(-1) - - # This return codes indicate the USER has not been approved to download the data - if resp_code in (300, 301, 302, 303): - try: - redir_url = response.info().getheader('Location') - except AttributeError: - redir_url = response.getheader('Location') - - #Funky Test env: - if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): - print ("Cough, cough. It's dusty in this test env!") - return True - - print ("Redirect ({0}) occured, invalid cookie value!".format(resp_code)) - return False - - # These are successes! - if resp_code in (200, 307): - return True - - return False - + + if self.cookie_jar is None: + print(" > Cookiejar is bunk: {0}".format(self.cookie_jar)) + return False + + # File we know is valid, used to validate cookie + file_check = 'https://urs.earthdata.nasa.gov/profile' + + # Apply custom Redirect Hanlder + opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) + install_opener(opener) + + # Attempt a HEAD request + request = Request(file_check) + request.get_method = lambda: 'HEAD' + try: + print(" > attempting to download {0}".format(file_check)) + response = urlopen(request, timeout=30) + resp_code = response.getcode() + # Make sure we're logged in + if not self.check_cookie_is_logged_in(self.cookie_jar): + return False + + # Save cookiejar + self.cookie_jar.save(self.cookie_jar_path) + + except HTTPError: + # If we ge this error, again, it likely means the user has not agreed to current EULA + print("\nIMPORTANT: ") + print("Your user appears to lack permissions to download data from the ASF Datapool.") + print( + "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") + exit(-1) + + # This return codes indicate the USER has not been approved to download the data + if resp_code in (300, 301, 302, 303): + try: + redir_url = response.info().getheader('Location') + except AttributeError: + redir_url = response.getheader('Location') + + # Funky Test env: + if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): + print("Cough, cough. It's dusty in this test env!") + return True + + print("Redirect ({0}) occured, invalid cookie value!".format(resp_code)) + return False + + # These are successes! 
+ if resp_code in (200, 307): + return True + + return False + def get_new_cookie(self): - # Start by prompting user to input their credentials - - # Another Python2/3 workaround - try: - new_username = raw_input("Username: ") - except NameError: - new_username = input("Username: ") - new_password = getpass.getpass(prompt="Password (will not be displayed): ") - - # Build URS4 Cookie request - auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + '&redirect_uri=' + self.asf_urs4['redir'] + '&response_type=code&state=' - - try: - #python2 - user_pass = base64.b64encode (bytes(new_username+":"+new_password)) - except TypeError: - #python3 - user_pass = base64.b64encode (bytes(new_username+":"+new_password, "utf-8")) - user_pass = user_pass.decode("utf-8") - - # Authenticate against URS, grab all the cookies - self.cookie_jar = MozillaCookieJar() - opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) - request = Request(auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) - - # Watch out cookie rejection! - try: - response = opener.open(request) - except HTTPError as e: - if e.code == 401: - print (" > Username and Password combo was not successful. Please try again.") - return False - else: - # If an error happens here, the user most likely has not confirmed EULA. - print ("\nIMPORTANT: There was an error obtaining a download cookie!") - print ("Your user appears to lack permission to download data from the ASF Datapool.") - print ("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") - exit(-1) - except URLError as e: - print ("\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. ") - print ("Try cookie generation later.") - exit(-1) - - # Did we get a cookie? - if self.check_cookie_is_logged_in(self.cookie_jar): - #COOKIE SUCCESS! - self.cookie_jar.save(self.cookie_jar_path) - return True - - # if we aren't successful generating the cookie, nothing will work. Stop here! - print ("WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again.") - print ("Response was {0}.".format(response.getcode())) - print ("\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") - exit(-1) - + # Start by prompting user to input their credentials + + # Another Python2/3 workaround + try: + new_username = raw_input("Username: ") + except NameError: + new_username = input("Username: ") + new_password = getpass.getpass(prompt="Password (will not be displayed): ") + + # Build URS4 Cookie request + auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + '&redirect_uri=' + \ + self.asf_urs4['redir'] + '&response_type=code&state=' + + try: + # python2 + user_pass = base64.b64encode(bytes(new_username + ":" + new_password)) + except TypeError: + # python3 + user_pass = base64.b64encode(bytes(new_username + ":" + new_password, "utf-8")) + user_pass = user_pass.decode("utf-8") + + # Authenticate against URS, grab all the cookies + self.cookie_jar = MozillaCookieJar() + opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) + request = Request(auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) + + # Watch out cookie rejection! 
+ try: + response = opener.open(request) + except HTTPError as e: + if e.code == 401: + print(" > Username and Password combo was not successful. Please try again.") + return False + else: + # If an error happens here, the user most likely has not confirmed EULA. + print("\nIMPORTANT: There was an error obtaining a download cookie!") + print("Your user appears to lack permission to download data from the ASF Datapool.") + print( + "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") + exit(-1) + except URLError as e: + print("\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. ") + print("Try cookie generation later.") + exit(-1) + + # Did we get a cookie? + if self.check_cookie_is_logged_in(self.cookie_jar): + # COOKIE SUCCESS! + self.cookie_jar.save(self.cookie_jar_path) + return True + + # if we aren't successful generating the cookie, nothing will work. Stop here! + print("WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again.") + print("Response was {0}.".format(response.getcode())) + print( + "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov") + exit(-1) + # make sure we're logged into URS def check_cookie_is_logged_in(self, cj): - for cookie in cj: - if cookie.name == 'urs_user_already_logged': - # Only get this cookie if we logged in successfully! - return True - - return False - - + for cookie in cj: + if cookie.name == 'urs_user_already_logged': + # Only get this cookie if we logged in successfully! + return True + + return False + # Download the file def download_file_with_cookiejar(self, url, file_count, total, recursion=False): - # see if we've already download this file and if it is that it is the correct size - download_file = os.path.basename(url).split('?')[0] - if os.path.isfile(download_file): - try: - request = Request(url) - request.get_method = lambda : 'HEAD' - response = urlopen(request, timeout=30) - remote_size = self.get_total_size(response) - # Check that we were able to derive a size. - if remote_size: - local_size = os.path.getsize(download_file) - if remote_size < (local_size+(local_size*.01)) and remote_size > (local_size-(local_size*.01)): - print (" > Download file {0} exists! \n > Skipping download of {1}. ".format(download_file, url)) - return None,None - #partial file size wasn't full file size, lets blow away the chunk and start again - print (" > Found {0} but it wasn't fully downloaded. Removing file and downloading again.".format(download_file)) - os.remove(download_file) - - except ssl.CertificateError as e: - print (" > ERROR: {0}".format(e)) - print (" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag") - return False,None - - except HTTPError as e: - if e.code == 401: - print (" > IMPORTANT: Your user may not have permission to download this type of data!") - else: - print (" > Unknown Error, Could not get file HEAD: {0}".format(e)) - - except URLError as e: - print ("URL Error (from HEAD): {0}, {1}".format( e.reason, url)) - if "ssl.c" in "{0}".format(e.reason): - print ("IMPORTANT: Remote location may not be accepting your SSL configuration. 
This is a terminal error.") - return False,None - - # attempt https connection - try: - request = Request(url) - response = urlopen(request, timeout=30) - - # Watch for redirect - if response.geturl() != url: - - # See if we were redirect BACK to URS for re-auth. - if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl(): - - if recursion: - print (" > Entering seemingly endless auth loop. Aborting. ") - return False, None - - # make this easier. If there is no app_type=401, add it - new_auth_url = response.geturl() - if "app_type" not in new_auth_url: - new_auth_url += "&app_type=401" - - print (" > While attempting to download {0}....".format(url)) - print (" > Need to obtain new cookie from {0}".format(new_auth_url)) - old_cookies = [cookie.name for cookie in self.cookie_jar] - opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) - request = Request(new_auth_url) - try: - response = opener.open(request) - for cookie in self.cookie_jar: - if cookie.name not in old_cookies: - print (" > Saved new cookie: {0}".format(cookie.name)) - - # A little hack to save session cookies - if cookie.discard: - cookie.expires = int(time.time()) + 60*60*24*30 - print (" > Saving session Cookie that should have been discarded! ") - - self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) - except HTTPError as e: - print ("HTTP Error: {0}, {1}".format( e.code, url)) - return False,None - - # Okay, now we have more cookies! Lets try again, recursively! - print (" > Attempting download again with new cookies!") - return self.download_file_with_cookiejar(url, file_count, total, recursion=True) - - print (" > 'Temporary' Redirect download @ Remote archive:\n > {0}".format(response.geturl())) - - # seems to be working - print ("({0}/{1}) Downloading {2}".format(file_count, total, url)) - - # Open our local file for writing and build status bar - tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') - self.chunk_read(response, tf, report_hook=self.chunk_report) - - # Reset download status - sys.stdout.write('\n') - - tempfile_name = tf.name - tf.close() - - #handle errors - except HTTPError as e: - print ("HTTP Error: {0}, {1}".format( e.code, url)) - - if e.code == 401: - print (" > IMPORTANT: Your user does not have permission to download this type of data!") - - if e.code == 403: - print (" > Got a 403 Error trying to download this file. ") - print (" > You MAY need to log in this app and agree to a EULA. ") - - return False,None - - except URLError as e: - print ("URL Error (from GET): {0}, {1}, {2}".format(e, e.reason, url)) - if "ssl.c" in "{0}".format(e.reason): - print ("IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error.") - return False,None - - except ssl.CertificateError as e: - print (" > ERROR: {0}".format(e)) - print (" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag") - return False,None - - # Return the file size - shutil.copy(tempfile_name, download_file) - os.remove(tempfile_name) - file_size = self.get_total_size(response) - actual_size = os.path.getsize(download_file) - if file_size is None: - # We were unable to calculate file size. 
- file_size = actual_size - return actual_size,file_size - + # see if we've already download this file and if it is that it is the correct size + download_file = os.path.basename(url).split('?')[0] + if os.path.isfile(download_file): + try: + request = Request(url) + request.get_method = lambda: 'HEAD' + response = urlopen(request, timeout=30) + remote_size = self.get_total_size(response) + # Check that we were able to derive a size. + if remote_size: + local_size = os.path.getsize(download_file) + if remote_size < (local_size + (local_size * .01)) and remote_size > ( + local_size - (local_size * .01)): + print(" > Download file {0} exists! \n > Skipping download of {1}. ".format(download_file, url)) + return None, None + # partial file size wasn't full file size, lets blow away the chunk and start again + print(" > Found {0} but it wasn't fully downloaded. Removing file and downloading again.".format( + download_file)) + os.remove(download_file) + + except ssl.CertificateError as e: + print(" > ERROR: {0}".format(e)) + print(" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag") + return False, None + + except HTTPError as e: + if e.code == 401: + print(" > IMPORTANT: Your user may not have permission to download this type of data!") + else: + print(" > Unknown Error, Could not get file HEAD: {0}".format(e)) + + except URLError as e: + print("URL Error (from HEAD): {0}, {1}".format(e.reason, url)) + if "ssl.c" in "{0}".format(e.reason): + print( + "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error.") + return False, None + + # attempt https connection + try: + request = Request(url) + response = urlopen(request, timeout=30) + + # Watch for redirect + if response.geturl() != url: + + # See if we were redirect BACK to URS for re-auth. + if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl(): + + if recursion: + print(" > Entering seemingly endless auth loop. Aborting. ") + return False, None + + # make this easier. If there is no app_type=401, add it + new_auth_url = response.geturl() + if "app_type" not in new_auth_url: + new_auth_url += "&app_type=401" + + print(" > While attempting to download {0}....".format(url)) + print(" > Need to obtain new cookie from {0}".format(new_auth_url)) + old_cookies = [cookie.name for cookie in self.cookie_jar] + opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), + HTTPSHandler(**self.context)) + request = Request(new_auth_url) + try: + response = opener.open(request) + for cookie in self.cookie_jar: + if cookie.name not in old_cookies: + print(" > Saved new cookie: {0}".format(cookie.name)) + + # A little hack to save session cookies + if cookie.discard: + cookie.expires = int(time.time()) + 60 * 60 * 24 * 30 + print(" > Saving session Cookie that should have been discarded! ") + + self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) + except HTTPError as e: + print("HTTP Error: {0}, {1}".format(e.code, url)) + return False, None + + # Okay, now we have more cookies! Lets try again, recursively! 
+ print(" > Attempting download again with new cookies!") + return self.download_file_with_cookiejar(url, file_count, total, recursion=True) + + print(" > 'Temporary' Redirect download @ Remote archive:\n > {0}".format(response.geturl())) + + # seems to be working + print("({0}/{1}) Downloading {2}".format(file_count, total, url)) + + # Open our local file for writing and build status bar + tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') + self.chunk_read(response, tf, report_hook=self.chunk_report) + + # Reset download status + sys.stdout.write('\n') + + tempfile_name = tf.name + tf.close() + + # handle errors + except HTTPError as e: + print("HTTP Error: {0}, {1}".format(e.code, url)) + + if e.code == 401: + print(" > IMPORTANT: Your user does not have permission to download this type of data!") + + if e.code == 403: + print(" > Got a 403 Error trying to download this file. ") + print(" > You MAY need to log in this app and agree to a EULA. ") + + return False, None + + except URLError as e: + print("URL Error (from GET): {0}, {1}, {2}".format(e, e.reason, url)) + if "ssl.c" in "{0}".format(e.reason): + print( + "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error.") + return False, None + + except ssl.CertificateError as e: + print(" > ERROR: {0}".format(e)) + print(" > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag") + return False, None + + # Return the file size + shutil.copy(tempfile_name, download_file) + os.remove(tempfile_name) + file_size = self.get_total_size(response) + actual_size = os.path.getsize(download_file) + if file_size is None: + # We were unable to calculate file size. + file_size = actual_size + return actual_size, file_size + def get_redirect_url_from_error(self, error): - find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") - print ("error file was: {}".format(error)) - redirect_url = find_redirect.search(error) - if redirect_url: - print("Found: {0}".format(redirect_url.group(0))) - return (redirect_url.group(0)) - - return None - - + find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") + print("error file was: {}".format(error)) + redirect_url = find_redirect.search(error) + if redirect_url: + print("Found: {0}".format(redirect_url.group(0))) + return (redirect_url.group(0)) + + return None + # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_report(self, bytes_so_far, file_size): - if file_size is not None: - percent = float(bytes_so_far) / file_size - percent = round(percent*100, 2) - sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % - (bytes_so_far, file_size, percent)) - else: - # We couldn't figure out the size. - sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) - + if file_size is not None: + percent = float(bytes_so_far) / file_size + percent = round(percent * 100, 2) + sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % + (bytes_so_far, file_size, percent)) + else: + # We couldn't figure out the size. 
+ sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) + # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): - file_size = self.get_total_size(response) - bytes_so_far = 0 - - while 1: - try: - chunk = response.read(chunk_size) - except: - sys.stdout.write("\n > There was an error reading data. \n") - break - - try: - local_file.write(chunk) - except TypeError: - local_file.write(chunk.decode(local_file.encoding)) - bytes_so_far += len(chunk) - - if not chunk: - break - - if report_hook: - report_hook(bytes_so_far, file_size) - - return bytes_so_far - + file_size = self.get_total_size(response) + bytes_so_far = 0 + + while 1: + try: + chunk = response.read(chunk_size) + except: + sys.stdout.write("\n > There was an error reading data. \n") + break + + try: + local_file.write(chunk) + except TypeError: + local_file.write(chunk.decode(local_file.encoding)) + bytes_so_far += len(chunk) + + if not chunk: + break + + if report_hook: + report_hook(bytes_so_far, file_size) + + return bytes_so_far + def get_total_size(self, response): - try: - file_size = response.info().getheader('Content-Length').strip() - except AttributeError: - try: - file_size = response.getheader('Content-Length').strip() - except AttributeError: - print ("> Problem getting size") - return None - - return int(file_size) - - + try: + file_size = response.info().getheader('Content-Length').strip() + except AttributeError: + try: + file_size = response.getheader('Content-Length').strip() + except AttributeError: + print("> Problem getting size") + return None + + return int(file_size) + # Get download urls from a metalink file def process_metalink(self, ml_file): - print ("Processing metalink file: {0}".format(ml_file)) - with open(ml_file, 'r') as ml: - xml = ml.read() - - # Hack to remove annoying namespace - it = ET.iterparse(StringIO(xml)) - for _, el in it: - if '}' in el.tag: - el.tag = el.tag.split('}', 1)[1] # strip all namespaces - root = it.root - - dl_urls = [] - ml_files = root.find('files') - for dl in ml_files: - dl_urls.append(dl.find('resources').find('url').text) - - if len(dl_urls) > 0: - return dl_urls - else: - return None - + print("Processing metalink file: {0}".format(ml_file)) + with open(ml_file, 'r') as ml: + xml = ml.read() + + # Hack to remove annoying namespace + it = ET.iterparse(StringIO(xml)) + for _, el in it: + if '}' in el.tag: + el.tag = el.tag.split('}', 1)[1] # strip all namespaces + root = it.root + + dl_urls = [] + ml_files = root.find('files') + for dl in ml_files: + dl_urls.append(dl.find('resources').find('url').text) + + if len(dl_urls) > 0: + return dl_urls + else: + return None + # Get download urls from a csv file def process_csv(self, csv_file): - print ("Processing csv file: {0}".format(csv_file)) - - dl_urls = [] - with open(csv_file, 'r') as csvf: - try: - csvr = csv.DictReader(csvf) - for row in csvr: - dl_urls.append(row['URL']) - except csv.Error as e: - print ("WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) - return None - except KeyError as e: - print ("WARNING: Could not find URL column in file %s. Skipping." 
% (csv_file)) - - if len(dl_urls) > 0: - return dl_urls - else: - return None + print("Processing csv file: {0}".format(csv_file)) + + dl_urls = [] + with open(csv_file, 'r') as csvf: + try: + csvr = csv.DictReader(csvf) + for row in csvr: + dl_urls.append(row['URL']) + except csv.Error as e: + print("WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) + return None + except KeyError as e: + print("WARNING: Could not find URL column in file %s. Skipping." % (csv_file)) + + if len(dl_urls) > 0: + return dl_urls + else: + return None # Download all the files in the list def download_files(self): for file_name in self.files: - + # make sure we haven't ctrl+c'd or some other abort trap if abort == True: - raise SystemExit - + raise SystemExit + # download counter self.cnt += 1 - + # set a timer start = time.time() - + # run download - size,total_size = self.download_file_with_cookiejar(file_name, self.cnt, len(self.files)) - + size, total_size = self.download_file_with_cookiejar(file_name, self.cnt, len(self.files)) + # calculte rate end = time.time() - + # stats: if size is None: self.skipped.append(file_name) # Check to see that the download didn't error and is the correct size - elif size is not False and (total_size < (size+(size*.01)) and total_size > (size-(size*.01))): + elif size is not False and (total_size < (size + (size * .01)) and total_size > (size - (size * .01))): # Download was good! elapsed = end - start elapsed = 1.0 if elapsed < 1 else elapsed - rate = (size/1024**2)/elapsed - - print ("Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec".format(size, elapsed, rate)) - + rate = (size / 1024 ** 2) / elapsed + + print("Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec".format(size, elapsed, rate)) + # add up metrics self.total_bytes += size self.total_time += elapsed - self.success.append( {'file':file_name, 'size':size } ) - + self.success.append({'file': file_name, 'size': size}) + else: - print ("There was a problem downloading {0}".format(file_name)) + print("There was a problem downloading {0}".format(file_name)) self.failed.append(file_name) - + def print_summary(self): # Print summary: - print ("\n\nDownload Summary ") - print ("--------------------------------------------------------------------------------") - print (" Successes: {0} files, {1} bytes ".format(len(self.success), self.total_bytes)) + print("\n\nDownload Summary ") + print("--------------------------------------------------------------------------------") + print(" Successes: {0} files, {1} bytes ".format(len(self.success), self.total_bytes)) for success_file in self.success: - print (" - {0} {1:.2f}MB".format(success_file['file'],(success_file['size']/1024.0**2))) + print(" - {0} {1:.2f}MB".format(success_file['file'], (success_file['size'] / 1024.0 ** 2))) if len(self.failed) > 0: - print (" Failures: {0} files".format(len(self.failed))) - for failed_file in self.failed: - print (" - {0}".format(failed_file)) + print(" Failures: {0} files".format(len(self.failed))) + for failed_file in self.failed: + print(" - {0}".format(failed_file)) if len(self.skipped) > 0: - print (" Skipped: {0} files".format(len(self.skipped))) - for skipped_file in self.skipped: - print (" - {0}".format(skipped_file)) + print(" Skipped: {0} files".format(len(self.skipped))) + for skipped_file in self.skipped: + print(" - {0}".format(skipped_file)) if len(self.success) > 0: - print (" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes/1024.0**2)/self.total_time)) - 
print ("--------------------------------------------------------------------------------") - + print(" Average Rate: {0:.2f}MB/sec".format((self.total_bytes / 1024.0 ** 2) / self.total_time)) + print("--------------------------------------------------------------------------------") + if __name__ == "__main__": # Setup a signal trap for SIGINT (Ctrl+C) signal.signal(signal.SIGINT, signal_handler) - + downloader = bulk_downloader() downloader.download_files() downloader.print_summary() From c16f23a9277427607eafa1e039638487b39f8fc0 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Wed, 7 Aug 2019 16:32:41 +0200 Subject: [PATCH 13/23] [asf_template.py] replaced file list with placeholder for replacement --- sentinel_api/asf_template.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sentinel_api/asf_template.py b/sentinel_api/asf_template.py index b00543c..ed9a462 100755 --- a/sentinel_api/asf_template.py +++ b/sentinel_api/asf_template.py @@ -80,9 +80,7 @@ def signal_handler(sig, frame): class bulk_downloader: def __init__(self): # List of files to download - self.files = [ - "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T045445_20190807T045510_017475_020DCD_A6EF.zip", - "https://datapool.asf.alaska.edu/GRD_HD/SB/S1B_IW_GRDH_1SDV_20190807T043950_20190807T044019_017475_020DCA_CE96.zip"] + self.files = ['placeholder_files'] # Local stash of cookies so we don't always have to ask self.cookie_jar_path = os.path.join(os.path.expanduser('~'), ".bulk_download_cookiejar.txt") From 99c0355d4e42cebf971548153d69416080d807ae Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Wed, 7 Aug 2019 16:34:09 +0200 Subject: [PATCH 14/23] [asf_template.py] new variable targetdir with placeholder string value so that files can be downloaded to specific directory instead of just the location of the script --- sentinel_api/asf_template.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sentinel_api/asf_template.py b/sentinel_api/asf_template.py index ed9a462..99b1896 100755 --- a/sentinel_api/asf_template.py +++ b/sentinel_api/asf_template.py @@ -90,9 +90,11 @@ def __init__(self): 'client': 'BO_n7nTIlMljdvU6kRRB3g', 'redir': 'https://vertex-retired.daac.asf.alaska.edu/services/urs4_token_request'} + self.targetdir = 'placeholder_targetdir' + # Make sure we can write it our current directory - if os.access(os.getcwd(), os.W_OK) is False: - print("WARNING: Cannot write to current path! Check permissions for {0}".format(os.getcwd())) + if os.access(self.targetdir, os.W_OK) is False: + print("WARNING: Cannot write to current path! 
Check permissions for {0}".format(self.targetdir)) exit(-1) # For SSL @@ -302,7 +304,8 @@ def check_cookie_is_logged_in(self, cj): # Download the file def download_file_with_cookiejar(self, url, file_count, total, recursion=False): # see if we've already download this file and if it is that it is the correct size - download_file = os.path.basename(url).split('?')[0] + download_file_base = os.path.basename(url).split('?')[0] + download_file = os.path.join(self.targetdir, download_file_base) if os.path.isfile(download_file): try: request = Request(url) @@ -391,7 +394,7 @@ def download_file_with_cookiejar(self, url, file_count, total, recursion=False): print("({0}/{1}) Downloading {2}".format(file_count, total, url)) # Open our local file for writing and build status bar - tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') + tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir=self.targetdir) self.chunk_read(response, tf, report_hook=self.chunk_report) # Reset download status From 04f059e73ea58d493541428fc259bc53abd492b6 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Wed, 7 Aug 2019 17:39:35 +0200 Subject: [PATCH 15/23] [SentinelDownloader._write_download_asf] new method --- sentinel_api/sentinel_api.py | 37 +++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index c4a9e2f..f023f2e 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -541,7 +541,42 @@ def _write_download_urls(self, filename): for scene in self.__scenes: outfile.write(scene['url'] + '\n') return filename - + + def _write_download_asf(self, filename): + template = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'asf_template.py') + + with open(template, 'r') as temp: + content = temp.read() + pattern = r'^(?PS1[AB])_' \ + r'(?PS1|S2|S3|S4|S5|S6|IW|EW|WV|EN|N1|N2|N3|N4|N5|N6|IM)_' \ + r'(?PSLC|GRD|OCN)' \ + r'(?P[FHM_])' + errormessage = '[ASF writer] unknown product: {}' + targets = [] + for scene in self.__scenes: + title = scene['title'] + match = re.search(pattern, title) + if match: + meta = match.groupdict() + url = 'https://datapool.asf.alaska.edu' + if meta['product'] == 'SLC': + url += '/SLC' + elif meta['product'] == 'GRD': + url += '/GRD_{}D'.format(meta['subproduct']) + else: + raise RuntimeError(errormessage.format(title)) + url += re.sub(r'(S)1([AB])', r'/\1\2/', meta['sensor']) + url += title + '.zip' + targets.append(url) + else: + raise RuntimeError(errormessage.format(title)) + linebreak = '\n{}"'.format(' ' * 12) + filestring = ('",' + linebreak).join(targets) + replacement = linebreak + filestring + '"' + content = content.replace("'placeholder_files'", replacement) + content = content.replace("placeholder_targetdir", self.__download_dir) + with open(filename, 'w') as out: + out.write(content) ########################################################### # Example use of class From f1d8b171746d137c0aa5e4e2087f3cc08e426831 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Wed, 7 Aug 2019 17:40:02 +0200 Subject: [PATCH 16/23] [SentinelDownloader.write_results] offer new option 'asf' --- sentinel_api/sentinel_api.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index f023f2e..06b0842 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -226,6 +226,8 @@ def write_results(self, file_type, filename, output=False): 
self._write_download_wget(filename) elif file_type == 'json': self._write_json(filename) + elif file_type == 'asf': + self._write_download_asf(filename) else: self._write_download_urls(filename) From 572da7322ec8a0e5dad4c96fa71b7afe89e1e679 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Thu, 17 Oct 2019 13:56:43 +0200 Subject: [PATCH 17/23] [SentinelDownloader._filter_overlap] fixed bug in case intersect is None --- sentinel_api/sentinel_api.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index c4a9e2f..6821ae2 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -471,9 +471,13 @@ def _filter_overlap(scenes, wkt_geometry, min_overlap=0): for scene in scenes: with wkt2vector(scene['footprint'], srs=4326) as vec2: footprint_area = vec2.getArea() - with intersect(vec1, vec2) as inter: + inter = intersect(vec1, vec2) + if inter is not None: intersect_area = inter.getArea() - overlap = intersect_area / site_area + overlap = intersect_area / site_area + inter.close() + else: + overlap = 0 if overlap > min_overlap or ( site_area / footprint_area > 1 and intersect_area / footprint_area > min_overlap): scene['_script_overlap'] = overlap * 100 From ca26f639eec7806b9b30fa13f731c276de550058 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Thu, 17 Oct 2019 13:58:10 +0200 Subject: [PATCH 18/23] [SentinelDownloader] set parameter min_overlap to 0.001 --- sentinel_api/sentinel_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index 6821ae2..12d87c3 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -119,7 +119,7 @@ def load_sites(self, input_file): print('Found %s features' % len(self.__geometries)) - def search(self, platform, min_overlap=0, download_dir=None, start_date=None, end_date=None, + def search(self, platform, min_overlap=0.001, download_dir=None, start_date=None, end_date=None, date_type='beginPosition', **keywords): """Search in ESA Data Hub for scenes with given arguments @@ -452,7 +452,7 @@ def _filter_existing(self, scenes): return filtered @staticmethod - def _filter_overlap(scenes, wkt_geometry, min_overlap=0): + def _filter_overlap(scenes, wkt_geometry, min_overlap=0.001): """Filter scenes based on the minimum overlap to the area of interest Args: From 727839a893f10a636e3ff6256159d0bf1beaf7a5 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Thu, 17 Oct 2019 14:03:08 +0200 Subject: [PATCH 19/23] [requirements] explicitly require spatialist>=0.3 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7b998a4..3ad0090 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -spatialist +spatialist>=0.3 progressbar2 requests>=2.8.1 gdal>=1.11.3 From 9478b852b4b26722ba318b66dff5048c9feddca4 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Thu, 17 Oct 2019 14:04:12 +0200 Subject: [PATCH 20/23] use setuptools_scm for version management --- sentinel_api/__init__.py | 9 ++++++++- sentinel_api/sentinel_api.py | 2 -- setup.py | 3 ++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/sentinel_api/__init__.py b/sentinel_api/__init__.py index 205e195..01de33c 100644 --- a/sentinel_api/__init__.py +++ b/sentinel_api/__init__.py @@ -1 +1,8 @@ -from .sentinel_api import SentinelDownloader \ No newline at end of file +from .sentinel_api import 
SentinelDownloader + +from pkg_resources import get_distribution, DistributionNotFound +try: + __version__ = get_distribution(__name__).version +except DistributionNotFound: + # package is not installed + pass diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index 12d87c3..63ce3b3 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -8,8 +8,6 @@ - Documentation """ -__version__ = '0.5.1' - ########################################################### # imports ########################################################### diff --git a/setup.py b/setup.py index 9c1d8cb..69ab0b4 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,8 @@ setup(name='sentinel_api', packages=find_packages(), include_package_data=True, - version='0.5.2', + setup_requires=['setuptools_scm'], + use_scm_version=True, description='ESA Sentinel Search & Download API', classifiers=[ 'Programming Language :: Python', From bdd8bd1e7ecf1c8ea12f667893b6646b3a9e53b4 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Thu, 17 Oct 2019 15:35:56 +0200 Subject: [PATCH 21/23] [SentinelDownloader] sorted methods alphabetically --- sentinel_api/sentinel_api.py | 540 ++++++++++++++++++----------------- 1 file changed, 271 insertions(+), 269 deletions(-) diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index 6c6ab0b..0b3c18d 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -19,7 +19,6 @@ import requests from osgeo import ogr -ogr.UseExceptions() from spatialist.vector import Vector, wkt2vector, intersect @@ -28,6 +27,8 @@ import zipfile as zf from datetime import datetime, date +ogr.UseExceptions() + class SentinelDownloader(object): """Class to search and download for Sentinel data""" @@ -46,60 +47,87 @@ def __init__(self, username, password, api_url='https://scihub.copernicus.eu/api self.__esa_username = username self.__esa_password = password - def set_download_dir(self, download_dir): - """Set directory for check against existing downloaded files and as directory where to download - - Args: - download_dir: Path to directory - - """ - print('Setting download directory to %s' % download_dir) - if not os.path.exists(download_dir): - os.makedirs(download_dir) - - self.__download_dir = download_dir - - def set_data_dir(self, data_dir): - """Set directory for check against existing downloaded files; this can be repeated multiple times to create a list of data directories + def download_all(self, download_dir=None): + """Download all scenes Args: - data_dir: Path to directory - - """ - print('Adding data directory {}'.format(data_dir)) - self.__data_dirs.append(data_dir) - - def set_geometries(self, geometries): - """Manually set one or more geometries for data search + download_dir: Define a directory where to download the scenes + (Default: Use default from class -> current directory) - Args: - geometries: String or List representation of one or more Wkt Geometries, - Geometries have to be in Lat/Lng, EPSG:4326 projection! 
+ Returns: + Dictionary of failed ('failed') and successfully ('success') downloaded scenes """ - # print('Set geometries:') - # print(geometries) - if isinstance(geometries, list): - self.__geometries = geometries + if download_dir is None: + download_dir = self.__download_dir - elif isinstance(geometries, str): - self.__geometries = [geometries] + downloaded = [] + downloaded_failed = [] - else: - raise Exception('geometries parameter needs to be a list or a string') + for scene in self.__scenes: + url = scene['url'] + filename = scene['title'] + '.zip' + path = os.path.join(download_dir, filename) + print('===========================================================') + print('Download file path: %s' % path) + + try: + response = requests.get(url, auth=(self.__esa_username, self.__esa_password), stream=True) + except requests.exceptions.ConnectionError: + print('Connection Error') + continue + if 'Content-Length' not in response.headers: + print('Content-Length not found') + print(url) + continue + size = int(response.headers['Content-Length'].strip()) + if size < 1000000: + print('The found scene is too small: %s (%s)' % (scene['title'], size)) + print(url) + continue + + print('Size of the scene: %s MB' % (size / 1024 / 1024)) # show in MegaBytes + my_bytes = 0 + widgets = ["Downloading: ", pb.Bar(marker="*", left="[", right=" "), + pb.Percentage(), " ", pb.FileTransferSpeed(), "] ", + " of {0}MB".format(str(round(size / 1024 / 1024, 2))[:4])] + pbar = pb.ProgressBar(widgets=widgets, maxval=size).start() + + try: + down = open(path, 'wb') + for buf in response.iter_content(1024): + if buf: + down.write(buf) + my_bytes += len(buf) + pbar.update(my_bytes) + pbar.finish() + down.close() + except KeyboardInterrupt: + print("\nKeyboard interruption, remove current download and exit execution of script") + os.remove(path) + sys.exit(0) + + # Check if file is valid + print("Check if file is valid: ") + valid = self._is_valid(path) + + if not valid: + downloaded_failed.append(path) + print('invalid file is being deleted.') + os.remove(path) + else: + downloaded.append(path) - # Test first geometry - try: - vec = wkt2vector(self.__geometries[0], srs=4326) - except RuntimeError as e: - raise Exception('The first geometry is not valid! 
Error: %s' % e) - finally: - vec = None + return {'success': downloaded, 'failed': downloaded_failed} def get_geometries(self): """Return list of geometries""" return self.__geometries + def get_scenes(self): + """Return searched and filtered scenes""" + return self.__scenes + def load_sites(self, input_file): """ Load features from input file and transform geometries to Lat/Lon (EPSG 4326) @@ -117,6 +145,22 @@ def load_sites(self, input_file): print('Found %s features' % len(self.__geometries)) + @staticmethod + def multipolygon2list(wkt): + geom = ogr.CreateGeometryFromWkt(wkt) + if geom.GetGeometryName() == 'MULTIPOLYGON': + return [x.ExportToWkt() for x in geom] + else: + return [geom.ExportToWkt()] + + def print_scenes(self): + """Print title of searched and filtered scenes""" + + def sorter(x): return re.findall('[0-9T]{15}', x)[0] + + titles = sorted([x['title'] for x in self.__scenes], key=sorter) + print('\n'.join(titles)) + def search(self, platform, min_overlap=0.001, download_dir=None, start_date=None, end_date=None, date_type='beginPosition', **keywords): """Search in ESA Data Hub for scenes with given arguments @@ -198,17 +242,55 @@ def search(self, platform, min_overlap=0.001, download_dir=None, start_date=None print('%s total scenes after merging' % len(self.__scenes)) print('===========================================================') - def get_scenes(self): - """Return searched and filtered scenes""" - return self.__scenes + def set_data_dir(self, data_dir): + """Set directory for check against existing downloaded files; this can be repeated multiple times to create a list of data directories + + Args: + data_dir: Path to directory + + """ + print('Adding data directory {}'.format(data_dir)) + self.__data_dirs.append(data_dir) - def print_scenes(self): - """Print title of searched and filtered scenes""" + def set_download_dir(self, download_dir): + """Set directory for check against existing downloaded files and as directory where to download + + Args: + download_dir: Path to directory + + """ + print('Setting download directory to %s' % download_dir) + if not os.path.exists(download_dir): + os.makedirs(download_dir) - def sorter(x): return re.findall('[0-9T]{15}', x)[0] + self.__download_dir = download_dir + + def set_geometries(self, geometries): + """Manually set one or more geometries for data search + + Args: + geometries: String or List representation of one or more Wkt Geometries, + Geometries have to be in Lat/Lng, EPSG:4326 projection! + + """ + # print('Set geometries:') + # print(geometries) + if isinstance(geometries, list): + self.__geometries = geometries - titles = sorted([x['title'] for x in self.__scenes], key=sorter) - print('\n'.join(titles)) + elif isinstance(geometries, str): + self.__geometries = [geometries] + + else: + raise Exception('geometries parameter needs to be a list or a string') + + # Test first geometry + try: + vec = wkt2vector(self.__geometries[0], srs=4326) + except RuntimeError as e: + raise Exception('The first geometry is not valid! 
Error: %s' % e) + finally: + vec = None def write_results(self, file_type, filename, output=False): """Write results to disk in different kind of formats @@ -233,78 +315,83 @@ def write_results(self, file_type, filename, output=False): with open(filename, 'r') as infile: print(infile.read()) - def download_all(self, download_dir=None): - """Download all scenes + def _filter_existing(self, scenes): + """Filter scenes based on existing files in the define download directory and all further data directories Args: - download_dir: Define a directory where to download the scenes - (Default: Use default from class -> current directory) + scenes: List of scenes to be filtered Returns: - Dictionary of failed ('failed') and successfully ('success') downloaded scenes + Filtered list of scenes """ - if download_dir is None: - download_dir = self.__download_dir - - downloaded = [] - downloaded_failed = [] + filtered = [] + dirs = self.__data_dirs + [self.__download_dir] + for scene in scenes: + exist = [os.path.isfile(os.path.join(dir, scene['title'] + '.zip')) for dir in dirs] + if not any(exist): + filtered.append(scene) + return filtered + + @staticmethod + def _filter_overlap(scenes, wkt_geometry, min_overlap=0.001): + """Filter scenes based on the minimum overlap to the area of interest + + Args: + scenes: List of scenes to filter + wkt_geometry: Wkt Geometry representation of the area of interest + min_overlap: Minimum overlap (0-1) in decimal format between scene geometry and area of interest + + Returns: + Filtered list of scenes + + """ + filtered = [] - for scene in self.__scenes: - url = scene['url'] - filename = scene['title'] + '.zip' - path = os.path.join(download_dir, filename) - print('===========================================================') - print('Download file path: %s' % path) - - try: - response = requests.get(url, auth=(self.__esa_username, self.__esa_password), stream=True) - except requests.exceptions.ConnectionError: - print('Connection Error') - continue - if 'Content-Length' not in response.headers: - print('Content-Length not found') - print(url) - continue - size = int(response.headers['Content-Length'].strip()) - if size < 1000000: - print('The found scene is too small: %s (%s)' % (scene['title'], size)) - print(url) - continue + with wkt2vector(wkt_geometry, srs=4326) as vec1: + site_area = vec1.getArea() + for scene in scenes: + with wkt2vector(scene['footprint'], srs=4326) as vec2: + footprint_area = vec2.getArea() + inter = intersect(vec1, vec2) + if inter is not None: + intersect_area = inter.getArea() + overlap = intersect_area / site_area + inter.close() + else: + overlap = 0 + if overlap > min_overlap or ( + site_area / footprint_area > 1 and intersect_area / footprint_area > min_overlap): + scene['_script_overlap'] = overlap * 100 + filtered.append(scene) - print('Size of the scene: %s MB' % (size / 1024 / 1024)) # show in MegaBytes - my_bytes = 0 - widgets = ["Downloading: ", pb.Bar(marker="*", left="[", right=" "), - pb.Percentage(), " ", pb.FileTransferSpeed(), "] ", - " of {0}MB".format(str(round(size / 1024 / 1024, 2))[:4])] - pbar = pb.ProgressBar(widgets=widgets, maxval=size).start() - - try: - down = open(path, 'wb') - for buf in response.iter_content(1024): - if buf: - down.write(buf) - my_bytes += len(buf) - pbar.update(my_bytes) - pbar.finish() - down.close() - except KeyboardInterrupt: - print("\nKeyboard interruption, remove current download and exit execution of script") - os.remove(path) - sys.exit(0) - - # Check if file is valid - 
print("Check if file is valid: ") - valid = self._is_valid(path) - - if not valid: - downloaded_failed.append(path) - print('invalid file is being deleted.') - os.remove(path) - else: - downloaded.append(path) + return filtered + + def _format_url(self, startindex, wkt_geometry, platform, date_filtering, **keywords): + """Format the search URL based on the arguments + + Args: + wkt_geometry: Geometry in Wkt representation + platform: Satellite to search in + dateFiltering: filter of dates + **keywords: Further search parameters from ESA Data Hub + + Returns: + url: String URL to search for this data + + """ + with wkt2vector(wkt_geometry, srs=4326) as vec: + bbox = vec.bbox().convert2wkt()[0] - return {'success': downloaded, 'failed': downloaded_failed} + query_area = ' AND (footprint:"Intersects(%s)")' % bbox + filters = '' + for kw in sorted(keywords.keys()): + filters += ' AND (%s:%s)' % (kw, keywords[kw]) + + url = os.path.join(self.__esa_api_url, + 'search?format=json&rows=100&start=%s&q=%s%s%s%s' % + (startindex, platform, date_filtering, query_area, filters)) + return url @staticmethod def _is_valid(zipfile, minsize=1000000): @@ -335,64 +422,27 @@ def _is_valid(zipfile, minsize=1000000): print('file seems to be valid.') return not corrupt - def _format_url(self, startindex, wkt_geometry, platform, date_filtering, **keywords): - """Format the search URL based on the arguments - - Args: - wkt_geometry: Geometry in Wkt representation - platform: Satellite to search in - dateFiltering: filter of dates - **keywords: Further search parameters from ESA Data Hub - - Returns: - url: String URL to search for this data - - """ - with wkt2vector(wkt_geometry, srs=4326) as vec: - bbox = vec.bbox().convert2wkt()[0] - - query_area = ' AND (footprint:"Intersects(%s)")' % bbox - filters = '' - for kw in sorted(keywords.keys()): - filters += ' AND (%s:%s)' % (kw, keywords[kw]) - - url = os.path.join(self.__esa_api_url, - 'search?format=json&rows=100&start=%s&q=%s%s%s%s' % - (startindex, platform, date_filtering, query_area, filters)) - return url - @staticmethod - def multipolygon2list(wkt): - geom = ogr.CreateGeometryFromWkt(wkt) - if geom.GetGeometryName() == 'MULTIPOLYGON': - return [x.ExportToWkt() for x in geom] - else: - return [geom.ExportToWkt()] - - def _search_request(self, url): - """Do the HTTP request to ESA Data Hub + def _merge_scenes(scenes1, scenes2): + """Merge scenes from two different lists using the 'id' keyword Args: - url: HTTP URL to request + scenes1: List of prior available scenes + scenes2: List of new scenes Returns: - List of scenes (result from _parseJSON method), empty list if an error occurred + Merged list of scenes """ - try: - content = requests.get(url, auth=(self.__esa_username, self.__esa_password), verify=True) - if not content.status_code // 100 == 2: - print('Error: API returned unexpected response {}:'.format(content.status_code)) - print(content.text) - return [] - result = self._parse_json(content.json()) - for item in result: - item['footprint'] = self.multipolygon2list(item['footprint'])[0] - return result + existing_ids = [] + for scene in scenes1: + existing_ids.append(scene['id']) - except requests.exceptions.RequestException as exc: - print('Error: {}'.format(exc)) - return [] + for scene in scenes2: + if not scene['id'] in existing_ids: + scenes1.append(scene) + + return scenes1 @staticmethod def _parse_json(obj): @@ -433,122 +483,34 @@ def _parse_json(obj): return scenes_dict - def _filter_existing(self, scenes): - """Filter scenes based on existing 
files in the define download directory and all further data directories - - Args: - scenes: List of scenes to be filtered - - Returns: - Filtered list of scenes - - """ - filtered = [] - dirs = self.__data_dirs + [self.__download_dir] - for scene in scenes: - exist = [os.path.isfile(os.path.join(dir, scene['title'] + '.zip')) for dir in dirs] - if not any(exist): - filtered.append(scene) - return filtered - - @staticmethod - def _filter_overlap(scenes, wkt_geometry, min_overlap=0.001): - """Filter scenes based on the minimum overlap to the area of interest - - Args: - scenes: List of scenes to filter - wkt_geometry: Wkt Geometry representation of the area of interest - min_overlap: Minimum overlap (0-1) in decimal format between scene geometry and area of interest - - Returns: - Filtered list of scenes - - """ - filtered = [] - - with wkt2vector(wkt_geometry, srs=4326) as vec1: - site_area = vec1.getArea() - for scene in scenes: - with wkt2vector(scene['footprint'], srs=4326) as vec2: - footprint_area = vec2.getArea() - inter = intersect(vec1, vec2) - if inter is not None: - intersect_area = inter.getArea() - overlap = intersect_area / site_area - inter.close() - else: - overlap = 0 - if overlap > min_overlap or ( - site_area / footprint_area > 1 and intersect_area / footprint_area > min_overlap): - scene['_script_overlap'] = overlap * 100 - filtered.append(scene) - - return filtered - - @staticmethod - def _merge_scenes(scenes1, scenes2): - """Merge scenes from two different lists using the 'id' keyword + def _search_request(self, url): + """Do the HTTP request to ESA Data Hub Args: - scenes1: List of prior available scenes - scenes2: List of new scenes + url: HTTP URL to request Returns: - Merged list of scenes + List of scenes (result from _parseJSON method), empty list if an error occurred """ - existing_ids = [] - for scene in scenes1: - existing_ids.append(scene['id']) - - for scene in scenes2: - if not scene['id'] in existing_ids: - scenes1.append(scene) + try: + content = requests.get(url, auth=(self.__esa_username, self.__esa_password), verify=True) + if not content.status_code // 100 == 2: + print('Error: API returned unexpected response {}:'.format(content.status_code)) + print(content.text) + return [] + result = self._parse_json(content.json()) + for item in result: + item['footprint'] = self.multipolygon2list(item['footprint'])[0] + return result - return scenes1 - - def _write_json(self, filename): - """Write JSON representation of scenes list to file - - Args: - filename: Path to file to write in - - """ - with open(filename, 'w') as outfile: - json.dump(self.__scenes, outfile) - return True - - def _write_download_wget(self, filename): - """Write bash file to download scene URLs based on wget software - Please note: User authentication to ESA Data Hub (username, password) is being stored in plain text! 
- - Args: - filename: Path to file to write in - - """ - with open(filename, 'w') as outfile: - for scene in self.__scenes: - out = 'wget -c -T120 --no-check-certificate --user="{}" --password="{}" -O {}.zip "{}"\n'\ - .format(self.__esa_username, self.__esa_password, - os.path.join(self.__download_dir, scene['title']), scene['url'].replace('$', '\$')) - - outfile.write(out) - - def _write_download_urls(self, filename): - """Write URLs of scenes to text file - - Args: - filename: Path to file to write in - - """ - with open(filename, 'w') as outfile: - for scene in self.__scenes: - outfile.write(scene['url'] + '\n') - return filename + except requests.exceptions.RequestException as exc: + print('Error: {}'.format(exc)) + return [] def _write_download_asf(self, filename): template = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'asf_template.py') - + with open(template, 'r') as temp: content = temp.read() pattern = r'^(?PS1[AB])_' \ @@ -581,6 +543,46 @@ def _write_download_asf(self, filename): content = content.replace("placeholder_targetdir", self.__download_dir) with open(filename, 'w') as out: out.write(content) + + def _write_download_urls(self, filename): + """Write URLs of scenes to text file + + Args: + filename: Path to file to write in + + """ + with open(filename, 'w') as outfile: + for scene in self.__scenes: + outfile.write(scene['url'] + '\n') + return filename + + def _write_download_wget(self, filename): + """Write bash file to download scene URLs based on wget software + Please note: User authentication to ESA Data Hub (username, password) is being stored in plain text! + + Args: + filename: Path to file to write in + + """ + with open(filename, 'w') as outfile: + for scene in self.__scenes: + out = 'wget -c -T120 --no-check-certificate --user="{}" --password="{}" -O {}.zip "{}"\n' \ + .format(self.__esa_username, self.__esa_password, + os.path.join(self.__download_dir, scene['title']), scene['url'].replace('$', '\$')) + + outfile.write(out) + + def _write_json(self, filename): + """Write JSON representation of scenes list to file + + Args: + filename: Path to file to write in + + """ + with open(filename, 'w') as outfile: + json.dump(self.__scenes, outfile) + return True + ########################################################### # Example use of class From 4544ac918f10666f52c8a9608687648dc841ee27 Mon Sep 17 00:00:00 2001 From: John Truckenbrodt Date: Thu, 17 Oct 2019 15:40:36 +0200 Subject: [PATCH 22/23] [SentinelDownloader.write_results] extended docstring to new option --- sentinel_api/sentinel_api.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sentinel_api/sentinel_api.py b/sentinel_api/sentinel_api.py index 0b3c18d..79e094c 100644 --- a/sentinel_api/sentinel_api.py +++ b/sentinel_api/sentinel_api.py @@ -296,8 +296,11 @@ def write_results(self, file_type, filename, output=False): """Write results to disk in different kind of formats Args: - file_type: Use 'wget' to write download bash file with wget software, 'json' to write the dictionary object - to file, or 'url' to write a file with downloadable URLs + file_type: the file format to use: + - 'wget': download bash file with wget software + - 'json': write the dictionary object + - 'url': a file with downloadable URLs + - 'asf': a Python script for download from ASF Vertex filename: Path to file output: If True the written file will also be send to stdout (Default: False) From f433f039c11af689d046afa077e536664dda05d8 Mon Sep 17 00:00:00 2001 From: johntruckenbrodt Date: Thu, 
17 Oct 2019 17:22:06 +0200 Subject: [PATCH 23/23] [setup] aligned requirements with requirements.txt --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 69ab0b4..56aed57 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'Programming Language :: Python', ], install_requires=['gdal>=1.11.3', - 'spatialist', + 'spatialist>=0.3', 'progressbar2', 'requests>=2.8.1'], url='https://github.com/jonas-eberle/esa_sentinel.git',
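
For context, a minimal usage sketch of the 'asf' writer introduced in patches 15, 16 and 22: search the ESA Data Hub, then write a standalone ASF bulk-download script derived from asf_template.py (the 'placeholder_files' and 'placeholder_targetdir' tokens from patches 13 and 14 are filled in by _write_download_asf). This is not part of the patch series itself; the credentials, AOI polygon, paths and the 'producttype' keyword below are placeholder examples.

    # sketch only: illustrates the API added/changed in the patches above
    from sentinel_api import SentinelDownloader

    s1 = SentinelDownloader('esa_user', 'esa_password',
                            api_url='https://scihub.copernicus.eu/apihub/')
    s1.set_download_dir('/data/sentinel1')   # becomes targetdir in the generated script
    s1.set_geometries('POLYGON ((11.5 50.8, 11.9 50.8, 11.9 51.1, 11.5 51.1, 11.5 50.8))')
    s1.search('S1A*', min_overlap=0.001, producttype='GRD')

    # 'asf' writes a self-contained Python downloader based on asf_template.py
    s1.write_results(file_type='asf', filename='/data/sentinel1/asf_download.py')

The generated script still authenticates against NASA Earthdata (URS) at run time, following the get_cookie/get_new_cookie logic shown in asf_template.py above.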
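
The overlap guard from patches 17 and 18 can also be read in isolation as the sketch below. The helper name and its arguments are illustrative only; it assumes spatialist>=0.3 as pinned in patch 19 and mirrors the None check that replaced the context-manager call on intersect().

    # sketch only: standalone version of the patched overlap test
    from spatialist.vector import wkt2vector, intersect

    def overlap_fraction(aoi_wkt, footprint_wkt):
        """Return the fraction of the AOI covered by a scene footprint (0 if disjoint)."""
        with wkt2vector(aoi_wkt, srs=4326) as vec1:
            site_area = vec1.getArea()
            with wkt2vector(footprint_wkt, srs=4326) as vec2:
                inter = intersect(vec1, vec2)
                if inter is None:
                    # the case fixed in patch 17: no intersection at all
                    return 0
                overlap = inter.getArea() / site_area
                inter.close()
                return overlap

Scenes whose fraction exceeds min_overlap (0.001 by default since patch 18) are kept by _filter_overlap.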