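"""Download NOAA-21 (JPSS-2) VIIRS SDR granules for a chosen time window from the
NOAA open-data S3 bucket and render a true-color GeoTIFF with Polar2Grid."""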
import os
import re
import urllib.request
import zipfile
from datetime import datetime  # basic date and time types
from pathlib import Path

import boto3
import pandas as pd  # python data analysis library
from botocore.handlers import disable_signing
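
# The NOAA open-data bucket is public, so disable request signing and access it
# anonymously (no AWS credentials required).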
s3 = boto3.resource('s3')
s3.meta.client.meta.events.register('choose-signer.s3.*', disable_signing)

# Time window of interest (UTC)
year = '2023'
month = '09'
day = '15'
start_hour = '17'
start_minute = '13'
end_hour = '17'
end_minute = '17'

# NOAA-21 open-data bucket and the products to pull
bucket_name = "noaa-nesdis-n21-pds"
product = "true_color"
bands = ["M3", "M4", "M5"]  # VIIRS moderate-resolution bands used for the true-color composite
targets_data_geo = ["VIIRS-IMG-GEO-TC", "VIIRS-MOD-GEO-TC"]  # terrain-corrected geolocation files

# Local paths and output naming
samples_path = './jpss_samples'
shapefiles_path = './shapefiles'
output_dir = './output'
output_file = f'{bucket_name}-{product}-{year}-{month}-{day}-{start_hour}'
shapefiles_url = 'https://www.soest.hawaii.edu/pwessel/gshhg/gshhg-shp-2.3.7.zip'

start_limiter = datetime(int(year), int(month), int(day), int(start_hour), int(start_minute), 0)
end_limiter = datetime(int(year), int(month), int(day), int(end_hour), int(end_minute), 0)
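
# Only granules whose start timestamp falls within [start_limiter, end_limiter) are downloaded.
# The local directories are assumed to exist already; as a minimal guard, create them up front.
for local_dir in (samples_path, shapefiles_path, output_dir):
    Path(local_dir).mkdir(parents=True, exist_ok=True)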

def clean_dir(target_dir):
    """Delete all regular (non-hidden) files under target_dir."""
    folder = Path(target_dir)
    for item in folder.rglob("*"):
        try:
            if item.is_file() and not item.name.startswith("."):
                item.unlink()
        except OSError as ose:
            print(f"Failed to delete {item}. {ose}")

def list_blobs(bucket, prefix):
    """List all object keys in the bucket under the given prefix."""
    storage = s3.Bucket(bucket)
    blobs = storage.objects.filter(Prefix=prefix)
    results = []
    for blob in blobs:
        results.append(blob.key)
    return results

def parse_dates(key):
    """Extract the granule start datetime from an SDR key of the form ..._dYYYYMMDD_tHHMMSS..._e..."""
    s_date = re.search('_(d.*)_e', key).group(1)
    year = int(s_date[1:5])
    month = int(s_date[5:7])
    day = int(s_date[7:9])
    hour = int(s_date[11:13])
    minute = int(s_date[13:15])
    seconds = int(s_date[15:17])
    return datetime(year, month, day, hour, minute, seconds)

def download_blob(bucket, source_blob_name, destination_file_name):
    """Download an object unless a local copy of the same size already exists."""
    storage = s3.Bucket(bucket)
    try:
        local_size = Path(destination_file_name).stat().st_size
    except FileNotFoundError:
        local_size = None
    s3_object = s3.Object(bucket, source_blob_name)
    if local_size != s3_object.content_length:
        storage.download_file(source_blob_name, destination_file_name)
        print(f'Blob {source_blob_name} downloaded to {destination_file_name}')
    else:
        print(f'File {source_blob_name} size matches local copy, skipping...')

def download_data():
    """Download the VIIRS SDR band files that fall inside the requested time window."""
    for band in bands:
        print(f"Searching for band {band}...")
        target_data = f"VIIRS-{band}-SDR"
        results_data = list_blobs(bucket_name, f"{target_data}/{year}/{month}/{day}")
        dfr = pd.DataFrame(results_data, columns=['Files'])
        dfr['Date'] = dfr.Files.apply(parse_dates)
        lets_get = dfr[(dfr.Date >= start_limiter) & (dfr.Date < end_limiter)].Files.to_list()
        print('Filtered to:', len(lets_get))
        print(lets_get)
        for file in lets_get:
            file_name = file.rsplit('/', 1)[-1]
            download_blob(bucket_name, file, f'{samples_path}/{file_name}')

def download_data_geo():
    """Download the terrain-corrected geolocation files for the same time window."""
    for target in targets_data_geo:
        print(f"Searching for {target}...")
        results_geo = list_blobs(bucket_name, f"{target}/{year}/{month}/{day}/")
        dfr_geo = pd.DataFrame(results_geo, columns=['Files'])
        dfr_geo['Date'] = dfr_geo.Files.apply(parse_dates)
        lets_get_geo = dfr_geo[(dfr_geo.Date >= start_limiter) & (dfr_geo.Date < end_limiter)].Files.to_list()
        print('Filtered to:', len(lets_get_geo))
        for file in lets_get_geo:
            file_name = file.rsplit('/', 1)[-1]
            download_blob(bucket_name, file, f'{samples_path}/{file_name}')

def update_shapefiles():
    """Download and unpack the GSHHG shapefiles unless the local archive already matches the remote size."""
    archive_path = f"{shapefiles_path}/{os.path.basename(shapefiles_url)}"
    try:
        shapefile_info = urllib.request.urlopen(shapefiles_url).info().get('Content-Length')
    except OSError:
        shapefile_info = None
    try:
        local_size = Path(archive_path).stat().st_size
    except FileNotFoundError:
        local_size = None
    if local_size is None or shapefile_info is None or int(shapefile_info) != local_size:
        print(f"Going to download {shapefiles_url}")
        urllib.request.urlretrieve(shapefiles_url, archive_path)
        print("Shapefiles downloaded, unzipping...")
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            zip_ref.extractall(f"{shapefiles_path}/")
        print("Shapefiles unzip completed")
    else:
        print("Shapefiles up to date")

# clean_dir(samples_path)  # optionally wipe previously downloaded granules first
download_data()
download_data_geo()
update_shapefiles()

# Run Polar2Grid with its bundled defaults to produce a true-color GeoTIFF from the downloaded SDRs
os.environ["USE_POLAR2GRID_DEFAULTS"] = "1"
from polar2grid.glue import main as polar2grid

polar2grid_args = ["-r", "viirs_sdr", "-w", "geotiff",
                   "--output-filename", f"{output_dir}/{output_file}.tif",
                   "-vvv", "-p", product, "-f", str(samples_path)]
polar2grid(argv=polar2grid_args)

# Optional post-processing: overlay coastlines and a lat/lon grid on the GeoTIFF
# from polar2grid.add_coastlines import main as add_coastlines
# add_coastlines_args = ["--shapes-dir", str(shapefiles_path), "--add-coastlines", "--add-grid",
#                        "--grid-D", "10.0", "10.0", "--grid-d", "10.0", "10.0",
#                        "--grid-text-size", "20", f"{output_dir}/{output_file}.tif"]
# add_coastlines(argv=add_coastlines_args)