Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ntl-metadata #84

Merged
merged 37 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
b8c3028
refactor: stac_columns test to compare with EXCEL rather than types.json
Gabe-Levin Oct 30, 2024
e85abcd
refactor: get all item properties from metadata excel
Gabe-Levin Oct 30, 2024
d322864
docs(readme): added instructions for adding new item to collection
Gabe-Levin Oct 30, 2024
0860dad
feat: add item selection to verify_columns in ingestion pre-check
Gabe-Levin Oct 30, 2024
019221d
docs(readme): slight revision
Gabe-Levin Oct 30, 2024
32918c3
feat: ability to link new items to the existing collection. ex. 2013 …
Gabe-Levin Oct 30, 2024
68af607
fix: run pre-commit checks locally
Gabe-Levin Oct 30, 2024
da604ac
docs(readme): fix instructions for adding new item
Gabe-Levin Oct 30, 2024
cad5854
feat: add item_name to download_and_load
Gabe-Levin Nov 7, 2024
0aa5a03
fix: add clean database to fixtures to pass ingest tests
Gabe-Levin Nov 8, 2024
e4e364a
docs(readme): more detailed instructions and update formatting
Gabe-Levin Nov 8, 2024
b272a40
fix: running pre-commit checks locally
Gabe-Levin Nov 8, 2024
8d0d1dc
fix: add pandas to test dependencies
Gabe-Levin Nov 8, 2024
7936991
fix: updated poetry.lock file using 'poetry lock' cmd
Gabe-Levin Nov 8, 2024
98ac46c
Revert "fix: updated poetry.lock file using 'poetry lock' cmd"
Gabe-Levin Nov 8, 2024
a581c99
fix: add specific version of pandas to test dependencies
Gabe-Levin Nov 8, 2024
cac4242
fix: skipping metadata test, due to pandas dependency failing in gh a…
Gabe-Levin Nov 8, 2024
4e74c82
test: turn on metadata test to record error msg
Gabe-Levin Nov 11, 2024
92c31ad
fix: run pre-commit checks locally
Gabe-Levin Nov 11, 2024
2320eb4
fix: skip metadata test
Gabe-Levin Nov 12, 2024
5e2c3cb
refactor: link_new_item.py
andresfchamorro Nov 12, 2024
e20e8a8
run pre-commit
andresfchamorro Nov 12, 2024
988fa9d
fix: rename item ids back to original
andresfchamorro Nov 12, 2024
ea5628b
fix: move and update readme
andresfchamorro Nov 12, 2024
90ecaaf
add variable description to table extension
andresfchamorro Nov 13, 2024
00a07dd
fix: precommit checks
Gabe-Levin Nov 14, 2024
2cfe13d
fix: include hex_id in new item metadata
andresfchamorro Nov 15, 2024
b3b8549
feat: adding nighttime_lights_2013 item
Gabe-Levin Nov 15, 2024
30bdb2c
added urbanization to STAC catalog
bpstewar Nov 18, 2024
d7b498b
Adding urbanization to STAC again
bpstewar Nov 18, 2024
2ac5c2d
remove duplicate urban item
andresfchamorro Nov 18, 2024
d015f69
run pre-commit locally
andresfchamorro Nov 18, 2024
11ec938
feat: update metadata tests
Gabe-Levin Nov 20, 2024
23d1711
Merge remote-tracking branch 'origin/main' into feature/ntl-metadata
Gabe-Levin Nov 22, 2024
29c6a4e
Merge remote-tracking branch 'origin/main' into feature/ntl-metadata
Gabe-Levin Nov 26, 2024
2321bf2
feat: minor tweaks for successful NTL ingest into test db
Gabe-Levin Nov 26, 2024
bcf7098
fix: include parquet hex_id in verify columns, STAC hex_id need to be…
Gabe-Levin Nov 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,295 changes: 1,295 additions & 0 deletions notebooks/MP_SCRIPTS/Combine_CSV_into_parquet.ipynb

Large diffs are not rendered by default.

1,512 changes: 1,512 additions & 0 deletions notebooks/MP_SCRIPTS/Population_demographics/Combine_CSV_into_parquet.ipynb

Large diffs are not rendered by default.

1,414 changes: 1,414 additions & 0 deletions notebooks/MP_SCRIPTS/Population_demographics/TEST_zonal_stats_data.ipynb

Large diffs are not rendered by default.

Empty file.
File renamed without changes.
1,414 changes: 1,414 additions & 0 deletions notebooks/MP_SCRIPTS/TEST_zonal_stats_data.ipynb

Large diffs are not rendered by default.

4,420 changes: 0 additions & 4,420 deletions notebooks/MP_SCRIPTS/global_f_0_2020_1km_zonal.csv

This file was deleted.

160 changes: 80 additions & 80 deletions notebooks/MP_SCRIPTS/testing.ipynb

Large diffs are not rendered by default.

119 changes: 87 additions & 32 deletions notebooks/MP_SCRIPTS/zonal_urbanization.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,10 @@
AWS_SESSION_TOKEN = os.getenv("AWS_SESSION_TOKEN")

if __name__ == "__main__":
multiprocess = True
verbose = True
run_urban = True
run_urban = False
run_urban_pop = True
run_urban_pop = False

tPrint("Starting")
h3_level = 6
data_prefix = "Urbanization"
Expand All @@ -35,6 +34,17 @@
# Urbanization layers
unq_urban = [11, 12, 13, 21, 22, 23, 30]
ghsl_folder = "/home/public/Data/GLOBAL/GHSL/"

ghs_smod = os.path.join(
ghsl_folder, "SMOD", "GHS_SMOD_E2020_GLOBE_R2023A_54009_1000_V1_0.tif"
)
ghs_pop = os.path.join(
ghsl_folder, "Pop", "GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif"
)

# h3_0_list = h3_helper.generate_lvl0_lists(h3_level, return_gdf=True, buffer0=False)
# if verbose:

ghs_smod = os.path.join(
ghsl_folder, "SMOD", "GHS_SMOD_E2020_GLOBE_R2023A_54009_1000_V1_0.tif"
)
Expand All @@ -44,6 +54,7 @@

# h3_0_list = h3_helper.generate_lvl0_lists(h3_level, return_gdf=True, buffer0=False)
# if verbose:

# tPrint("H3_0 list generated")

h3_1_list = h3_helper.generate_lvl1_lists(
Expand All @@ -55,6 +66,22 @@
urban_pop_args = []
urban_args = []
for h3_1_key, cur_gdf in h3_1_list.items():
if run_urban_pop:
# Set up mp arguments for urban population
pop_filename = "GHS_POP_2020_Urban_Breakdown.csv"
pop_out_s3_key = f"Space2Stats/h3_stats_data/GLOBAL/{data_prefix_pop}/{h3_1_key}/{pop_filename}"
pop_full_path = os.path.join("s3://", AWS_S3_BUCKET, pop_out_s3_key)
urban_pop_args.append(
[cur_gdf, "shape_id", ghs_pop, ghs_smod, pop_full_path, unq_urban]
)
if run_urban:
# set up mp arguments for urban summary
urban_filename = "GHS_SMOD_2020.csv"
urban_out_s3_key = f"Space2Stats/h3_stats_data/GLOBAL/{data_prefix}/{h3_1_key}/{urban_filename}"
urban_full_path = os.path.join("s3://", AWS_S3_BUCKET, urban_out_s3_key)
urban_args.append(
[cur_gdf, "shape_id", ghs_smod, unq_urban, urban_full_path]
)
# Set up mp arguments for urban population
pop_filename = "GHS_POP_2020_Urban_Breakdown.csv"
pop_out_s3_key = f"Space2Stats/h3_stats_data/GLOBAL/{data_prefix_pop}/{h3_1_key}/{pop_filename}"
Expand All @@ -78,41 +105,69 @@
if multiprocess:
with multiprocessing.Pool(processes=min([70, len(urban_args)])) as pool:
results = pool.starmap(global_zonal.zonal_stats_categories, urban_args)
tPrint(f"Finished urban calculations: {len(results)}")
for combo in results:
out_file = list(combo.keys())[0]
res = combo[out_file]
res.to_csv(
out_file,
storage_options={
"key": AWS_ACCESS_KEY_ID,
"secret": AWS_SECRET_ACCESS_KEY,
"token": AWS_SESSION_TOKEN,
},
)
else:
for a in arg_list:
results = run_zonal(*a)
for combo in results:
out_file = list(combo.keys())[0]
res = combo[out_file]
res.to_csv(
f"s3://{AWS_S3_BUCKET}/{out_file}",
index=False,
storage_options={
"key": AWS_ACCESS_KEY_ID,
"secret": AWS_SECRET_ACCESS_KEY,
"token": AWS_SESSION_TOKEN,
},
)
for a in urban_args:
    # Sequential fallback for the urban summary: run one argument set at a
    # time instead of going through the multiprocessing pool.
    # The return value appears to be a mapping from output path to a table
    # (it is read via .keys() and written with .to_csv) - confirm against
    # global_zonal.zonal_stats_categories.
    results = global_zonal.zonal_stats_categories(*a)
    out_file = list(results.keys())[0]
    # Index the mapping returned by THIS call. The previous code read
    # `combo[out_file]`, but `combo` is only bound by the multiprocess
    # branch's loop, so this path raised NameError (or reused a stale table).
    res = results[out_file]
    res.to_csv(
        out_file,
        storage_options={
            "key": AWS_ACCESS_KEY_ID,
            "secret": AWS_SECRET_ACCESS_KEY,
            "token": AWS_SESSION_TOKEN,
        },
    )
    tPrint(f"Finished {out_file}")

if run_urban_pop:
tPrint(
f"Running calculations on urban population: {len(urban_pop_args)} processes"
)
# Run multi processing on urban_pop_calculations
if multiprocess:
with multiprocessing.Pool(processes=min([70, len(urban_pop_args)])) as pool:
with multiprocessing.Pool(processes=min([40, len(urban_pop_args)])) as pool:
results = pool.starmap(
global_zonal.zonal_stats_categorical, urban_pop_args
)
tPrint(f"Finished multiprocessing urban pop calculations: {len(results)}")
for combo in results:
out_file = list(combo.keys())[0]
res = combo[out_file]
res.to_csv(
out_file,
storage_options={
"key": AWS_ACCESS_KEY_ID,
"secret": AWS_SECRET_ACCESS_KEY,
"token": AWS_SESSION_TOKEN,
},
)
else:
for a in arg_list:
results = run_zonal(*a)
for combo in results:
out_file = list(combo.keys())[0]
res = combo[out_file]
res.to_csv(
f"s3://{AWS_S3_BUCKET}/{out_file}",
index=False,
storage_options={
"key": AWS_ACCESS_KEY_ID,
"secret": AWS_SECRET_ACCESS_KEY,
"token": AWS_SESSION_TOKEN,
},
)
for a in urban_pop_args:
combo = global_zonal.zonal_stats_categorical(
*a, verbose=verbose, minVal=0
)
out_file = list(combo.keys())[0]
tPrint(f"Completed {out_file}")
res = combo[out_file]
res.to_csv(
out_file,
storage_options={
"key": AWS_ACCESS_KEY_ID,
"secret": AWS_SECRET_ACCESS_KEY,
"token": AWS_SESSION_TOKEN,
},
)
tPrint("Finished")
Loading