Skip to content

Commit

Permalink
Merge pull request #28 from Queeno11/Nico
Browse files Browse the repository at this point in the history
Version funcionando!!
  • Loading branch information
Queeno11 authored Feb 1, 2024
2 parents 57f3f63 + 75a6f76 commit 32d54fc
Show file tree
Hide file tree
Showing 9 changed files with 3,507 additions and 1,647 deletions.
25 changes: 14 additions & 11 deletions scripts/PyQGIS scripts/qgis_0_preprocessing_PNEO.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@

# Parameters for the algorithm
sat_captures = [
# "000047717_1_22_STD_A",
# "000047717_1_24_STD_A",
# "000047717_1_25_STD_A",
# "000047717_1_26_STD_A",
"000058605_1_3_STD_A",
"000058605_1_4_STD_A",
"000058605_1_7_STD_A",
"000058608_1_2_STD_A",
#"000047717_1_22_STD_A",
"000047717_1_24_STD_A",
#"000047717_1_25_STD_A",
#"000047717_1_26_STD_A",
# "000058605_1_3_STD_A",
# "000058605_1_4_STD_A",
# "000058605_1_7_STD_A",
# "000058608_1_2_STD_A",
]

path = r"F:\Imagenes Satelitales\2022"
Expand All @@ -40,12 +40,16 @@
rgb_files = [os.path.join(folder, f) for f in rgb_files]
ned_files = [f.replace("_RGB_", "_NED_") for f in rgb_files]
out_files = [f.replace("_RGB_", "_") for f in rgb_files]

assert len(rgb_files) == len(ned_files) == len(out_files)

i = 0
n_imgs = len(rgb_files)
for rgb, ned, out in zip(rgb_files, ned_files, out_files):
out = os.path.split(out)[-1]
out = rf"E:\2022 imagenes\{out}"
if os.path.isfile(out):
continue
# Step 1: Extract NIR band using gdal:translate
result_extract_nir = processing.run(
"gdal:translate",
Expand All @@ -57,8 +61,7 @@
},
)
nir_only = QgsRasterLayer(result_extract_nir["OUTPUT"], "Extracted NIR")
out = os.path.split(out)[-1]
out = rf"E:\2022 imagenes\{out}"

result_merge_rgb_nir = processing.run(
"gdal:merge",
{
Expand Down
19 changes: 13 additions & 6 deletions scripts/PyQGIS scripts/qgis_1b_pansharpening_qgis_PNEO.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
path = r"E:\2022 imagenes"
output_folder = r"D:\Maestría\Tesis\Repo\data\data_in\Pansharpened"
files = os.listdir(path)
ms_files = [f for f in files if f.endswith(".TIF") and "_MS-FS_" in f]

imgs = [
"IMG_PNEO4_202211031357396_MS-FS",
]
ms_files = [f for f in files if f.endswith(".TIF") and any(img in f for img in imgs)]
multispectral = [os.path.join(path, f) for f in ms_files]
panchromatic = [f.replace("_MS-FS_", "_PAN_").replace("_P_", "") for f in multispectral]

Expand All @@ -24,6 +26,9 @@
r = region[1]
c = region[3]
output_name = f"pansharpened_{satellite_pic_number}_R{r}C{c}.tif"
assert os.path.isfile(ms), "MS file doesnt exist"
assert os.path.isfile(p), f"{p} file doesnt exist"
assert os.path.isdir(output_folder), "out folder doesnt exist"
out = os.path.join(output_folder, output_name)

processing.run(
Expand All @@ -32,10 +37,12 @@
"SPECTRAL": ms,
"PANCHROMATIC": p,
"OUTPUT": out,
"RESAMPLING": 2,
"OPTIONS": "COMPRESS=DEFLATE|PREDICTOR=2",
"EXTRA": "",
},
)
print(f"{out} created. {n_imgs-i} images remains.")
assert os.path.isfile(out), f"WTF {p}"

os.remove(ms)
os.remove(p)
print(f"{out} created. {n_imgs-i} images remains. MS and Pan files removed.")

i += 1
105 changes: 20 additions & 85 deletions scripts/PyQGIS scripts/qgis_2_tiff_compression.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import processing
import numpy as np
import os
from qgis.core import (
QgsCoordinateReferenceSystem,
QgsRasterLayer,
Expand All @@ -24,7 +25,7 @@
# return bands_percentiles


def get_sample_array(input_path, sample=100_000):
def get_sample_array(input_path, sample=5_000):
rasterArray = gdal_array.LoadFile(input_path) # Read raster as numpy array
bands_data = {}
for band in [0, 1, 2, 3]:
Expand All @@ -34,89 +35,11 @@ def get_sample_array(input_path, sample=100_000):


# Parameters for the algorithm
images = [
"pansharpened_202212201401079_R4C2.tif",
"pansharpened_202211011409151_R1C1.tif",
"pansharpened_202211011409151_R1C2.tif",
"pansharpened_202211011409151_R1C3.tif",
"pansharpened_202211011409151_R2C1.tif",
"pansharpened_202211011409151_R2C3.tif",
"pansharpened_202211011409151_R3C1.tif",
"pansharpened_202211011409151_R3C3.tif",
"pansharpened_202211011409151_R4C1.tif",
"pansharpened_202211011409151_R4C2.tif",
"pansharpened_202211011409151_R4C3.tif",
"pansharpened_202211011409151_R5C1.tif",
"pansharpened_202211011409151_R5C2.tif",
"pansharpened_202211011409151_R5C3.tif",
"pansharpened_202211011409151_R6C1.tif",
"pansharpened_202211011409151_R6C2.tif",
"pansharpened_202211011409151_R6C3.tif",
"pansharpened_202211011409151_R7C1.tif",
"pansharpened_202211011409151_R7C2.tif",
"pansharpened_202211011409151_R7C3.tif",
"pansharpened_202211011409151_R8C1.tif",
"pansharpened_202211011409151_R8C2.tif",
"pansharpened_202211011409151_R8C3.tif",
"pansharpened_202211031357171_R1C1.tif",
"pansharpened_202211031357171_R1C2.tif",
"pansharpened_202211031357171_R1C3.tif",
"pansharpened_202211031357171_R1CC.tif",
"pansharpened_202211031357171_R2C1.tif",
"pansharpened_202211031357171_R2C2.tif",
"pansharpened_202211031357171_R2C3.tif",
"pansharpened_202211031357171_R3C1.tif",
"pansharpened_202211031357171_R3C2.tif",
"pansharpened_202211031357171_R3C3.tif",
"pansharpened_202211031357171_R4C2.tif",
"pansharpened_202211031357171_R4C3.tif",
"pansharpened_202211031357171_R5C3.tif",
"pansharpened_202211031357171_R6C3.tif",
"pansharpened_202211031357171_R7C3.tif",
"pansharpened_202211031357171_R8C1.tif",
"pansharpened_202211031357396_R1C1.tif",
"pansharpened_202211031357396_R1C2.tif",
"pansharpened_202211031357396_R2C2.tif",
"pansharpened_202211031357396_R3C2.tif",
"pansharpened_202211031357396_R4C1.tif",
"pansharpened_202211031357396_R4C2.tif",
"pansharpened_202211031357396_R5C1.tif",
"pansharpened_202211031357396_R5C2.tif",
"pansharpened_202211031357396_R6C1.tif",
"pansharpened_202211031357396_R6C2.tif",
"pansharpened_202211031357574_R1C1.tif",
"pansharpened_202211031357574_R1C2.tif",
"pansharpened_202211031357574_R2C2.tif",
"pansharpened_202211031357574_R3C2.tif",
"pansharpened_202211031357574_R5C2.tif",
"pansharpened_202211031357574_R6C1.tif",
"pansharpened_202211031357574_R6C2.tif",
"pansharpened_202211031357574_R7C1.tif",
"pansharpened_202211031357574_R7C2.tif",
"pansharpened_202211031357574_R8C1.tif",
"pansharpened_202211031357574_R8C2.tif",
"pansharpened_202212041353517_R4C2.tif",
"pansharpened_202212041353517_R5C1.tif",
"pansharpened_202212041353517_R5C2.tif",
"pansharpened_202212041354167_R1C1.tif",
"pansharpened_202212041354167_R1C2.tif",
"pansharpened_202212041354167_R2C1.tif",
"pansharpened_202212041354167_R4C1.tif",
"pansharpened_202212041354167_R4C2.tif",
"pansharpened_202212051412329_R1C1.tif",
"pansharpened_202212051412329_R2C1.tif",
"pansharpened_202212051412329_R3C1.tif",
"pansharpened_202212201401079_R1C2.tif",
"pansharpened_202212201401079_R2C2.tif",
"pansharpened_202212201401079_R3C1.tif",
"pansharpened_202212201401079_R3C2.tif",
"pansharpened_202212201401079_R4C1.tif",
]


path_in = r"D:\Maestría\Tesis\Repo\data\data_in\Pansharpened"
path_out = r"D:\Maestría\Tesis\Repo\data\data_in\Compressed\2018"
path_out = r"D:\Maestría\Tesis\Repo\data\data_in\Compressed\2022"

images = os.listdir(path_in)
images = [img for img in images if img.endswith(".tif")]

## Compute percentiles over all the images
images_sample_data = {}
Expand Down Expand Up @@ -147,13 +70,22 @@ def get_sample_array(input_path, sample=100_000):
for i, (min_val, max_val) in bands_percentiles.items()
]
)
with open(f"{path_in}/scale_options.txt", "w") as text_file:
text_file.write(scale_options)

## Read scale options (previous line might take a lot of time)
with open(rf"{path_in}/scale_options.txt", "r") as text_file:
scale_options = text_file.read()


i = 0
n_imgs = len(images)
for image in images:
## Process the layr
# Load layer
layer = QgsRasterLayer(f"{path_in}/{image}")
file_in = f"{path_in}/{image}"
file_out = f"{path_out}/{image}"
layer = QgsRasterLayer(file_in)

# Adjust gamma and brightness
contrastFilter = QgsBrightnessContrastFilter()
Expand All @@ -177,8 +109,11 @@ def get_sample_array(input_path, sample=100_000):
"OPTIONS": "COMPRESS=JPEG|JPEG_QUALITY=75|TILED=Yes",
"EXTRA": scale_options,
"DATA_TYPE": 1,
"OUTPUT": f"{path_out}/{image}",
"OUTPUT": file_out,
},
)
print(f"{path_out}/{image} created. {n_imgs-i} images remains.")
QgsProject.instance().removeMapLayer(layer.id())
assert os.path.isfile(file_out)
os.remove(file_in)
print(f"{file_out} created. {n_imgs-i} images remains.")
i += 1
72 changes: 68 additions & 4 deletions scripts/build_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
path_dataout = env["PATH_DATAOUT"]
path_scripts = env["PATH_SCRIPTS"]
path_satelites = env["PATH_SATELITES"]
path_nocturnas = env["PATH_NOCTURNAS"]
path_landsat = env["PATH_LANDSAT"]
path_logs = env["PATH_LOGS"]
path_outputs = env["PATH_OUTPUTS"]
path_imgs = env["PATH_IMGS"]
Expand Down Expand Up @@ -56,6 +58,45 @@ def load_satellite_datasets(year=2013, stretch=False):

return datasets, extents

def load_landsat_datasets(stretch=False):
    """Load every Landsat GeoTIFF under ``path_landsat`` and compute extents.

    Parameters
    ----------
    stretch : bool, default False
        If True, additionally apply ``stretch_dataset`` to each
        normalized dataset.

    Returns
    -------
    tuple[dict, dict]
        ``datasets`` maps file stem (filename without ``.tif``) to the
        dataset opened with ``xr.open_dataset`` and passed through
        ``normalize_landsat``; ``extents`` maps the same stems to
        ``utils.get_dataset_extent(ds)``.
    """
    # Check the directory *before* listing it — in the original order,
    # os.listdir raised first on a bad path and the assert never ran.
    assert os.path.isdir(path_landsat), f"{path_landsat} is not a directory"

    files = [f for f in os.listdir(path_landsat) if f.endswith(".tif")]
    # Generator instead of a materialized list: all() short-circuits.
    assert all(os.path.isfile(rf"{path_landsat}/{f}") for f in files)

    datasets = {
        f.replace(".tif", ""): normalize_landsat(
            xr.open_dataset(rf"{path_landsat}/{f}")
        )
        for f in files
    }
    if stretch:
        datasets = {name: stretch_dataset(ds) for name, ds in datasets.items()}

    extents = {name: utils.get_dataset_extent(ds) for name, ds in datasets.items()}

    return datasets, extents

def load_nightlight_datasets(stretch=False):
    """Load every nightlight GeoTIFF under ``path_nocturnas`` and compute extents.

    Parameters
    ----------
    stretch : bool, default False
        If True, additionally apply ``stretch_dataset`` to each dataset.

    Returns
    -------
    tuple[dict, dict]
        ``datasets`` maps file stem (filename without ``.tif``) to the
        dataset opened with ``xr.open_dataset``; ``extents`` maps the
        same stems to ``utils.get_dataset_extent(ds)``.
    """
    # Check the directory *before* listing it — in the original order,
    # os.listdir raised first on a bad path and the assert never ran.
    assert os.path.isdir(path_nocturnas), f"{path_nocturnas} is not a directory"

    files = [f for f in os.listdir(path_nocturnas) if f.endswith(".tif")]
    # Generator instead of a materialized list: all() short-circuits.
    assert all(os.path.isfile(rf"{path_nocturnas}/{f}") for f in files)

    datasets = {
        f.replace(".tif", ""): xr.open_dataset(rf"{path_nocturnas}/{f}")
        for f in files
    }
    if stretch:
        datasets = {name: stretch_dataset(ds) for name, ds in datasets.items()}

    extents = {name: utils.get_dataset_extent(ds) for name, ds in datasets.items()}

    return datasets, extents



def load_icpag_dataset(variable="ln_pred_inc_mean", trim=True):
"""Open ICPAG dataset and merge with ELL estimation."""
Expand Down Expand Up @@ -117,7 +158,7 @@ def assign_datasets_to_gdf(gdf, extents, year=2013, centroid=False, verbose=True
return gdf


def split_train_test(df):
def split_train_test(df, buffer=0):
"""Blocks are counted from left to right, one count for test and one for train."""

# Set bounds of test dataset blocks
Expand All @@ -141,11 +182,11 @@ def split_train_test(df):

# These blocks are the train dataset.
# The hole image have to be inside the train region
df.loc[df.max_x < test1_min_x, "train_block"] = 1 # A la izqauierda de test1
df.loc[df.max_x+buffer < test1_min_x, "train_block"] = 1 # A la izqauierda de test1
df.loc[
(df.min_x > test1_max_x) & (df.max_x < test2_min_x), "train_block"
(df.min_x-buffer > test1_max_x) & (df.max_x+buffer < test2_min_x), "train_block"
] = 2 # Entre test1 y test2
df.loc[df.min_x > test2_max_x, "train_block"] = 3 # A la derecha de test2
df.loc[df.min_x-buffer > test2_max_x, "train_block"] = 3 # A la derecha de test2
# print(df.train_block.value_counts())

# Put nans in the overlapping borders
Expand Down Expand Up @@ -583,4 +624,27 @@ def stretch_dataset(ds, pixel_depth=32_767):
ds = (ds - minimum) / (maximum - minimum) * pixel_depth
ds = ds.where(ds.band_data > 0, 0)
ds = ds.where(ds.band_data < pixel_depth, pixel_depth)
return ds

def normalize_landsat(ds):
    """Percentile-stretch each band of ``ds.band_data`` to the 0-255 range.

    Every band is clipped to its own [2nd, 98th] percentile window and then
    linearly rescaled so that window maps onto [0, 255]. Results are written
    back through the array returned by ``ds.band_data.to_numpy()`` — this
    assumes that array is a view over the dataset's backing storage, so the
    mutation is visible in the returned ``ds`` (TODO confirm for lazy/dask-
    backed datasets, where ``to_numpy`` may materialize a copy).

    Parameters
    ----------
    ds : dataset with a ``band_data`` array of shape (band, y, x)

    Returns
    -------
    The same ``ds`` object, with ``band_data`` rescaled in place.
    """
    band_data = ds.band_data.to_numpy()
    for band in range(band_data.shape[0]):
        this_band = band_data[band]

        vmin = np.percentile(this_band, q=2)
        vmax = np.percentile(this_band, q=98)

        if vmax <= vmin:
            # Degenerate (constant) band: the original code divided by zero
            # here and filled the band with NaNs. Map it to 0 instead.
            band_data[band] = 0
            continue

        # Clip to the percentile window, rescale to [0, 1], expand to [0, 255].
        this_band = np.clip(this_band, vmin, vmax)
        this_band = (this_band - vmin) / (vmax - vmin)
        band_data[band] = this_band * 255

    return ds
Loading

0 comments on commit 32d54fc

Please sign in to comment.