From 077ea08c248fe455f8aa777a83cfd1f12246fb45 Mon Sep 17 00:00:00 2001 From: Sam Gillingham Date: Thu, 27 Jun 2024 09:39:04 +1000 Subject: [PATCH 1/9] add --noremove option --- parallel_examples/awsbatch/do_prepare.py | 4 +++ parallel_examples/awsbatch/do_stitch.py | 36 ++++++++++--------- .../awsbatch/submit-pyshepseg-job.py | 4 +++ 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/parallel_examples/awsbatch/do_prepare.py b/parallel_examples/awsbatch/do_prepare.py index cd2d7a2..b767d17 100755 --- a/parallel_examples/awsbatch/do_prepare.py +++ b/parallel_examples/awsbatch/do_prepare.py @@ -64,6 +64,8 @@ def getCmdargs(): help="Maximum spectral difference for segmentation (default=%(default)s)") p.add_argument("--spectDistPcntile", type=int, default=50, required=False, help="Spectral Distance Percentile for segmentation (default=%(default)s)") + p.add_argument("--noremove", action="store_true", default=False, + help="don't remove files from S3 (for debugging)") cmdargs = p.parse_args() if cmdargs.bands is not None: @@ -137,6 +139,8 @@ def main(): cmd.extend(['--spatialstats', cmdargs.spatialstats]) if cmdargs.nogdalstats: cmd.append('--nogdalstats') + if cmdargs.noremove: + cmd.append('--noremove') response = batch.submit_job(jobName="pyshepseg_stitch", jobQueue=cmdargs.jobqueue, diff --git a/parallel_examples/awsbatch/do_stitch.py b/parallel_examples/awsbatch/do_stitch.py index 29a3e3c..a76a909 100755 --- a/parallel_examples/awsbatch/do_stitch.py +++ b/parallel_examples/awsbatch/do_stitch.py @@ -49,6 +49,8 @@ def getCmdargs(): p.add_argument("--nogdalstats", action="store_true", default=False, help="don't calculate GDAL's statistics or write a colour table. 
" + "Can't be used with --stats.") + p.add_argument("--noremove", action="store_true", default=False, + help="don't remove files from S3 (for debugging)") cmdargs = p.parse_args() @@ -93,15 +95,16 @@ def main(): cmdargs.overlapsize, tempDir) # clean up files to release space - objs = [] - for col, row in tileFilenames: - filename = '{}_{}_{}.{}'.format(cmdargs.tileprefix, col, row, 'tif') - objs.append({'Key': filename}) - - # workaround 1000 at a time limit - while len(objs) > 0: - s3.delete_objects(Bucket=cmdargs.bucket, Delete={'Objects': objs[0:1000]}) - del objs[0:1000] + if not cmdargs.noremove: + objs = [] + for col, row in tileFilenames: + filename = '{}_{}_{}.{}'.format(cmdargs.tileprefix, col, row, 'tif') + objs.append({'Key': filename}) + + # workaround 1000 at a time limit + while len(objs) > 0: + s3.delete_objects(Bucket=cmdargs.bucket, Delete={'Objects': objs[0:1000]}) + del objs[0:1000] # open for the creation of stats localDs = gdal.Open(localOutfile, gdal.GA_Update) @@ -153,13 +156,14 @@ def main(): s3.upload_file(localOutfile, cmdargs.bucket, cmdargs.outfile) # cleanup temp files from S3 - objs = [{'Key': cmdargs.pickle}] - if cmdargs.stats is not None: - objs.append({'Key': statsKey}) - if cmdargs.spatialstats is not None: - objs.append({'Key': spatialstatsKey}) - - s3.delete_objects(Bucket=cmdargs.bucket, Delete={'Objects': objs}) + if not cmdargs.noremove: + objs = [{'Key': cmdargs.pickle}] + if cmdargs.stats is not None: + objs.append({'Key': statsKey}) + if cmdargs.spatialstats is not None: + objs.append({'Key': spatialstatsKey}) + + s3.delete_objects(Bucket=cmdargs.bucket, Delete={'Objects': objs}) # cleanup shutil.rmtree(tempDir) diff --git a/parallel_examples/awsbatch/submit-pyshepseg-job.py b/parallel_examples/awsbatch/submit-pyshepseg-job.py index 1e44579..d3ee754 100755 --- a/parallel_examples/awsbatch/submit-pyshepseg-job.py +++ b/parallel_examples/awsbatch/submit-pyshepseg-job.py @@ -59,6 +59,8 @@ def getCmdargs(): help="Maximum 
spectral difference for segmentation (default=%(default)s)") p.add_argument("--spectDistPcntile", type=int, default=50, required=False, help="Spectral Distance Percentile for segmentation (default=%(default)s)") + p.add_argument("--noremove", action="store_true", default=False, + help="don't remove files from S3 (for debugging)") cmdargs = p.parse_args() @@ -98,6 +100,8 @@ def main(): cmd.append('--nogdalstats') if cmdargs.tileprefix is not None: cmd.extend(['--tileprefix', cmdargs.tileprefix]) + if cmdargs.noremove: + cmd.append('--noremove') # submit the prepare job response = batch.submit_job(jobName="pyshepseg_prepare", From 5cf1cab47e1e046438d8caa48ed1523d62ed2ea8 Mon Sep 17 00:00:00 2001 From: Sam Gillingham Date: Thu, 27 Jun 2024 10:44:35 +1000 Subject: [PATCH 2/9] build RIOS and workaround single tile cases --- parallel_examples/awsbatch/Dockerfile | 13 +++++++++++-- parallel_examples/awsbatch/do_prepare.py | 7 ++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/parallel_examples/awsbatch/Dockerfile b/parallel_examples/awsbatch/Dockerfile index 78b499f..6c2b235 100644 --- a/parallel_examples/awsbatch/Dockerfile +++ b/parallel_examples/awsbatch/Dockerfile @@ -37,9 +37,18 @@ RUN cd /tmp \ && make install \ && cd ../.. \ && rm -rf kealib-${KEALIB_VERSION} kealib-${KEALIB_VERSION}.tar.gz + +ENV RIOS_VERSION=2.0.3 +RUN cd /tmp \ + && wget -q https://github.com/ubarsc/rios/releases/download/rios-${RIOS_VERSION}/rios-${RIOS_VERSION}.tar.gz \ + && tar xf rios-${RIOS_VERSION} \ + && cd rios-${RIOS_VERSION} \ + && pip install . \ + && cd .. \ + && rm -rf rios-${RIOS_VERSION} rios-${RIOS_VERSION}.tar.gz COPY pyshepseg-$PYSHEPSEG_VER.tar.gz /tmp -# install RIOS +# install pyshegseg RUN cd /tmp && tar xf pyshepseg-$PYSHEPSEG_VER.tar.gz \ && cd pyshepseg-$PYSHEPSEG_VER \ && DEB_PYTHON_INSTALL_LAYOUT=deb_system pip install . 
\ @@ -73,9 +82,9 @@ RUN apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/* USER $SERVICEUSER # a few quick tests -#RUN gdal_translate --formats | grep KEA RUN python3 -c 'from osgeo import gdal;assert(gdal.GetDriverByName("KEA") is not None)' RUN python3 -c 'from pyshepseg import tiling' +RUN python3 -c 'from rios import applier' # export the volume VOLUME $SW_VOLUME diff --git a/parallel_examples/awsbatch/do_prepare.py b/parallel_examples/awsbatch/do_prepare.py index b767d17..72a81b6 100755 --- a/parallel_examples/awsbatch/do_prepare.py +++ b/parallel_examples/awsbatch/do_prepare.py @@ -111,6 +111,11 @@ def main(): # now submit an array job with all the tiles # (can't do this before now because we don't know how many tiles) + arrayProperties = None + if len(colRowList) > 1: + # throws error if this is 1... + arrayProperties = {'size': len(colRowList)} + containerOverrides = { "command": ['/usr/bin/python3', '/ubarscsw/bin/do_tile.py', '--bucket', cmdargs.bucket, '--pickle', cmdargs.pickle, @@ -121,7 +126,7 @@ def main(): response = batch.submit_job(jobName="pyshepseg_tiles", jobQueue=cmdargs.jobqueue, jobDefinition=cmdargs.jobdefntile, - arrayProperties={'size': len(colRowList)}, + arrayProperties=arrayProperties, containerOverrides=containerOverrides) tilesJobId = response['jobId'] print('Tiles Job Id', tilesJobId) From b23eae9e7c35b135a1ac782191e2896af12e10e2 Mon Sep 17 00:00:00 2001 From: Sam Gillingham Date: Thu, 27 Jun 2024 00:51:05 +0000 Subject: [PATCH 3/9] Dockerfile fixes --- parallel_examples/awsbatch/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parallel_examples/awsbatch/Dockerfile b/parallel_examples/awsbatch/Dockerfile index 6c2b235..f634f50 100644 --- a/parallel_examples/awsbatch/Dockerfile +++ b/parallel_examples/awsbatch/Dockerfile @@ -41,9 +41,9 @@ RUN cd /tmp \ ENV RIOS_VERSION=2.0.3 RUN cd /tmp \ && wget -q 
https://github.com/ubarsc/rios/releases/download/rios-${RIOS_VERSION}/rios-${RIOS_VERSION}.tar.gz \ - && tar xf rios-${RIOS_VERSION} \ + && tar xf rios-${RIOS_VERSION}.tar.gz \ && cd rios-${RIOS_VERSION} \ - && pip install . \ + && DEB_PYTHON_INSTALL_LAYOUT=deb_system pip install . \ && cd .. \ && rm -rf rios-${RIOS_VERSION} rios-${RIOS_VERSION}.tar.gz From b8a779ff2d6603af599ed678f0eca9bb7f9f8c49 Mon Sep 17 00:00:00 2001 From: Sam Gillingham Date: Thu, 27 Jun 2024 11:45:54 +1000 Subject: [PATCH 4/9] remove RIOS. Pull out alignment checks into separate function. Fix array job syntax --- parallel_examples/awsbatch/Dockerfile | 13 +--- parallel_examples/awsbatch/do_prepare.py | 2 +- pyshepseg/tilingstats.py | 98 ++++++++++++++---------- 3 files changed, 59 insertions(+), 54 deletions(-) diff --git a/parallel_examples/awsbatch/Dockerfile b/parallel_examples/awsbatch/Dockerfile index f634f50..78b499f 100644 --- a/parallel_examples/awsbatch/Dockerfile +++ b/parallel_examples/awsbatch/Dockerfile @@ -37,18 +37,9 @@ RUN cd /tmp \ && make install \ && cd ../.. \ && rm -rf kealib-${KEALIB_VERSION} kealib-${KEALIB_VERSION}.tar.gz - -ENV RIOS_VERSION=2.0.3 -RUN cd /tmp \ - && wget -q https://github.com/ubarsc/rios/releases/download/rios-${RIOS_VERSION}/rios-${RIOS_VERSION}.tar.gz \ - && tar xf rios-${RIOS_VERSION}.tar.gz \ - && cd rios-${RIOS_VERSION} \ - && DEB_PYTHON_INSTALL_LAYOUT=deb_system pip install . \ - && cd .. \ - && rm -rf rios-${RIOS_VERSION} rios-${RIOS_VERSION}.tar.gz COPY pyshepseg-$PYSHEPSEG_VER.tar.gz /tmp -# install pyshegseg +# install RIOS RUN cd /tmp && tar xf pyshepseg-$PYSHEPSEG_VER.tar.gz \ && cd pyshepseg-$PYSHEPSEG_VER \ && DEB_PYTHON_INSTALL_LAYOUT=deb_system pip install . 
\ @@ -82,9 +73,9 @@ RUN apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/* USER $SERVICEUSER # a few quick tests +#RUN gdal_translate --formats | grep KEA RUN python3 -c 'from osgeo import gdal;assert(gdal.GetDriverByName("KEA") is not None)' RUN python3 -c 'from pyshepseg import tiling' -RUN python3 -c 'from rios import applier' # export the volume VOLUME $SW_VOLUME diff --git a/parallel_examples/awsbatch/do_prepare.py b/parallel_examples/awsbatch/do_prepare.py index 72a81b6..9dc9b33 100755 --- a/parallel_examples/awsbatch/do_prepare.py +++ b/parallel_examples/awsbatch/do_prepare.py @@ -111,7 +111,7 @@ def main(): # now submit an array job with all the tiles # (can't do this before now because we don't know how many tiles) - arrayProperties = None + arrayProperties = {} if len(colRowList) > 1: # throws error if this is 1... arrayProperties = {'size': len(colRowList)} diff --git a/pyshepseg/tilingstats.py b/pyshepseg/tilingstats.py index 7461c81..eb51f14 100644 --- a/pyshepseg/tilingstats.py +++ b/pyshepseg/tilingstats.py @@ -111,27 +111,8 @@ def calcPerSegmentStatsTiled(imgfile, imgbandnum, segfile, valid pixels (not nodata) that were used to calculate the statistics. 
""" - segds = segfile - if not isinstance(segds, gdal.Dataset): - segds = gdal.Open(segfile, gdal.GA_Update) - segband = segds.GetRasterBand(1) - - imgds = imgfile - if not isinstance(imgds, gdal.Dataset): - imgds = gdal.Open(imgfile, gdal.GA_ReadOnly) - imgband = imgds.GetRasterBand(imgbandnum) - if (imgband.DataType == gdal.GDT_Float32 or - imgband.DataType == gdal.GDT_Float64): - raise PyShepSegStatsError("Float image types not supported") - - if segband.XSize != imgband.XSize or segband.YSize != imgband.YSize: - raise PyShepSegStatsError("Images must be same size") - - if segds.GetGeoTransform() != imgds.GetGeoTransform(): - raise PyShepSegStatsError("Images must have same spatial extent and pixel size") - - if not equalProjection(segds.GetProjection(), imgds.GetProjection()): - raise PyShepSegStatsError("Images must be in the same projection") + segds, segband, imgds, imgband = doImageAlignmentChecks(segfile, + imgfile, imgbandnum) attrTbl = segband.GetDefaultRAT() existingColNames = [attrTbl.GetNameOfCol(i) @@ -184,6 +165,58 @@ def calcPerSegmentStatsTiled(imgfile, imgbandnum, segfile, raise PyShepSegStatsError('Not all pixels found during processing') +def doImageAlignmentChecks(segfile, imgfile, imgbandnum): + """ + Do the checks that the segment file and image file that is being used to + collect the stats actually align. We refuse to process the files if they + don't as it is not clear how they should be made to line up - this is up + to the user to get right. Also checks that imgfile is not a float image. + + Parameters + ---------- + segfile : str or gdal.Dataset + Path to segmented file or an open GDAL dataset. 
+ imgfile : str or gdal.Dataset + Path to input file for collecting statistics from, or an open GDAL dataset. + imgbandnum : int + 1-based index of the band number in imgfile to use for collecting stats + + Returns + ------- + segds: gdal.Dataset + Opened GDAL dataset for the segments file + segband: gdal.Band + First Band of the segds + imgds: gdal.Dataset + Opened GDAL dataset for the image data file + imgband: gdal.Band + Requested band for the imgds + """ + segds = segfile + if not isinstance(segds, gdal.Dataset): + segds = gdal.Open(segfile, gdal.GA_Update) + segband = segds.GetRasterBand(1) + + imgds = imgfile + if not isinstance(imgds, gdal.Dataset): + imgds = gdal.Open(imgfile, gdal.GA_ReadOnly) + imgband = imgds.GetRasterBand(imgbandnum) + if (imgband.DataType == gdal.GDT_Float32 or + imgband.DataType == gdal.GDT_Float64): + raise PyShepSegStatsError("Float image types not supported") + + if segband.XSize != imgband.XSize or segband.YSize != imgband.YSize: + raise PyShepSegStatsError("Images must be same size") + + if segds.GetGeoTransform() != imgds.GetGeoTransform(): + raise PyShepSegStatsError("Images must have same spatial extent and pixel size") + + if not equalProjection(segds.GetProjection(), imgds.GetProjection()): + raise PyShepSegStatsError("Images must be in the same projection") + + return segds, segband, imgds, imgband + + @njit def accumulateSegDict(segDict, noDataDict, imgNullVal, tileSegments, tileImageData): """ @@ -1028,28 +1061,9 @@ def calcPerSegmentSpatialStatsTiled(imgfile, imgbandnum, segfile, The value to fill in for segments that have no data. 
""" - segds = segfile - if not isinstance(segds, gdal.Dataset): - segds = gdal.Open(segfile, gdal.GA_Update) - segband = segds.GetRasterBand(1) + segds, segband, imgds, imgband = doImageAlignmentChecks(segfile, + imgfile, imgbandnum) - imgds = imgfile - if not isinstance(imgds, gdal.Dataset): - imgds = gdal.Open(imgfile, gdal.GA_ReadOnly) - imgband = imgds.GetRasterBand(imgbandnum) - if (imgband.DataType == gdal.GDT_Float32 or - imgband.DataType == gdal.GDT_Float64): - raise PyShepSegStatsError("Float image types not supported") - - if segband.XSize != imgband.XSize or segband.YSize != imgband.YSize: - raise PyShepSegStatsError("Images must be same size") - - if segds.GetGeoTransform() != imgds.GetGeoTransform(): - raise PyShepSegStatsError("Images must have same spatial extent and pixel size") - - if not equalProjection(segds.GetProjection(), imgds.GetProjection()): - raise PyShepSegStatsError("Images must be in the same projection") - attrTbl = segband.GetDefaultRAT() existingColNames = [attrTbl.GetNameOfCol(i) for i in range(attrTbl.GetColumnCount())] From 57d88b47e973f482ffc3ee27058e87a6c4417974 Mon Sep 17 00:00:00 2001 From: Sam Gillingham Date: Thu, 27 Jun 2024 12:42:23 +1000 Subject: [PATCH 5/9] workaround AWS_BATCH_JOB_ARRAY_INDEX not being set if one tile --- parallel_examples/awsbatch/do_prepare.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/parallel_examples/awsbatch/do_prepare.py b/parallel_examples/awsbatch/do_prepare.py index 9dc9b33..eee075b 100755 --- a/parallel_examples/awsbatch/do_prepare.py +++ b/parallel_examples/awsbatch/do_prepare.py @@ -111,11 +111,6 @@ def main(): # now submit an array job with all the tiles # (can't do this before now because we don't know how many tiles) - arrayProperties = {} - if len(colRowList) > 1: - # throws error if this is 1... 
- arrayProperties = {'size': len(colRowList)} - containerOverrides = { "command": ['/usr/bin/python3', '/ubarscsw/bin/do_tile.py', '--bucket', cmdargs.bucket, '--pickle', cmdargs.pickle, @@ -123,6 +118,16 @@ def main(): '--minSegmentSize', str(cmdargs.minSegmentSize), '--maxSpectDiff', cmdargs.maxSpectDiff, '--spectDistPcntile', str(cmdargs.spectDistPcntile)]} + + arrayProperties = {} + if len(colRowList) > 1: + # throws error if this is 1... + arrayProperties = {'size': len(colRowList)} + else: + # must fake AWS_BATCH_JOB_ARRAY_INDEX + containerOverrides['environment'] = {'name': 'AWS_BATCH_JOB_ARRAY_INDEX', + 'value': '0'} + response = batch.submit_job(jobName="pyshepseg_tiles", jobQueue=cmdargs.jobqueue, jobDefinition=cmdargs.jobdefntile, From 60e2aea4a7033721775f8b6f31d54783877172e0 Mon Sep 17 00:00:00 2001 From: Sam Gillingham Date: Thu, 27 Jun 2024 12:44:03 +1000 Subject: [PATCH 6/9] tidy --- parallel_examples/awsbatch/do_prepare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parallel_examples/awsbatch/do_prepare.py b/parallel_examples/awsbatch/do_prepare.py index eee075b..596a63c 100755 --- a/parallel_examples/awsbatch/do_prepare.py +++ b/parallel_examples/awsbatch/do_prepare.py @@ -122,7 +122,7 @@ def main(): arrayProperties = {} if len(colRowList) > 1: # throws error if this is 1... 
- arrayProperties = {'size': len(colRowList)} + arrayProperties['size'] = len(colRowList)} else: # must fake AWS_BATCH_JOB_ARRAY_INDEX containerOverrides['environment'] = {'name': 'AWS_BATCH_JOB_ARRAY_INDEX', From 1b4028bc4892c2c024c6811aed2a8cedaaa88e14 Mon Sep 17 00:00:00 2001 From: Sam Gillingham Date: Thu, 27 Jun 2024 12:45:34 +1000 Subject: [PATCH 7/9] typo --- parallel_examples/awsbatch/do_prepare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parallel_examples/awsbatch/do_prepare.py b/parallel_examples/awsbatch/do_prepare.py index 596a63c..fde586d 100755 --- a/parallel_examples/awsbatch/do_prepare.py +++ b/parallel_examples/awsbatch/do_prepare.py @@ -122,7 +122,7 @@ def main(): arrayProperties = {} if len(colRowList) > 1: # throws error if this is 1... - arrayProperties['size'] = len(colRowList)} + arrayProperties['size'] = len(colRowList) else: # must fake AWS_BATCH_JOB_ARRAY_INDEX containerOverrides['environment'] = {'name': 'AWS_BATCH_JOB_ARRAY_INDEX', From 12894a1daa7af225b215e198774c56adb2cff789 Mon Sep 17 00:00:00 2001 From: Sam Gillingham Date: Thu, 27 Jun 2024 03:41:12 +0000 Subject: [PATCH 8/9] list --- parallel_examples/awsbatch/do_prepare.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parallel_examples/awsbatch/do_prepare.py b/parallel_examples/awsbatch/do_prepare.py index fde586d..4a7bbab 100755 --- a/parallel_examples/awsbatch/do_prepare.py +++ b/parallel_examples/awsbatch/do_prepare.py @@ -125,8 +125,8 @@ def main(): arrayProperties['size'] = len(colRowList) else: # must fake AWS_BATCH_JOB_ARRAY_INDEX - containerOverrides['environment'] = {'name': 'AWS_BATCH_JOB_ARRAY_INDEX', - 'value': '0'} + containerOverrides['environment'] = [{'name': 'AWS_BATCH_JOB_ARRAY_INDEX', + 'value': '0'}] response = batch.submit_job(jobName="pyshepseg_tiles", jobQueue=cmdargs.jobqueue, From fde10ce1ca8913c81d92130c7ee535f0053f6424 Mon Sep 17 00:00:00 2001 From: Sam Gillingham Date: Thu, 27 Jun 2024 08:34:51 +0000 
Subject: [PATCH 9/9] AWS resets AWS_BATCH_JOB_ARRAY_INDEX so use a command line arg instead --- parallel_examples/awsbatch/do_prepare.py | 4 ++-- parallel_examples/awsbatch/do_tile.py | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/parallel_examples/awsbatch/do_prepare.py b/parallel_examples/awsbatch/do_prepare.py index 4a7bbab..df35f33 100755 --- a/parallel_examples/awsbatch/do_prepare.py +++ b/parallel_examples/awsbatch/do_prepare.py @@ -125,8 +125,8 @@ def main(): arrayProperties['size'] = len(colRowList) else: # must fake AWS_BATCH_JOB_ARRAY_INDEX - containerOverrides['environment'] = [{'name': 'AWS_BATCH_JOB_ARRAY_INDEX', - 'value': '0'}] + # can't set this as an env var as Batch overrides + containerOverrides['command'].extend(['--arrayindex', '0']) response = batch.submit_job(jobName="pyshepseg_tiles", jobQueue=cmdargs.jobqueue, diff --git a/parallel_examples/awsbatch/do_tile.py b/parallel_examples/awsbatch/do_tile.py index 9834623..affee72 100755 --- a/parallel_examples/awsbatch/do_tile.py +++ b/parallel_examples/awsbatch/do_tile.py @@ -21,13 +21,6 @@ gdal.UseExceptions() -# set by AWS Batch -ARRAY_INDEX = os.getenv('AWS_BATCH_JOB_ARRAY_INDEX') -if ARRAY_INDEX is None: - raise SystemExit('Must set AWS_BATCH_JOB_ARRAY_INDEX env var') - -ARRAY_INDEX = int(ARRAY_INDEX) - def getCmdargs(): """ @@ -48,9 +41,17 @@ def getCmdargs(): help="Maximum spectral difference for segmentation (default=%(default)s)") p.add_argument("--spectDistPcntile", type=int, default=50, required=False, help="Spectral Distance Percentile for segmentation (default=%(default)s)") + p.add_argument("--arrayindex", type=int, + help="Override AWS_BATCH_JOB_ARRAY_INDEX env var") cmdargs = p.parse_args() + if cmdargs.arrayindex is None: + cmdargs.arrayindex = os.getenv('AWS_BATCH_JOB_ARRAY_INDEX') + if cmdargs.arrayindex is None: + raise SystemExit('Must set AWS_BATCH_JOB_ARRAY_INDEX env var or ' + + 'specify --arrayindex') + return cmdargs @@ -75,7 +76,7 
@@ def main(): tempDir = tempfile.mkdtemp() # work out which tile we are processing - col, row = dataFromPickle['colRowList'][ARRAY_INDEX] + col, row = dataFromPickle['colRowList'][cmdargs.arrayindex] # work out a filename to save with the output of this tile # Note: this filename format is repeated in do_stitch.py