\documentclass[sigplan,screen]{acmart}
\input{cloudmask-report-format}
%\setcopyright{acmcopyright}
%\copyrightyear{2018}
%\acmYear{2018}
%\acmDOI{XXXXXXX.XXXXXXX}
%\acmConference[MLCommons Science Working Group Report]{MLCommons Cloud Masking Benchmark with Early Stopping }{Jun. 30, revised 9 Dec. 2023}{}
%\acmBooktitle{Woodstock '18: ACM Symposium on Neural Gaze Detection,
% June 03--05, 2018, Woodstock, NY}
%\acmPrice{15.00}
%\acmISBN{978-1-4503-XXXX-X/18/06}
%%\acmSubmissionID{123-A56-BU3}
%%\citestyle{acmauthoryear}
\begin{document}
\newcommand{\TITLE}{An Overview of MLCommons Cloud Masking Benchmark: \\ Related Research and Data \\ {\normalsize Version 1.1}}
\title[Overview of MLCommons Cloud Masking: Related Research]{\TITLE}
% \titlenote{\url{https://github.com/laszewski/papers/raw/master/vonLaszewski-cloudmask-related.pdf}, \url{https://arxiv.org/submit/5278659/view}}
\titlenote{\url{https://github.com/laszewski/papers/raw/master/vonLaszewski-cloudmask-related.pdf}}
\author{Gregor von Laszewski}
\email{[email protected]}
\orcid{0000-0001-9558-179X}
\authornote{MLCommons authorized submitting author}
\affiliation{%
\institution{University of Virginia}
\streetaddress{Biocomplexity Institute and Initiative\\
Town Center Four\\
994 Research Park Boulevard}
\city{Charlottesville}
\state{VA}
\postcode{22911}
\country{USA}
}
\author{Ruochen Gu}
\email{[email protected]}
\affiliation{%
\institution{}
\streetaddress{}
\city{Shanghai}
\state{}
\postcode{}
\country{CN}
}
\renewcommand{\shortauthors}{von Laszewski et al.}
\begin{abstract}
Cloud masking is a crucial task in meteorology and its applications
in environmental and atmospheric sciences. Given satellite images,
its goal is to accurately generate cloud masks that label each pixel
as cloud or clear sky. In this paper, we summarize some of the
ongoing research activities in cloud masking, with a focus on the
research and benchmark currently conducted in the MLCommons Science
Working Group. This overview is produced with the hope that others
will have an easier time getting started with, and collaborating on,
the activities related to the MLCommons Cloud Masking Benchmark.
\end{abstract}
\begin{CCSXML}
<ccs2012>
<concept>
<concept_id>10010405.10010432.10010437</concept_id>
<concept_desc>Applied computing~Earth and atmospheric sciences</concept_desc>
<concept_significance>500</concept_significance>
</concept>
<concept>
<concept_id>10010147.10010178.10010224.10010240.10010241</concept_id>
<concept_desc>Computing methodologies~Image representations</concept_desc>
<concept_significance>500</concept_significance>
</concept>
<concept>
<concept_id>10002951.10003317.10003359.10003360</concept_id>
<concept_desc>Information systems~Test collections</concept_desc>
<concept_significance>500</concept_significance>
</concept>
<concept>
<concept_id>10010583.10010737.10010749</concept_id>
<concept_desc>Hardware~Testing with distributed and parallel systems</concept_desc>
<concept_significance>500</concept_significance>
</concept>
</ccs2012>
\end{CCSXML}
\ccsdesc[500]{Applied computing~Earth and atmospheric sciences}
\ccsdesc[500]{Computing methodologies~Image representations}
\ccsdesc[500]{Information systems~Test collections}
\ccsdesc[500]{Hardware~Testing with distributed and parallel systems}
%%
%% Keywords. The author(s) should pick words that accurately describe
%% the work being presented. Separate the keywords with commas.
\keywords{cloud masking, cloudmesh, datasets, MLCommons, benchmark}
\received[Version from]{8 June, revised 9 December 2023}
\begin{comment}
\begin{center}
{\huge\bf \TITLE}
\end{center}
% \listoftodos
\tableofcontents
\listoffigures
\listoftables
\clearpage
\end{comment}
\settopmatter{printfolios=true}
\maketitle
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{MLCommons Cloud Masking Activities}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\nocite{las-2023-ai-workflow} % add to avoid running bibtex multiple times
The Cloud Masking Benchmark is part of the research currently conducted
by MLCommons \cite{Farrell2021MLPerfHA} Science Working Group
\cite{www-mlcommons-science-github}. We hope that others will
contribute to this document to enhance its scope. Please contact
Gregor von Laszewski ([email protected]) so that we can coordinate
with you.
As of this moment, we are aware of several activities regarding the
MLCommons Cloud Masking Benchmark.
\begin{enumerate}
\item The original benchmark was contributed by Samuel Jackson and
Juri Papaya \cite{Thiyagalingam2022AIBF,jackson-2020-eu} from
Rutherford Labs. The reference implementation is based on U-Net
\cite{RFB15a}.
\item A cloud masking benchmark activity by Junji Yin on PEARL
\cite{Thiyagalingam2022AIBF}.
\item A number of activities carried out by Gregor von Laszewski on
Rivanna, the University of Virginia's High Performance Computing
Cluster. This activity includes the following significant contributions:
\begin{enumerate}
\item Introduction of a README showcasing how to run the code,
which has been reused and modified successfully by others.
\item Introduction of target directories that showcase how to use
templates to run cloud masking benchmarks on various HPC machines, a
DGX station, and a Linux desktop with an NVIDIA card.
\item Introduction of enhanced timers to measure execution time
for different parts of the benchmark program.
\item Usage of Cloudmesh StopWatch to provide easy, human-readable
timers that can be parsed with little effort through exports as
CSV data.
\item Development and usage of a hyper-parameter permutation
framework that enables experimenting with the cloud masking model
under different hyper-parameters, including epochs, batch sizes, and
learning rates (a minimal sketch of such a permutation is given after
this list). This work is also reused in other MLCommons Science
Benchmarks \cite{las22-cloudmesh-cc-reu}. The work simplifies the
reporting of benchmark results following the FAIR principles while
integrating the hyper-parameters as metadata.
\item Development of a workflow system that enables the use of
hybrid compute resources through templates
\cite{las-2023-escience-cloudmask}.
\item Application of the aforementioned work to education
\cite{las-2023-mlcommons-edu-eq}.
\item Hosting of a development repository for MLCommons Cloud Masking
Benchmark code base, as part of MLCommons Science Working Group
\cite{github-laszewsk-mlcommons}.
\item Execution of a substantial number of benchmark experiments.
\end{enumerate}
\item A number of activities by the New York University (``NYU'') AI for
Scientific Research (AIFSR) Benchmark Team on Greene, NYU's High
Performance Computing Cluster.
\begin{enumerate}
\item Modification of the reference implementation to include early
stopping in model training, building on the activities from
Rutherford Labs and UVA.
\item Implementation of a new accuracy metric introduced by Samuel
Jackson, Juri Papaya, and Gregor von Laszewski.
\item Coordination of the benchmark experiments with a bash script
that replicates a small number of features from the previous, more
comprehensive activity conducted by Gregor von Laszewski.
\end{enumerate}
NYU AIFSR's benchmark activity contains a limited number of
experiments in contrast to the activity conducted by Gregor von
Laszewski. A joint report of both efforts is under
preparation. Several versions of the report were started, such
as \cite{las23-cloudmask}. The latest report is not yet
available.
\end{enumerate}
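The hyper-parameter permutation mentioned in the list above can be
illustrated with a minimal sketch. The values and the plain
\verb|itertools.product| loop below are illustrative assumptions; the
actual cloudmesh-ee framework generates such permutations from
configuration files.
\begin{verbatim}
# Minimal sketch of hyper-parameter permutation
# (illustrative values; cloudmesh-ee derives such
# permutations from configuration files).
from itertools import product

epochs = [10, 30, 50]
batches = [8, 16, 32]
rates = [1e-3, 1e-4]

for e, b, lr in product(epochs, batches, rates):
    # each permutation defines one benchmark run
    # and is recorded as metadata with the result
    print({"epochs": e, "batch_size": b,
           "learning_rate": lr})
\end{verbatim}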
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Overview of Cloud Masking and its Related Work}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Since the last century, several methods have been developed for cloud
masking, ranging from rule-based techniques
\cite{Saunders1986AnAS,Saunders1988AnIM,Merchant2005ProbabilisticPB,
Zhu2012ObjectbasedCA} to modern deep learning approaches
\cite{Li2019DeepLB,Domnich2021KappaMaskAC,Yan2018CloudAC,WIELAND2019111203,JEPPESEN2019247}. Among
the more traditional, rule-based techniques, two popular methodologies
have been threshold cloud screening
\cite{Saunders1986AnAS,Saunders1988AnIM} and Bayesian cloud masking
\cite{Merchant2005ProbabilisticPB}.
Threshold screening methods consist of several threshold tests in
which spectral and spatial properties of satellite images are
compared with value ranges that are believed to indicate a clear-sky
pixel. Pixels that are not labeled as clear sky are then flagged as
cloudy. This family of methods was widely used from the late 1980s to
the early 2000s \cite{Merchant2005ProbabilisticPB}.
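To make the idea concrete, the following sketch shows the shape of
such a test. The channels and threshold values are hypothetical and
chosen only for illustration; operational thresholds are
instrument-specific.
\begin{verbatim}
# Hypothetical threshold test; channel choices
# and limits are illustrative only, not any
# operational thresholds.
import numpy as np

def clear_sky_test(bt_11um, refl_06um):
    # a pixel passes only if all tests indicate
    # clear sky; failing pixels become cloudy
    warm = bt_11um > 270.0  # brightness T (K)
    dark = refl_06um < 0.2  # reflectance
    return warm & dark
\end{verbatim}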
The gradual transition away from threshold screening methods was due
to their long-criticized limitations: first, threshold settings rely
heavily on domain expertise about indicators of cloudiness that may
not be objective, which also makes later modifications and updates
difficult; second, thresholds provide users little flexibility in
the trade-off between coverage and accuracy; third, threshold tests do
not make use of all available prior information. These shortcomings
are addressed by the later-developed Bayesian methods
\cite{Merchant2005ProbabilisticPB}.
The Bayesian approach applies Bayes' theorem to prior meteorological
information to deduce, for each pixel, the probability that it
contains cloud or clear sky, and thereafter generates a cloud mask as
output. As a result, these Bayesian approaches are fully probabilistic
and make good use of prior information. Compared to threshold tests,
Bayesian methods achieve better accuracy in predicting pixels'
cloudiness, offer generality and conceptual clarity, and largely
enhance maintainability and adaptability
\cite{Merchant2005ProbabilisticPB}.
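Schematically, for a pixel with observation vector $\mathbf{y}$
(spectral and spatial features), the posterior probability of clear
sky follows from Bayes' theorem:
\[
P(\mathrm{clear}\mid \mathbf{y}) =
\frac{P(\mathbf{y}\mid \mathrm{clear})\,P(\mathrm{clear})}
     {\sum_{c \in \{\mathrm{clear},\,\mathrm{cloud}\}}
      P(\mathbf{y}\mid c)\,P(c)},
\]
where the prior $P(\mathrm{clear})$ and the likelihoods are derived
from prior meteorological information
\cite{Merchant2005ProbabilisticPB}.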
More recently, the rising popularity of deep learning has led to the
use of CNNs for generating cloud masks. Deep learning methods
\cite{Li2019DeepLB,Domnich2021KappaMaskAC,Yan2018CloudAC,WIELAND2019111203,JEPPESEN2019247}
use computer vision models (CNNs) and treat cloud masking as an image
segmentation task. CNNs have achieved superior performance thanks to
their ability to automatically extract features. A research paper
published in 2019 \cite{JEPPESEN2019247} introduces the Remote
Sensing Network (RS-Net), a CNN architecture derived from U-Net
\cite{RFB15a} for cloud masking, which was shown to achieve higher
performance than the state-of-the-art rule-based approach known as
Fmask \cite{Zhu2012ObjectbasedCA}. KappaMask
\cite{Domnich2021KappaMaskAC} and MSCFF \cite{Li2019DeepLB} are two
additional U-Net based CNN models that outperformed Fmask. All these
models have reported their performance on several satellite image
sources such as Sentinel-2 and Landsat, and also made use of
human-annotated (some assisted by software) ground truth values (see
Table \ref{tab:datasets}). In contrast, the MLCommons Cloud Masking
Benchmark operates on SLSTR images from the newer Sentinel-3
satellite and uses cloud masks generated with a Bayesian approach as
ground truth. The reference implementation provided by the MLCommons
Science Working Group achieved 92\% classification accuracy on the
Sentinel-3 test set \cite{Thiyagalingam2022AIBF}.
The aforementioned deep learning approaches to cloud masking are
by no means exhaustive. If you know about other significant cloud
masking or deep learning approaches, please inform us and we will add
them here.
\begin{table*}[htb]
\centering
\caption{Several methods used for cloud masking with their respective dataset, ground truth, model, and accuracy.}
\label{tab:datasets}
\resizebox{1.8\columnwidth}{!}{
\begin{tabular}{|l|c|l|l|l|l|}
\hline
{\bf \#} & {\bf Reference} & {\bf Dataset} & {\bf Ground-truth} & {\bf Model} & {\bf Accuracy} \\ \hline
1 & \cite{Merchant2005ProbabilisticPB} & ATSR-2 & Human annotation & Bayesian screening & 0.917\\ \hline
2 & \cite{WIELAND2019111203} & Sentinel-2 & Software-assisted human annotation (QGIS) & U-Net & 0.90 \\ \hline
3 & \cite{WIELAND2019111203} & Landsat TM & Software-assisted human annotation (QGIS) & U-Net & 0.89 \\ \hline
4 & \cite{WIELAND2019111203} & Landsat ETM+ & Software-assisted human annotation (QGIS) & U-Net & 0.89 \\ \hline
5 & \cite{WIELAND2019111203} & Landsat OLI & Software-assisted human annotation (QGIS) & U-Net & 0.91 \\ \hline
6 & \cite{Li2019DeepLB} & GaoFen-1 & Human annotation & MFFSNet & 0.98, mIOU = 0.87 \\ \hline
7 & \cite{Domnich2021KappaMaskAC} & Sentinel 2 & Software-assisted human annotation (CVAT) & KappaMask & 0.91 \\ \hline
8 & \cite{JEPPESEN2019247} & Landsat 8 Biome and SPARCS & Human annotation & RS-Net & 0.956 \\ \hline
\end{tabular}}
\end{table*}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Dataset}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
For MLCommons Cloud Masking Benchmark, we use the satellite images from Sentinel-3.
\subsection{Sentinel-3}
According to \cite{Sentinel84:online}
``Sentinel-3 is an ocean and land mission composed of two identical satellites (Sentinel-3A and Sentinel-3B).''
Sentinel-3 makes use of multiple sensing instruments to accomplish its objectives:
\begin{itemize}
\item SLSTR (Sea and Land Surface Temperature Radiometer)
\item OLCI (Ocean and Land Colour Instrument)
\item SRAL (SAR Altimeter)
\item DORIS (Doppler Orbitography and Radiopositioning Integrated by Satellite)
\item MWR (Microwave Radiometer).
\end{itemize}
``SLSTR and OLCI are optical instruments that are used to provide data continuity for ENVISAT's AATSR and MERIS instruments and the swaths of the two instruments overlap, allowing for new combined applications. OLCI is a medium-resolution imaging spectrometer, using five cameras to provide a wide field of view.
SRAL, DORIS, MWR and LRR are used for topographic measurements of the ocean and inland water.'' \cite{Sentinel84:online}
One of the satellites is shown in Figure~\ref{fig:sat}. The mission
orbit is sun-synchronous, at a height of 814.5\,km with an
inclination of $98.65^{\circ}$ and a repeat cycle of 27 days
\cite{Sentinel84:online}.
\begin{figure}[htb]
\centering\includegraphics[width=0.8\columnwidth]{images/sentinel-3.jpg}
\caption{A Sentinel-3 satellite.}
\label{fig:sat}
\end{figure}
\subsection{MLCommons Cloud Masking Dataset}
The MLCommons Cloud Masking Benchmark uses 180\,GB of Sentinel-3
SLSTR (Level-1 processing, TOA Radiances and Brightness Temperature)
satellite images. The dataset consists of 1070 images, captured
during both day and night. The dataset also includes a cloud mask for
each image, generated using Bayesian techniques. The reference
implementation uses these cloud masks as ground truth for training
and testing.
The dataset comes with a train-test split, where 970 images are used
for training and 100 images are used for testing. The images are of
dimension $1200 \times 1500$ with 15 different channels and one
additional channel containing the Bayesian mask. Among the 15
channels, 3 represent brightness, 6 represent reflectance, and the
remaining 6 represent radiance. However, the provided reference
implementation uses only a total of 10 channels: the 6 reflectance
channels and the 3 brightness channels serve as model inputs, and the
1 Bayesian mask channel serves as the ground truth for training
and testing.
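A minimal sketch of this channel selection is given below. The array
layout and channel ordering are assumptions for illustration; the
actual loader in the reference implementation may order the channels
differently.
\begin{verbatim}
# Sketch: pick the 9 model-input channels from a
# 15-channel image. The channel ordering is an
# assumption, not the benchmark's actual layout.
import numpy as np

def select_input_channels(image):
    # image: (1200, 1500, 15)
    reflectance = image[..., 0:6]  # 6 channels
    brightness = image[..., 6:9]   # 3 channels
    return np.concatenate(
        [reflectance, brightness], axis=-1)
\end{verbatim}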
\begin{figure*}[htb]
\centering\includegraphics[width=0.75\textwidth]{images/cloudmask-preprocessing-training-data-2.pdf}
\caption{The preprocessing of the training data.}
\label{fig:preprocessing-training}
\bigskip
\centering\includegraphics[width=0.75\textwidth]{images/cloudmask-preprocessing-testing-data-2.pdf}
\caption{The preprocessing of the testing data.}
\label{fig:preprocessing-testing}
\end{figure*}
\subsection{Data Loading and Preprocessing} \label{Preprocessing}
For training-data preprocessing, the images are first cropped from the dimension $1200 \times 1500 \times 9$ to $1024 \times 1280 \times 9$ and then divided into 20 smaller $256 \times 256 \times 9$ patches. After creating these patches from each image in the training set, we get a total of $19400$ patches for training. These patches are further split into training and validation sets with an $80/20$ split ratio and, after shuffling, sent for training.
For the test dataset, the images are neither cropped nor shuffled. Instead, each test image is cut into 63 smaller patches of dimension $256 \times 256 \times 9$ by applying a horizontal and vertical stride of 176 pixels with zero padding on the right and bottom edges of each image. We thus get a total of $6300$ patches for the entire test dataset. After obtaining the predictions from the model, these $256 \times 256 \times 1$ output patches (predicted cloud masks) are reconstructed to the size $1200 \times 1500 \times 1$ and then evaluated against the Bayesian mask ground truth, which has the same dimension. The preprocessing pipelines for training and testing are shown in Figure \ref{fig:preprocessing-training} and Figure \ref{fig:preprocessing-testing}.
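The following sketch reproduces the patch counts described above (20
training patches and 63 test patches per image); it is a minimal
illustration, not the reference implementation.
\begin{verbatim}
# Sketch of the patch extraction; shapes follow
# the text above.
import numpy as np

def training_patches(img):
    # img: (1200, 1500, 9); crop to 1024 x 1280,
    # then a 4 x 5 grid of 256 x 256 patches
    img = img[:1024, :1280, :]
    return [img[r:r + 256, c:c + 256, :]
            for r in range(0, 1024, 256)
            for c in range(0, 1280, 256)]

def test_patches(img, size=256, stride=176):
    h, w, ch = img.shape  # (1200, 1500, 9)
    n_r = -(-(h - size) // stride) + 1  # 7 rows
    n_c = -(-(w - size) // stride) + 1  # 9 cols
    pad = np.zeros(((n_r - 1) * stride + size,
                    (n_c - 1) * stride + size,
                    ch))
    pad[:h, :w, :] = img  # zero padding
    return [pad[r * stride:r * stride + size,
                c * stride:c * stride + size, :]
            for r in range(n_r)
            for c in range(n_c)]
\end{verbatim}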
\subsection{Training}
During training, the model takes a preprocessed patch of dimension
$256 \times 256 \times 9$, and generates a cloud mask of dimension
$256 \times 256 \times 1$. Once the cloud masks have been generated by
the model during training, the accuracy is reported as the percentage
of total pixels that are correctly classified compared to the ground
truth.
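A minimal sketch of this accuracy computation is given below;
thresholding both masks at 0.5 is an assumption made here for
illustration.
\begin{verbatim}
# Sketch: share of pixels whose predicted label
# matches the ground truth; thresholding both
# masks at 0.5 is an assumption here.
import numpy as np

def pixel_accuracy(pred, truth):
    return float(
        np.mean((pred > 0.5) == (truth > 0.5)))
\end{verbatim}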
\subsection{Testing}
During testing, the model generates a cloud mask of dimension
$256 \times 256 \times 1$ for each $256 \times 256 \times 9$
patch. For each pixel, the model outputs the probability that the
pixel is clear sky. Pixels with a probability higher than 50\% are
labeled as clear sky, and as cloudy otherwise. The patches are then
reconstructed back to full-size masks of dimension
$1200 \times 1500 \times 1$.
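The reconstruction can be sketched as follows. How overlapping
patches are combined is an assumption here (later patches simply
overwrite earlier ones); the reference implementation may resolve
overlaps differently.
\begin{verbatim}
# Sketch: threshold per-pixel probabilities and
# stitch the 63 patches (7 x 9, row-major order
# assumed) back to a full-size mask.
import numpy as np

def reconstruct(patches, size=256, stride=176,
                h=1200, w=1500, rows=7, cols=9):
    full = np.zeros(
        ((rows - 1) * stride + size,
         (cols - 1) * stride + size))
    for i, p in enumerate(patches):
        r, c = divmod(i, cols)
        rs, cs = r * stride, c * stride
        full[rs:rs + size,
             cs:cs + size] = p[..., 0]
    clear = full > 0.5    # True = clear sky
    return clear[:h, :w]  # drop zero padding
\end{verbatim}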
The locations of the images used in the testing are depicted in
Figure~\ref{fig:frames-inference}, and their coordinate centers in
Figure~\ref{fig:frames-dot}. As one can observe from the figures,
most of the testing images were captured in the region of the North
Atlantic Ocean and off the west coast of Europe. Furthermore, we
display in Figure \ref{fig:frames-raw} the raw satellite images from
the Sentinel-3 database that reflect the locations where the testing
images are located. Figure \ref{fig:frames-mask} shows the cloud masks
of the testing images. Table \ref{tab:inference} shows the
individual attributes for the specific locations identified by a
counter.
\begin{figure*}[htb]
\centering\includegraphics[width=0.75\textwidth]{images/inference-frames.pdf}
\caption{The location of the satellite images represented as frames used for inference.}
\label{fig:frames-inference}
\centering\includegraphics[width=0.8\textwidth]{images/inference-dots.png}
\caption{The location of the center of the satellite images used for inference.}
\label{fig:frames-dot}
\end{figure*}
\begin{figure*}[htb]
\centering\includegraphics[width=0.8\textwidth]{images/raw-output.png}
\caption{The raw satellite images at the locations where inference is chosen.}
\label{fig:frames-raw}
\end{figure*}
\begin{figure*}[htb]
\centering\includegraphics[width=0.8\textwidth]{images/masks-output.png}
\caption{The mask images at the locations where inference is chosen.}
\label{fig:frames-mask}
\end{figure*}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Conclusion}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
In this paper, we provide a list of related activities under the
MLCommons Cloud Masking Benchmark. The paper includes an overview of
related research in the field of cloud masking in general. In
addition, the paper provides a walk-through and illustration of the
Sentinel-3 satellite image dataset used for the MLCommons Cloud
Masking Benchmark. With this paper, we hope to enhance communication
between activities on this benchmark and help others get started more
easily with the MLCommons Cloud Masking Benchmark.
\begin{acks}
Work was in part funded by (a) NIST 60NANB21D151T, (b) NSF
CyberTraining: CIC: CyberTraining for Students and Technologies from
Generation Z with the award numbers 1829704 and 2200409, (c) the
Department of Energy under grant Award No. DE-SC0023452, and (d) NSF
Collaborative Research: Framework: Software: CINES: A Scalable
Cyberinfrastructure for Sustained Innovation in Network Engineering
and Science with the award number 2210266. The work from the UVA
team was conducted at the Biocomplexity Institute at
the University of Virginia. We would like to thank the NYU AIFSR team
for their contributions. We especially like to thank Ruochen Gu, who
continued to work on this project on a voluntary basis.
\end{acks}
%%
%% The next two lines define the bibliography style to be used, and
%% the bibliography file.
\bibliographystyle{ACM-Reference-Format}
%\bibliographystyle{IEEEtran}
\bibliography{vonLaszewski-cloudmask-related}
%%
%% If your work has an appendix, this is the place to put it.
\section*{Contributions}
Ruochen Gu conducted the work of identifying related research as a
student researcher from the NYU AIFSR Benchmark Team. He continued
this work on a voluntary basis due to his interest in the
project. {\em GvL} contributed significantly to porting cloud masking
onto different machines while making the code portable, contributed
the cloudmesh-ee workflow code and the cloudmesh StopWatch, and
integrated the cloudmesh timers and logging into the code. He ran all
benchmarks on Rivanna and the desktop. In discussions with Rutherford
Lab, a new accuracy value was introduced that was not included in the
original version distributed by MLCommons. He also facilitated many
hackathons with the NYU team. The work described here is cited in the
NYU report.
\begin{comment}
\section{FINDS Sentinel-3}
\TODO{some additional refs that have not been looked at or could lead to other things OR NOT}
\begin{itemize}
\item \url{https://eemont.readthedocs.io/en/0.3.1/tutorials/007-Clouds-Masking-Sentinel-3.html}
\item \url{https://www.eumetsat.int/S3-synergy-cloud-mask}
\item \url{https://sentinels.copernicus.eu/web/sentinel/technical-guides/sentinel-3-slstr/level-1/cloud-identification}
\item \cite{FERNANDEZMORAN2021238}
\item \cite{picchiani2018}
\item \url{file:///scratch2/Downloads/IdePix_Sentinel-3_OLCI_ATBD_v1.0.pdf}
\item \cite{amt-15-7195-2022}
\item \cite{SKAKUN2022112990}
\item \url{https://developers.google.com/earth-engine/tutorials/community/sentinel-2-s2cloudless}
\item \url{https://sentinels.copernicus.eu/documents/247904/2731673/S3_TN_RAL_SL_032+-Issue+8.0+version1.0-++SLSTR+L1+ATBD.pdf/fb45d35c-0d87-dca6-ea3c-dc7c2215b5bc?t=1656685672747}
\end{itemize}
\end{comment}
\clearpage
\appendix
\onecolumn
\section{Table of Locations Used for Inference}
\input{mask-table}
% \clearpage
\section{Naming Convention}
\url{https://sentinels.copernicus.eu/web/sentinel/user-guides/sentinel-3-slstr/naming-convention}
The file naming convention of SLSTR products
(\href{https://earth.esa.int/documents/247904/1964331/Sentinel-3_PDGS_File_Naming_Convention}{see
Sentinel-3 PDGS File Naming Convention for more details}) is identified
by the sequence of fields described below:
\emph{\textbf{MMM\_SL\_L\_TTTTTT\_yyyymmddThhmmss\_YYYYMMDDTHHMMSS\_YYYYMMDDTHHMMSS\_{[}instance
ID{]}\_GGG\_{[}class ID{]}.SEN3}}
where:
\begin{itemize}
\item
\textbf{MMM}~is the mission ID:
\begin{itemize}
\item
S3A = SENTINEL-3A
\item
S3B = SENTINEL-3B
\item
S3\_ = for both SENTINEL-3A and 3B
\end{itemize}
\item
\textbf{SL}~is the data source/consumer (SL = SLSTR)
\item
\textbf{L}~is the processing level:
\begin{itemize}
\item
"0" for Level-0
\item
"1" for Level-1
\item
"2" for Level-2
\item
underscore "\_" if processing level is not applicable
\end{itemize}
\item
\textbf{TTTTTT}~is the data Type ID
\begin{itemize}
\item
Level 0 SLSTR data:
\begin{itemize}
\item
"SLT\_\_\_" = ISPs.
\end{itemize}
\item
Level-1 SLSTR data:
\begin{itemize}
\item
"RBT\_\_\_" = TOA Radiances and Brightness Temperature
\item
"RBT\_BW" = browse product derived from "RBT\_\_\_".
\end{itemize}
\item
Level-2 SLSTR data:
\begin{itemize}
\item
"WCT\_\_\_" = 2 and 3 channels SST for nadir and along track view
\item
"WST\_\_\_" = L2P sea surface temperature
\item
"LST\_\_\_" = land surface temp
\item
"FRP\_\_\_" = Fire Radiative Power
\item
"WST\_BW" = browse product derived from "WST\_\_\_"
\item
"LST\_BW" = browse product derived from "LST\_\_\_".
\end{itemize}
\end{itemize}
\item
\textbf{yyyymmddThhmmss}~is the sensing start time
\item
\textbf{YYYYMMDDTHHMMSS}~is the sensing stop time
\item
\textbf{YYYYMMDDTHHMMSS}~is the product creation date
\item
\textbf{{[}instance ID{]}~}consists of 17 characters, either uppercase
letters or digits or underscores "\_".
\end{itemize}
\begin{quote}
The instance id fields include the following cases, applicable as
indicated:
\begin{enumerate}
\item Instance ID for the instrument data products disseminated in
"stripes":\\
duration, "\_", cycle number, "\_", relative orbit number, "\_", 4
underscores "\_", i.e.\\
DDDD\_CCC\_LLL\_\_\_\_\_
\item Instance ID for the instrument data products disseminated in
"frames":\\
duration, "\_", cycle number, "\_", relative orbit number, "\_", frame
along-track coordinate, i.e.\\
DDDD\_CCC\_LLL\_FFFF
\item Instance ID for the instrument data products disseminated in
"tiles". Two sub-cases are applicable:\\
a) a tile covering the whole globe:\\
"GLOBAL\_\_\_\_\_\_\_\_\_\_\_"\\
b) a tile cut according to specific geographical criteria, identified
by a tile identifier:\\
ttttttttttttttttt
\item Instance ID for auxiliary data:\\
17 underscores "\_"
\end{enumerate}
\end{quote}
\begin{itemize}
\item
\textbf{GGG}~identifies the centre which generated the file
\item
\textbf{{[}class ID{]}}~identifies the class ID for instrument data
products with conventional sequence~\textbf{P\_XX\_NNN}~where:
\begin{itemize}
\item
P indicates the platform (O for operational, F for reference, D for
development, R for reprocessing)
\item
XX indicates the timeliness of the processing workflow (NR for NRT,
ST for STC, NT for NTC)
\item
NNN indicates the baseline collection or data usage.
\end{itemize}
\item
\textbf{.SEN3}~is the filename extension
\end{itemize}
Example of filename:
\verb|S3A_SL_2_LST____20151229T095534_20151229T114422_20160102T150019_6528_064_365______LN2_D_NT_001.SEN3|
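As an illustration, the fixed field widths of the convention allow a
product name to be split by character position. The following sketch
is not an official parser; the slicing offsets are derived from the
field descriptions above.
\begin{verbatim}
# Sketch: split an SLSTR product name into its fields by character
# position; offsets are derived from the field descriptions above.
def parse_slstr_name(name):
    return {
        "mission":   name[0:3],    # MMM, e.g., S3A
        "source":    name[4:6],    # SL = SLSTR
        "level":     name[7],      # 0, 1, 2, or _
        "type":      name[9:15],   # TTTTTT, e.g., LST___
        "start":     name[16:31],  # sensing start time
        "stop":      name[32:47],  # sensing stop time
        "created":   name[48:63],  # product creation date
        "instance":  name[64:81],  # 17-character instance ID
        "centre":    name[82:85],  # GGG
        "class_id":  name[86:94],  # P_XX_NNN
        "extension": name[94:],    # .SEN3
    }

print(parse_slstr_name(
    "S3A_SL_2_LST____20151229T095534_20151229T114422_"
    "20160102T150019_6528_064_365______LN2_D_NT_001.SEN3"))
\end{verbatim}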
\section{Inference Files}
\begin{verbatim}
S3A_SL_1_RBT____20191001T113221_20191001T113521_20191002T153211_0179_050_023_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191002T124409_20191002T124709_20191003T180638_0180_050_038_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191002T124709_20191002T125009_20191003T180806_0179_050_038_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191002T125609_20191002T125909_20191003T181226_0179_050_038_2520_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191003T122059_20191003T122359_20191004T173532_0179_050_052_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191003T122659_20191003T122959_20191004T173822_0179_050_052_2340_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191004T114848_20191004T115148_20191005T172918_0179_050_066_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191004T115148_20191004T115448_20191005T173023_0179_050_066_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191004T115448_20191004T115748_20191005T173129_0180_050_066_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191004T120048_20191004T120348_20191005T173344_0179_050_066_2340_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191005T112237_20191005T112537_20191006T161503_0179_050_080_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191005T112537_20191005T112837_20191006T161611_0179_050_080_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191005T112837_20191005T113137_20191006T161718_0179_050_080_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191006T105626_20191006T105926_20191007T153322_0179_050_094_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191006T124025_20191006T124325_20191007T170714_0179_050_095_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191006T125225_20191006T125525_20191007T171148_0179_050_095_2520_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191007T121414_20191007T121714_20191009T123433_0179_050_109_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191007T121714_20191007T122014_20191009T123540_0179_050_109_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191007T122614_20191007T122914_20191009T123903_0179_050_109_2520_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191008T114803_20191008T115103_20191009T162306_0179_050_123_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191008T115103_20191008T115403_20191009T162431_0179_050_123_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191008T115703_20191008T120003_20191009T162735_0179_050_123_2340_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191009T112453_20191009T112753_20191010T171844_0180_050_137_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191009T195549_20191009T195849_20191011T011836_0179_050_142_2340_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191012T114719_20191012T115019_20191013T162441_0179_050_180_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191014T123257_20191014T123557_20191015T172725_0179_050_209_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191014T123557_20191014T123857_20191015T172851_0179_050_209_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191014T124457_20191014T124757_20191015T173252_0179_050_209_2520_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191015T120646_20191015T120946_20191016T160534_0179_050_223_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191015T120946_20191015T121246_20191016T160700_0179_050_223_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191015T121546_20191015T121846_20191016T160934_0179_050_223_2340_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191017T130723_20191017T131023_20191018T181427_0179_050_252_2520_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191017T194820_20191017T195120_20191019T014123_0180_050_256_2340_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191018T122912_20191018T123212_20191019T172627_0180_050_266_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191019T120301_20191019T120601_20191020T172539_0179_050_280_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191019T120601_20191019T120901_20191020T172659_0179_050_280_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191019T121201_20191019T121501_20191020T172947_0179_050_280_2340_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191020T113650_20191020T113950_20191021T155904_0179_050_294_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191020T113950_20191020T114250_20191021T160028_0179_050_294_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191022T122528_20191022T122828_20191023T162316_0179_050_323_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191022T122828_20191022T123128_20191023T162424_0179_050_323_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191023T115917_20191023T120217_20191024T181537_0179_050_337_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191023T120217_20191023T120517_20191024T181700_0179_050_337_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191023T120817_20191023T121117_20191024T181932_0179_050_337_2340_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191024T113606_20191024T113906_20191025T164619_0180_050_351_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191024T114206_20191024T114506_20191025T164855_0179_050_351_2340_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191026T122143_20191026T122443_20191027T165912_0179_050_380_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191026T122443_20191026T122743_20191027T170026_0179_050_380_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191026T123343_20191026T123643_20191027T170426_0180_050_380_2520_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191027T115532_20191027T115832_20191028T171044_0179_051_009_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191027T115832_20191027T120132_20191028T171322_0179_051_009_1980_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191027T120432_20191027T120732_20191028T171546_0179_051_009_2340_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191028T131020_20191028T131320_20191029T181138_0179_051_024_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191029T124409_20191029T124709_20191030T172048_0179_051_038_1800_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191029T125609_20191029T125909_20191030T172609_0179_051_038_2520_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191001T230116_20191001T230416_20191003T022222_0179_050_030_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191002T223505_20191002T223805_20191004T031930_0179_050_044_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191003T220854_20191003T221154_20191005T030445_0179_050_058_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191003T234353_20191003T234653_20191005T041723_0179_050_059_0540_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191004T231742_20191004T232042_20191006T041726_0179_050_073_0540_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191005T225731_20191005T230031_20191007T024922_0179_050_087_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191006T223121_20191006T223421_20191008T021301_0179_050_101_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191007T014419_20191007T014719_20191008T061144_0179_050_103_0360_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191007T202711_20191007T203011_20191009T155246_0179_050_114_1080_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191007T220510_20191007T220810_20191010T073003_0179_050_115_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191008T114503_20191008T114803_20191009T162144_0179_050_123_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191009T093753_20191009T094053_20191010T151007_0179_050_136_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191009T111853_20191009T112153_20191010T171717_0179_050_137_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191010T071243_20191010T071543_20191011T114643_0179_050_149_0540_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191010T105242_20191010T105542_20191011T160548_0179_050_151_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191011T014035_20191011T014335_20191012T060733_0179_050_160_0360_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191011T102631_20191011T102931_20191012T152504_0180_050_165_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191011T184527_20191011T184827_20191013T002531_0179_050_170_1260_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191012T181916_20191012T182216_20191013T230919_0179_050_184_1260_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191013T172106_20191013T172406_20191014T215723_0179_050_197_5400_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191013T175305_20191013T175605_20191014T222812_0180_050_198_1260_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191013T225003_20191013T225303_20191015T030239_0179_050_201_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191014T222352_20191014T222652_20191016T015710_0179_050_215_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191015T102247_20191015T102547_20191016T142544_0179_050_222_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191015T120346_20191015T120646_20191016T160531_0179_050_223_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191015T215741_20191015T220041_20191017T010323_0179_050_229_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191016T075436_20191016T075736_20191017T123959_0179_050_235_0360_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191016T095636_20191016T095936_20191017T152606_0179_050_236_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191016T195331_20191016T195631_20191018T012929_0179_050_242_1080_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191017T004429_20191017T004729_20191018T060449_0179_050_245_0360_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191018T222008_20191018T222308_20191020T032233_0179_050_272_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191019T120001_20191019T120301_20191020T172512_0179_050_280_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191019T215357_20191019T215657_20191021T014757_0179_050_286_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191020T194947_20191020T195247_20191022T003158_0179_050_299_1080_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191021T004044_20191021T004344_20191022T051308_0180_050_302_0360_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191023T115617_20191023T115917_20191024T181537_0180_050_337_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191023T201213_20191023T201513_20191025T014837_0179_050_342_1080_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191023T215012_20191023T215312_20191025T023107_0179_050_343_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191025T003700_20191025T004000_20191026T054924_0180_050_359_0360_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191027T101133_20191027T101433_20191028T144445_0179_051_008_1620_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191027T214628_20191027T214928_20191029T013217_0179_051_015_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191029T003315_20191029T003615_20191030T050058_0179_051_031_0360_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191029T223505_20191029T223805_20191031T015742_0179_051_044_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191030T220854_20191030T221154_20191101T025713_0179_051_058_0900_LN2_O_NT_003.hdf
S3A_SL_1_RBT____20191031T231742_20191031T232042_20191102T034613_0179_051_073_0540_LN2_O_NT_003.hdf
\end{verbatim}
\end{document}
\endinput
%%
%% End of file `sample-sigplan.tex'.