From a1996b445d31657595f6eb82916f399285209194 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Thu, 9 Feb 2017 10:59:02 -0700 Subject: [PATCH 01/23] modifying travis to use gfortran 5 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1b74a82..b80f5ae 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ addons: source: - ubuntu-toolchain-r-test packages: - - gfortran + - gfortran-5 - libnetcdf-dev - liblapack-dev script: From 936ddfe6ba5cbccbb235e073a68b8dafc27009f9 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Thu, 9 Feb 2017 11:10:48 -0700 Subject: [PATCH 02/23] modifying travis to use gfortran 5 take 2 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b80f5ae..0cef0ed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ sudo: false language: fortran compiler: - - gfortran + - gfortran-5 os: - linux addons: From e96c2d4b6121c3731db9f2dae38ab2a8ecf78c7b Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Thu, 9 Feb 2017 11:13:46 -0700 Subject: [PATCH 03/23] modifying travis to use gfortran 5 take 3 --- .travis.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0cef0ed..1da524c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,16 @@ sudo: false language: fortran compiler: - - gfortran-5 + - gfortran-6 os: - linux +env: TESTID='GARD_linux' addons: apt: source: - ubuntu-toolchain-r-test packages: - - gfortran-5 + - gfortran-6 - libnetcdf-dev - liblapack-dev script: From 3a6cf3fe3982b47437d3ef2bc6e71051499c79ab Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Thu, 9 Feb 2017 13:42:05 -0700 Subject: [PATCH 04/23] Updating travis take 4 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1da524c..fd946a9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ os: env: TESTID='GARD_linux' addons: apt: - source: + sources: - ubuntu-toolchain-r-test packages: - gfortran-6 From ad63ff5cfef468cda5478ac8c230bf9edc0261ae Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 22:44:19 -0700 Subject: [PATCH 05/23] Moved random routines into random module, removed dependency on gfortran>5 --- src/downscaling_stats/downscaling.f90 | 45 ++---------- src/makefile | 6 +- src/utilities/random.f90 | 100 ++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 42 deletions(-) create mode 100644 src/utilities/random.f90 diff --git a/src/downscaling_stats/downscaling.f90 b/src/downscaling_stats/downscaling.f90 index 5244305..07bf46c 100644 --- a/src/downscaling_stats/downscaling.f90 +++ b/src/downscaling_stats/downscaling.f90 @@ -9,6 +9,7 @@ module downscaling_mod use time_util, only : setup_time_indices use basic_stats_mod, only : stddev use io_routines, only : file_exists, io_read + use random_mod, only : box_muller_random implicit none integer*8, dimension(10) :: master_timers @@ -560,46 +561,6 @@ subroutine transform_data(transform_type, pred_data, & end select end subroutine transform_data - !>------------------------------------------------ - !! Use the Box-Muller Transform to convert uniform to normal random deviates - !! - !! Note random_sample should be an allocated 1D real array - !! On return, random_sample will be filled with random normal (0,1) data - !! - !! Caveat: this transform is unable to generate extremely high values (>6.66) - !! Having switched to double precision it may do better now. - !! 
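!! For reference, the transform implemented below: given independent uniform deviates
!! u1, u2 in (0,1],
!!     z1 = sqrt(-2*log(u1)) * cos(2*pi*u2)
!!     z2 = sqrt(-2*log(u1)) * sin(2*pi*u2)
!! are a pair of independent standard normal (mean 0, variance 1) deviates.  The 6.66 figure
!! quoted above is consistent with 32-bit uniform inputs, since sqrt(-2*log(2.0**(-32))) ~= 6.66
!! bounds the largest magnitude that can be produced; double-precision uniforms push that
!! bound to roughly sqrt(-2*log(2.0**(-52))) ~= 8.5.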
- !------------------------------------------------- - subroutine box_muller_random(random_sample) - implicit none - real, intent(inout) :: random_sample(:) - integer :: n,i - - double precision :: u1, u2, s - double precision, allocatable :: double_random(:) - - n = size(random_sample) - allocate(double_random(n)) - call random_number(double_random) - - do i=1,n,2 - u1 = double_random(i) - if (i 0) then do i=1, n start = max(1, i - options%time_smooth) diff --git a/src/makefile b/src/makefile index b67a281..c1d231d 100644 --- a/src/makefile +++ b/src/makefile @@ -282,6 +282,7 @@ OBJS = \ $(BUILD)obs_io.o \ $(BUILD)geo_reader.o \ $(BUILD)basic_stats.o \ + $(BUILD)random.o \ $(BUILD)sorting.o \ $(BUILD)quantile_mapping.o \ $(BUILD)analogs.o \ @@ -399,6 +400,9 @@ $(BUILD)quantile_mapping.o: utilities/quantile_map.f90 $(BUILD)data_structures.o $(BUILD)sorting.o: utilities/sorting.f90 utilities/qsort_inline.inc $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ +$(BUILD)random.o: utilities/random.f90 $(BUILD)model_constants.o + $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ + $(BUILD)basic_stats.o: utilities/basic_stats.f90 $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ @@ -454,7 +458,7 @@ $(BUILD)atmosphere_io.o: io/atmosphere_io.f90 $(BUILD)data_structures.o \ $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ $(BUILD)downscaling.o: downscaling_stats/downscaling.f90 $(BUILD)data_structures.o $(BUILD)model_constants.o \ - $(BUILD)quantile_mapping.o $(BUILD)regression.o $(BUILD)analogs.o + $(BUILD)quantile_mapping.o $(BUILD)regression.o $(BUILD)analogs.o $(BUILD)random.o $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ $(BUILD)analogs.o: downscaling_stats/analogs.f90 $(BUILD)string.o diff --git a/src/utilities/random.f90 b/src/utilities/random.f90 new file mode 100644 index 0000000..4c27c31 --- /dev/null +++ b/src/utilities/random.f90 @@ -0,0 +1,100 @@ +module random_mod + + use model_constants + + implicit none + + double precision, parameter :: root_pi_over_8 = 0.626657068658 ! sqrt(kPI/8) + +contains + + !>------------------------------------------------ + !! Use the Box-Muller Transform to convert uniform to normal random deviates + !! + !! Note random_sample should be an allocated 1D real array + !! On return, random_sample will be filled with random normal (0,1) data + !! + !! Caveat: this transform is unable to generate extremely high values (>6.66) + !! Having switched to double precision it may do better now. + !! + !------------------------------------------------- + subroutine box_muller_random(random_sample) + implicit none + real, intent(inout) :: random_sample(:) + integer :: n,i + + double precision :: u1, u2, s + double precision, allocatable :: double_random(:) + + n = size(random_sample) + allocate(double_random(n)) + call random_number(double_random) + + do i=1,n,2 + u1 = double_random(i) + if (i------------------------------------------------ + !! Computes an approximation to the normal CDF based on Aludaat and Alodat (2008) + !! + !! Aludaat, K.M. and Alodat, M.T. (2008). A note on approximating the normal distribution function. Applied Mathematical Sciences, Vol 2, no 9, pgs 425-429. + !! 
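!! For x >= 0 the approximation is
!!     CDF(x) ~= 0.5 * (1 + sqrt(1 - exp(-sqrt(pi/8) * x**2)))
!! and symmetry (CDF(-x) = 1 - CDF(x)) handles negative arguments; root_pi_over_8 above is
!! sqrt(pi/8) ~= 0.6266571.  The reported maximum absolute error of this approximation is on
!! the order of 2e-3, which is why the unit tests added later in this series only require
!! agreement with scipy.stats.norm.cdf to within 2e-3.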
+ !!------------------------------------------------ + function normal_cdf(normal) result(cdf) + implicit none + real :: normal + real :: cdf + + cdf = 0.5 * (1+ sqrt((1- exp(0-(root_pi_over_8 * normal**2))) )) + + if (normal < 0) then + cdf = 1 - cdf + endif + + end function normal_cdf + + + + subroutine sample_distribution(output, mean_values, error_term, uniform_random_values, exceedence_probability, threshold) + implicit none + real, intent(out) :: output(:) + real, intent(in) :: mean_values(:), error_term(:), uniform_random_values(:) + real, intent(in), optional :: exceedence_probability(:) + real, intent(in), optional :: threshold + + integer :: i,n + real :: random_normal, poe, threshold_internal + + threshold_internal = 0 + if (present(threshold)) threshold_internal = threshold + + n = size(mean_values) + + do i=1,n + if (present(exceedence_probability)) then + if (exceedence_probability(i) > (1-uniform_random_values(i))) then + + ! random_normal = get_random_from_cdf(uniform_random_values(i) - exceedence_probability(i) / (1-exceedence_probability(i)) ) + + endif + endif + + enddo + output = 1 + end subroutine sample_distribution + +end module random_mod From 4d61c736b62b93c2d2d0ce585eb15ebed598fb21 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 22:47:14 -0700 Subject: [PATCH 06/23] moving travis build back to gfortran default version --- .travis.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index fd946a9..2c5b194 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,15 @@ sudo: false language: fortran compiler: - - gfortran-6 + - gfortran os: - linux -env: TESTID='GARD_linux' addons: apt: sources: - ubuntu-toolchain-r-test packages: - - gfortran-6 + - gfortran - libnetcdf-dev - liblapack-dev script: From 47da3291292bf15f619451ca614c2592002cb620 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 22:52:54 -0700 Subject: [PATCH 07/23] changed parameter from int to real for gfortran (standards?) 
compatibility --- src/downscaling_stats/downscaling.f90 | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/downscaling_stats/downscaling.f90 b/src/downscaling_stats/downscaling.f90 index 07bf46c..f91b556 100644 --- a/src/downscaling_stats/downscaling.f90 +++ b/src/downscaling_stats/downscaling.f90 @@ -12,10 +12,11 @@ module downscaling_mod use random_mod, only : box_muller_random implicit none + real, parameter :: LOG_FILL_VALUE = 1e-30 + real, parameter :: MAX_ALLOWED_SIGMA = 20 + integer, parameter :: N_RANDOM_SAMPLES = 10000 + integer*8, dimension(10) :: master_timers - real, parameter :: LOG_FILL_VALUE = 1e-30 - real, parameter :: MAX_ALLOWED_SIGMA = 20 - real, parameter :: N_RANDOM_SAMPLES = 10000 real :: random_sample(N_RANDOM_SAMPLES) contains From a8b430398b761e9a05040a63aeb47bed8193aef5 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 23:35:11 -0700 Subject: [PATCH 08/23] Added test for random number module --- .gitignore | 1 + src/makefile | 13 +++++++++- src/tests/test_random.f90 | 53 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 src/tests/test_random.f90 diff --git a/.gitignore b/.gitignore index 73894b5..1996925 100644 --- a/.gitignore +++ b/.gitignore @@ -35,4 +35,5 @@ test_regression test_calendar test_qm test_config +test_random gard diff --git a/src/makefile b/src/makefile index c1d231d..ed3f33e 100644 --- a/src/makefile +++ b/src/makefile @@ -320,6 +320,10 @@ REGRESSION_TEST_OBJS = \ $(BUILD)regression.o \ $(BUILD)test_regression.o +RANDOM_TEST_OBJS = \ + $(BUILD)test_random.o \ + $(BUILD)random.o \ + $(BUILD)model_constants.o # These are the primary user facing rules # @@ -330,7 +334,7 @@ model: gard all: gard test -test: test_calendar test_sort test_qm test_config test_regression +test: test_calendar test_sort test_qm test_config test_regression test_random install: gard cp gard $(INSTALL_DIR)/ @@ -359,10 +363,14 @@ test_qm : $(QM_TEST_OBJS) test_sort : $(SORT_TEST_OBJS) $(FC) -o $@ $^ $(LDFLAGS) +test_random : $(RANDOM_TEST_OBJS) + $(FC) -o $@ $^ $(LDFLAGS) + test_config : $(CONFIG_TEST_OBJS) $(FC) -o $@ $^ $(LDFLAGS) + # specify specific builds and dependencies. # unit tests @@ -381,6 +389,9 @@ $(BUILD)test_qm.o: tests/test_qm.f90 $(BUILD)data_structures.o $(BUILD)quantile_ $(BUILD)test_config.o: tests/test_config.f90 $(BUILD)data_structures.o $(BUILD)model_constants.o $(BUILD)configuration.o $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ +$(BUILD)test_random.o: tests/test_random.f90 $(BUILD)random.o $(BUILD)model_constants.o + $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ + # Date and time relevant objects $(BUILD)time.o: timing/time.f90 $(BUILD)model_constants.o diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 new file mode 100644 index 0000000..7970d8f --- /dev/null +++ b/src/tests/test_random.f90 @@ -0,0 +1,53 @@ +program test_random + + use random_mod + + implicit none + + print*, "test_normal_to_cdf_conversion" + call test_normal_to_cdf_conversion() + + +contains + + + subroutine test_normal_to_cdf_conversion() + + implicit none + integer, parameter :: ntest_values = 23 + + ! 
hard coded expected CDF values for given normal value from scipy.stats.norm.cdf + real :: normal_values(ntest_values) = [-5.50, -5.00, -4.50, -4.00, -3.50, -3.00, -2.50, -2.00, & + -1.50, -1.00, -0.50, 0.000, 0.500, 1.00, 1.50, 2.00, & + 2.50, 3.00, 3.50, 4.00, 4.50, 5.00, 5.50] + + real :: cdf_values(ntest_values) = [0.000000019, 0.0000002, 0.0000033, 0.0000316, 0.0002326, & + 0.0013498, 0.0062096, 0.0227501, 0.0668072, 0.1586552, & + 0.3085375, 0.5000000, 0.6914624, 0.8413447, 0.9331927, & + 0.9772498, 0.9937903, 0.9986501, 0.9997673, 0.9999683, & + 0.9999966, 0.9999997, 0.9999999810] + logical :: passing + integer :: i + + passing = .True. + + ! iterate over all values to test, if expected value is more than 2e-3 away from the actual result it fails + do i = 1, ntest_values + if (abs(cdf_values(i) - normal_cdf( normal_values(i) ) ) > 2e-3) then + passing = .False. + print*, "" + print*, " Error = ",cdf_values(i) - normal_cdf( normal_values(i)) + print*, " Expected = ", cdf_values(i) + print*, " Got: ",normal_cdf( normal_values(i)), " for:",normal_values(i) + endif + end do + + if (passing) then + print*, " PASSED" + else + print*, " FAILED" + endif + end subroutine test_normal_to_cdf_conversion + + +end program test_random From 4b0db27a2672c5cebcbeb73ef2300073ea5fa3d3 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 23:46:28 -0700 Subject: [PATCH 09/23] Added test for box-muller transform --- src/tests/test_random.f90 | 50 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 index 7970d8f..2abfa7c 100644 --- a/src/tests/test_random.f90 +++ b/src/tests/test_random.f90 @@ -7,10 +7,11 @@ program test_random print*, "test_normal_to_cdf_conversion" call test_normal_to_cdf_conversion() + print*, "test_box_muller_random" + call test_box_muller_random() contains - subroutine test_normal_to_cdf_conversion() implicit none @@ -49,5 +50,52 @@ subroutine test_normal_to_cdf_conversion() endif end subroutine test_normal_to_cdf_conversion + subroutine test_box_muller_random() + implicit none + + integer, parameter :: n_random_values = 10000 + + real :: random_values(n_random_values) + real :: sigma, mean + integer :: i + logical :: passing + + passing = .True. + + call box_muller_random(random_values) + + mean = sum(random_values) / size(random_values) + if (abs(mean) > 1e-2) then + print*, " Error failed computing mean: ",mean + passing = .False. + endif + + sigma = 0 + do i = 1, n_random_values + sigma = sigma + (random_values(i) - mean)**2 + enddo + sigma = sqrt(sigma / (n_random_values - 1)) + if (abs(sigma - 1) > 1e-2) then + print*, " Error failed computing standard deviation: ", sigma + passing = .False. + endif + + + if (minval(random_values) > (-3)) then + print*, " Error failed computing minimum value: ", minval(random_values) + passing = .False. + endif + + if (maxval(random_values) < (3)) then + print*, " Error failed computing maximum value: ", maxval(random_values) + passing = .False. 
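! The +/-3 bounds are loose on purpose: P(|Z| > 3) ~= 0.0027 for a standard normal, so with
! n on the order of 10^4 samples the probability that no draw falls below -3 (or that none
! exceeds +3) is about (1 - 0.00135)**10000 ~= exp(-13.5), roughly 1e-6, meaning a correct
! generator essentially never fails these checks by chance.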
+ endif + + if (passing) then + print*, " PASSED" + else + print*, " FAILED" + endif + end subroutine test_box_muller_random end program test_random From ac6524c5cafd8e318a7405304e84a870d7309289 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 23:50:29 -0700 Subject: [PATCH 10/23] Added test for box-muller transform --- src/tests/test_random.f90 | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 index 2abfa7c..ca5d8c3 100644 --- a/src/tests/test_random.f90 +++ b/src/tests/test_random.f90 @@ -7,8 +7,10 @@ program test_random print*, "test_normal_to_cdf_conversion" call test_normal_to_cdf_conversion() - print*, "test_box_muller_random" - call test_box_muller_random() + print*, "test_box_muller_random(10000)" + call test_box_muller_random(10000) + print*, "test_box_muller_random(999)" + call test_box_muller_random(999) contains @@ -50,10 +52,10 @@ subroutine test_normal_to_cdf_conversion() endif end subroutine test_normal_to_cdf_conversion - subroutine test_box_muller_random() + subroutine test_box_muller_random(n_random_values) implicit none - integer, parameter :: n_random_values = 10000 + integer, intent(in) :: n_random_values real :: random_values(n_random_values) real :: sigma, mean From fdfa28820fbca100632dbc72865af6889405d5f4 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 23:51:07 -0700 Subject: [PATCH 11/23] Added test for box-muller transform --- src/tests/test_random.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 index ca5d8c3..9e7a500 100644 --- a/src/tests/test_random.f90 +++ b/src/tests/test_random.f90 @@ -10,7 +10,7 @@ program test_random print*, "test_box_muller_random(10000)" call test_box_muller_random(10000) print*, "test_box_muller_random(999)" - call test_box_muller_random(999) + call test_box_muller_random(9999) contains From df46a9fc2c6f338df930c34e6019584b0da3d067 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 23:52:28 -0700 Subject: [PATCH 12/23] Added (passing) test for box-muller transform with odd number of values --- src/tests/test_random.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 index 9e7a500..63baa44 100644 --- a/src/tests/test_random.f90 +++ b/src/tests/test_random.f90 @@ -77,7 +77,7 @@ subroutine test_box_muller_random(n_random_values) sigma = sigma + (random_values(i) - mean)**2 enddo sigma = sqrt(sigma / (n_random_values - 1)) - if (abs(sigma - 1) > 1e-2) then + if (abs(sigma - 1) > 1e-1) then print*, " Error failed computing standard deviation: ", sigma passing = .False. 
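! The relaxed 1e-1 tolerance is appropriate here: the sampling error of a standard deviation
! estimated from n independent normal deviates is roughly 1/sqrt(2n), i.e. about 0.007 for
! n on the order of 10^4, so the previous 1e-2 bound sat within about 1.5 standard errors and
! could fail by chance fairly often, whereas 1e-1 lies far outside the expected sampling noise.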
endif From 733373ccc3408875046289839818c5bd7bd5fb51 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Sat, 11 Feb 2017 02:06:47 -0700 Subject: [PATCH 13/23] Added capability to compute a corresponding random normal from a uniform random, and tests --- src/tests/test_random.f90 | 49 +++++++++++++ src/utilities/random.f90 | 150 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 194 insertions(+), 5 deletions(-) diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 index 63baa44..c1dbf5b 100644 --- a/src/tests/test_random.f90 +++ b/src/tests/test_random.f90 @@ -4,16 +4,65 @@ program test_random implicit none + print*, "-------------------------------------" print*, "test_normal_to_cdf_conversion" call test_normal_to_cdf_conversion() + print*, "-------------------------------------" print*, "test_box_muller_random(10000)" call test_box_muller_random(10000) + print*, "-------------------------------------" print*, "test_box_muller_random(999)" call test_box_muller_random(9999) + print*, "-------------------------------------" + print*, "test_uniform_to_normal" + call test_uniform_to_normal() + contains + subroutine test_uniform_to_normal() + + implicit none + integer, parameter :: ntest_values = 23 + + ! hard coded expected CDF values for given normal value from scipy.stats.norm.cdf + real :: normal_values(ntest_values) = [-5.50, -5.00, -4.50, -4.00, -3.50, -3.00, -2.50, -2.00, & + -1.50, -1.00, -0.50, 0.000, 0.500, 1.00, 1.50, 2.00, & + 2.50, 3.00, 3.50, 4.00, 4.50, 5.00, 5.50] + + real :: cdf_values(ntest_values) = [0.000000019, 0.0000002, 0.0000033, 0.0000316, 0.0002326, & + 0.0013498, 0.0062096, 0.0227501, 0.0668072, 0.1586552, & + 0.3085375, 0.5000000, 0.6914624, 0.8413447, 0.9331927, & + 0.9772498, 0.9937903, 0.9986501, 0.9997673, 0.9999683, & + 0.9999966, 0.9999997, 0.9999999810] + logical :: passing + integer :: i + real :: normal + + passing = .True. + print*, "WARNING: Passing criteria may not be strict enough, errors can be up to 20%" + + ! iterate over all values to test, if expected value is more than 2e-3 away from the actual result it fails + do i = 1, ntest_values + normal = get_random_from_cdf( cdf_values(i) ) + if ((abs(normal_values(i) - normal ) > 1e-2) .and. (abs(normal_values(i) - normal )/abs(normal_values(i)) > 2e-1)) then + passing = .False. + print*, "" + print*, " Error = ",normal_values(i) - normal, (normal_values(i) - normal)/normal_values(i) + print*, " Expected = ", normal_values(i) + print*, " Got: ",normal, " for:",cdf_values(i) + endif + end do + + if (passing) then + print*, " PASSED" + else + print*, " FAILED" + endif + end subroutine test_uniform_to_normal + + subroutine test_normal_to_cdf_conversion() implicit none diff --git a/src/utilities/random.f90 b/src/utilities/random.f90 index 4c27c31..6b9ef4e 100644 --- a/src/utilities/random.f90 +++ b/src/utilities/random.f90 @@ -5,6 +5,13 @@ module random_mod implicit none double precision, parameter :: root_pi_over_8 = 0.626657068658 ! sqrt(kPI/8) + + integer, parameter :: size_of_normal_cdf_lookup_table = 1000000 + double precision, allocatable :: fast_cdf_lookup_table(:), cdf_lookup_table(:,:) + + interface normal_cdf + module procedure real_normal_cdf, double_normal_cdf + end interface contains @@ -54,20 +61,153 @@ end subroutine box_muller_random !! Aludaat, K.M. and Alodat, M.T. (2008). A note on approximating the normal distribution function. Applied Mathematical Sciences, Vol 2, no 9, pgs 425-429. !! 
!!------------------------------------------------ - function normal_cdf(normal) result(cdf) + function real_normal_cdf(normal) result(cdf) implicit none real :: normal real :: cdf - cdf = 0.5 * (1+ sqrt((1- exp(0-(root_pi_over_8 * normal**2))) )) + cdf = 0.5 * (1 + sqrt((1 - exp(0 - (root_pi_over_8 * normal**2))) )) if (normal < 0) then cdf = 1 - cdf endif - end function normal_cdf + end function real_normal_cdf - + function double_normal_cdf(normal) result(cdf) + implicit none + double precision :: normal + double precision :: cdf + + cdf = 0.5 * (1 + sqrt((1.d0 - exp(0.d0 - (root_pi_over_8 * normal**2))) )) + + if (normal < 0) then + cdf = 1 - cdf + endif + + end function double_normal_cdf + + subroutine create_cdf_lookup_table() + implicit none + + integer :: i + real :: normal_random_value, normal_step + + ! in case another thread beat this one to the lock after the if + if (.not.allocated(fast_cdf_lookup_table)) then + + allocate(cdf_lookup_table(size_of_normal_cdf_lookup_table, 2)) + + normal_random_value = (-5.5) + normal_step = 11.0 / (size_of_normal_cdf_lookup_table - 1) + + do i = 1, size_of_normal_cdf_lookup_table + cdf_lookup_table(i,1) = normal_random_value + cdf_lookup_table(i,2) = normal_cdf(normal_random_value) + + normal_random_value = normal_random_value + normal_step + enddo + + ! now invert this slow lookup table to create a fast lookup table + allocate(fast_cdf_lookup_table(size_of_normal_cdf_lookup_table)) + + do i = 1,size_of_normal_cdf_lookup_table + fast_cdf_lookup_table(i) = look_up(dble(i) / size_of_normal_cdf_lookup_table, cdf_lookup_table) + enddo + + deallocate(cdf_lookup_table) + endif + + end subroutine create_cdf_lookup_table + + ! perform a binary search through LUT to find value, then interpolate between the two closest entries. + function look_up(value, LUT) result(output) + implicit none + double precision, intent(in) :: value + double precision, intent(in) :: LUT(:,:) + real :: output + + real :: fraction, denominator + integer :: i,n + integer :: step + + n = size(LUT, 1) + i = n/2 + step = n/2 + + ! perform a binary search + do while(step/2.0 >= 1) + step = (step+1) / 2 + if (LUT(i,2) > value) then + i = max(1, i - step) + else + i = min(n, i + step) + endif + + enddo + + ! figure out which side of this index we fall on and interpolate + if (value > LUT(i,2)) then + if (i==n) then + output = LUT(i,1) + else + denominator = max(1e-20, (LUT(i+1,2) - LUT(i,2))) + fraction = (value - LUT(i,2)) / denominator + output = fraction * LUT(i+1,1) + (1-fraction) * LUT(i,1) + endif + else + if (i==1) then + output = LUT(i,1) + else + denominator = max(1e-20, (LUT(i,2) - LUT(i-1,2))) + fraction = (LUT(i,2) - value) / denominator + + output = fraction * LUT(i-1,1) + (1-fraction) * LUT(i,1) + endif + endif + end function look_up + + + ! calculate index into precomputed LUT to find cdf_value, then interpolate between the two closest entries. + function get_random_from_cdf(cdf_value) result(random_normal) + implicit none + real, intent(in) :: cdf_value + real :: random_normal + + integer :: index + double precision :: real_index + double precision :: fraction + + if (.not.allocated(fast_cdf_lookup_table)) then + !$omp critical(cdf_lookup) + call create_cdf_lookup_table() + !$omp end critical(cdf_lookup) + endif + + ! in the fast look up table we can just compute the optimal index + real_index = cdf_value * 1.0d0 * size_of_normal_cdf_lookup_table + index = min(size_of_normal_cdf_lookup_table, max(1, nint( real_index)) ) + + ! 
then interpolate between this index and the next closest + if (index > real_index) then + if (index == 1) then + random_normal = fast_cdf_lookup_table(index) + else + fraction = (index - real_index) + random_normal = (1-fraction) * fast_cdf_lookup_table(index) & + + fraction * fast_cdf_lookup_table(max(1,index-1)) + endif + else + if (index == size_of_normal_cdf_lookup_table) then + random_normal = fast_cdf_lookup_table(index) + else + fraction = (real_index - index) + random_normal = (1-fraction) * fast_cdf_lookup_table(index) & + + fraction * fast_cdf_lookup_table(min(size_of_normal_cdf_lookup_table,index+1)) + endif + endif + + end function get_random_from_cdf subroutine sample_distribution(output, mean_values, error_term, uniform_random_values, exceedence_probability, threshold) implicit none @@ -88,7 +228,7 @@ subroutine sample_distribution(output, mean_values, error_term, uniform_random_v if (present(exceedence_probability)) then if (exceedence_probability(i) > (1-uniform_random_values(i))) then - ! random_normal = get_random_from_cdf(uniform_random_values(i) - exceedence_probability(i) / (1-exceedence_probability(i)) ) + random_normal = get_random_from_cdf(uniform_random_values(i) - exceedence_probability(i) / (1-exceedence_probability(i)) ) endif endif From 71a571485c9064e8250a47fa1e96fe2228efae3c Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Sat, 11 Feb 2017 02:08:39 -0700 Subject: [PATCH 14/23] Added comments to random --- src/utilities/random.f90 | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/utilities/random.f90 b/src/utilities/random.f90 index 6b9ef4e..0453942 100644 --- a/src/utilities/random.f90 +++ b/src/utilities/random.f90 @@ -74,6 +74,12 @@ function real_normal_cdf(normal) result(cdf) end function real_normal_cdf + !>------------------------------------------------ + !! Computes an approximation to the normal CDF based on Aludaat and Alodat (2008) + !! + !! Aludaat, K.M. and Alodat, M.T. (2008). A note on approximating the normal distribution function. Applied Mathematical Sciences, Vol 2, no 9, pgs 425-429. + !! + !!------------------------------------------------ function double_normal_cdf(normal) result(cdf) implicit none double precision :: normal @@ -87,6 +93,10 @@ function double_normal_cdf(normal) result(cdf) end function double_normal_cdf + !>------------------------------------------------ + !! Create a look up table to convert from Uniform random number to Normal random + !! + !!------------------------------------------------ subroutine create_cdf_lookup_table() implicit none @@ -120,7 +130,10 @@ subroutine create_cdf_lookup_table() end subroutine create_cdf_lookup_table - ! perform a binary search through LUT to find value, then interpolate between the two closest entries. + !>------------------------------------------------ + !! Perform a binary search through LUT to find value, then interpolate between the two closest entries. + !! + !!------------------------------------------------ function look_up(value, LUT) result(output) implicit none double precision, intent(in) :: value @@ -168,7 +181,10 @@ function look_up(value, LUT) result(output) end function look_up - ! calculate index into precomputed LUT to find cdf_value, then interpolate between the two closest entries. + !>------------------------------------------------ + !! Calculate index into precomputed LUT to find cdf_value, then interpolate between the two closest entries. + !! 
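!! In effect this is inverse transform sampling: entry i of fast_cdf_lookup_table holds the
!! normal deviate z with CDF(z) ~= i / size_of_normal_cdf_lookup_table, so a uniform value u
!! maps to the (real-valued) index u * size_of_normal_cdf_lookup_table, and linear
!! interpolation between the two neighbouring entries gives z ~= CDF^-1(u).  Because the
!! table is only built over z in [-5.5, 5.5], more extreme quantiles saturate at those limits.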
+ !!------------------------------------------------ function get_random_from_cdf(cdf_value) result(random_normal) implicit none real, intent(in) :: cdf_value From ea1eb5fed1d735aced18782fbfdcbee006ef1332 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Sun, 12 Feb 2017 23:28:51 -0700 Subject: [PATCH 15/23] added code to process the random error and POP terms This needs to be done before the qq_normal remapping, or any post quantile mapping correction --- src/downscaling_stats/downscaling.f90 | 16 +++++ src/downscaling_stats/sampling.f90 | 86 +++++++++++++++++++++++++++ src/makefile | 4 ++ src/utilities/random.f90 | 34 ++--------- 4 files changed, 111 insertions(+), 29 deletions(-) create mode 100644 src/downscaling_stats/sampling.f90 diff --git a/src/downscaling_stats/downscaling.f90 b/src/downscaling_stats/downscaling.f90 index f91b556..e2eb3bd 100644 --- a/src/downscaling_stats/downscaling.f90 +++ b/src/downscaling_stats/downscaling.f90 @@ -10,6 +10,7 @@ module downscaling_mod use basic_stats_mod, only : stddev use io_routines, only : file_exists, io_read use random_mod, only : box_muller_random + use sampling_mod, only : sample_distribution implicit none real, parameter :: LOG_FILL_VALUE = 1e-30 @@ -18,6 +19,8 @@ module downscaling_mod integer*8, dimension(10) :: master_timers real :: random_sample(N_RANDOM_SAMPLES) + + logical :: post_process_errors = .False. contains subroutine downscale(training_atm, training_obs, predictors, output, options) @@ -270,6 +273,19 @@ subroutine downscale(training_atm, training_obs, predictors, output, options) output%variables(v)%logistic ( :, i, j), & current_threshold, options, timers, i,j) + if (post_process_errors) then + if (current_threshold /= kFILL_VALUE) then + call sample_distribution(output%variables(v)%data(:,i,j), & + output%variables(v)%data(:,i,j), & + output%variables(v)%errors(:,i, j), & + exceedence_probability=output%variables(v)%logistic(:,i,j), & + threshold = current_threshold) + else + call sample_distribution(output%variables(v)%data(:,i,j), & + output%variables(v)%data(:,i,j), & + output%variables(v)%errors(:,i, j)) + endif + endif ! if the input data were transformed with e.g. a cube root or log transform, then reverse that transformation for the output call System_Clock(timeone) call transform_data(options%obs%input_Xforms(v), output%variables(v)%data(:,i,j), 1, noutput, & diff --git a/src/downscaling_stats/sampling.f90 b/src/downscaling_stats/sampling.f90 new file mode 100644 index 0000000..6a40819 --- /dev/null +++ b/src/downscaling_stats/sampling.f90 @@ -0,0 +1,86 @@ +module sampling_mod + + use random_mod, only : normal_cdf, box_muller_random, get_normal_from_cdf + implicit none +contains + + subroutine sample_distribution(output, mean_values, error_term, & + normal_random_values, uniform_random_values, & + exceedence_probability, threshold) + implicit none + real, intent(out) :: output(:) + real, intent(in) :: mean_values(:), error_term(:) + real, intent(in), optional :: normal_random_values(:) + real, intent(in), optional :: exceedence_probability(:) + real, intent(in), optional :: uniform_random_values(:) + real, intent(in), optional :: threshold + + integer :: i,n + real :: random_normal, poe, threshold_internal + real, allocatable :: uniform(:), normal(:) + + threshold_internal = 0 + if (present(threshold)) threshold_internal = threshold + + n = size(mean_values) + ! the following section just ensures that optional variables have the necessary internal variables created. 
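! In outline, the sampling below draws each output value as
!     output(i) = mean_values(i) + error_term(i) * z
! with z a standard normal deviate.  When exceedence_probability is present, the uniform
! deviate u(i) first decides whether the threshold is exceeded at all (p = exceedence_probability(i)):
! u(i) <= 1 - p yields the threshold itself, while u(i) > 1 - p is rescaled onto (0,1),
! converted to a normal deviate via get_normal_from_cdf, and the resulting value is clamped
! at the threshold.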
+ if (present(exceedence_probability)) then + allocate(uniform(n)) + if (present(uniform_random_values)) then + uniform = uniform_random_values + else + if (present(normal_random_values)) then + ! if we were given normal random values, but not uniform, then create uniform from normal + do i=1,n + uniform(i) = normal_cdf( normal_random_values(i) ) + enddo + else + ! While this will generate n uniform random numbers, there will not be any spatio-temporal + ! auto-correlation to them. + call random_number(uniform) + endif + endif + else + allocate(normal(n)) + if (present(normal_random_values)) then + normal = normal_random_values + else + if (present(uniform_random_values)) then + normal = uniform_random_values + else + call random_number(normal) + endif + call box_muller_random(normal) + endif + endif + + ! this is the heart of the distribution sampling code + do i=1,n + if (present(exceedence_probability)) then + if (uniform(i) > (1-exceedence_probability(i)) ) then + + ! if random number is greater than the non-exceedence probability, then we have exceeded it... + ! rescale the uniform random number for the region of exceedence and convert to a normal random number + random_normal = get_normal_from_cdf((uniform(i) - (1 - exceedence_probability(i))) / (exceedence_probability(i)) ) + + ! ultimately this is the same equation as below for variables without an exceedence threshold + output(i) = mean_values(i) + error_term(i) * random_normal + + ! also check, if the random term has pushed the value below the threshold anyway, then just set to the threshold + if (output(i) < threshold_internal) then + output(i) = threshold_internal + endif + else + ! if this value did not exceed the threshold, then just set it to the threshold. + output(i) = threshold_internal + endif + else + ! standard approach is to take the residuals multiply by a normal random variate and add to the mean prediction + output(i) = mean_values(i) + error_term(i) * normal(i) + endif + enddo + + end subroutine sample_distribution + + +end module sampling_mod diff --git a/src/makefile b/src/makefile index ed3f33e..247e3a3 100644 --- a/src/makefile +++ b/src/makefile @@ -283,6 +283,7 @@ OBJS = \ $(BUILD)geo_reader.o \ $(BUILD)basic_stats.o \ $(BUILD)random.o \ + $(BUILD)sampling.o \ $(BUILD)sorting.o \ $(BUILD)quantile_mapping.o \ $(BUILD)analogs.o \ @@ -414,6 +415,9 @@ $(BUILD)sorting.o: utilities/sorting.f90 utilities/qsort_inline.inc $(BUILD)random.o: utilities/random.f90 $(BUILD)model_constants.o $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ +$(BUILD)sampling.o: downscaling_stats/sampling.f90 $(BUILD)random.o + $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ + $(BUILD)basic_stats.o: utilities/basic_stats.f90 $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ diff --git a/src/utilities/random.f90 b/src/utilities/random.f90 index 0453942..628811e 100644 --- a/src/utilities/random.f90 +++ b/src/utilities/random.f90 @@ -4,6 +4,7 @@ module random_mod implicit none + private double precision, parameter :: root_pi_over_8 = 0.626657068658 ! sqrt(kPI/8) integer, parameter :: size_of_normal_cdf_lookup_table = 1000000 @@ -13,6 +14,8 @@ module random_mod module procedure real_normal_cdf, double_normal_cdf end interface + public :: normal_cdf, box_muller_random, get_normal_from_cdf + contains !>------------------------------------------------ @@ -185,7 +188,7 @@ end function look_up !! Calculate index into precomputed LUT to find cdf_value, then interpolate between the two closest entries. !! 
!!------------------------------------------------ - function get_random_from_cdf(cdf_value) result(random_normal) + function get_normal_from_cdf(cdf_value) result(random_normal) implicit none real, intent(in) :: cdf_value real :: random_normal @@ -223,34 +226,7 @@ function get_random_from_cdf(cdf_value) result(random_normal) endif endif - end function get_random_from_cdf - - subroutine sample_distribution(output, mean_values, error_term, uniform_random_values, exceedence_probability, threshold) - implicit none - real, intent(out) :: output(:) - real, intent(in) :: mean_values(:), error_term(:), uniform_random_values(:) - real, intent(in), optional :: exceedence_probability(:) - real, intent(in), optional :: threshold - - integer :: i,n - real :: random_normal, poe, threshold_internal - - threshold_internal = 0 - if (present(threshold)) threshold_internal = threshold - - n = size(mean_values) - - do i=1,n - if (present(exceedence_probability)) then - if (exceedence_probability(i) > (1-uniform_random_values(i))) then - - random_normal = get_random_from_cdf(uniform_random_values(i) - exceedence_probability(i) / (1-exceedence_probability(i)) ) + end function get_normal_from_cdf - endif - endif - - enddo - output = 1 - end subroutine sample_distribution end module random_mod From f728eed763e3c31676d3777dd686b9370dfb9944 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Sun, 12 Feb 2017 23:40:31 -0700 Subject: [PATCH 16/23] updates to random and qm tests --- src/tests/test_qm.f90 | 14 ++++++++++++-- src/tests/test_random.f90 | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/tests/test_qm.f90 b/src/tests/test_qm.f90 index a871fbe..6852a3b 100644 --- a/src/tests/test_qm.f90 +++ b/src/tests/test_qm.f90 @@ -17,7 +17,7 @@ program test_qm implicit none logical :: verbose - integer*8, parameter :: n = 100000 + integer*8, parameter :: n = 10000 real, parameter :: acceptable_error = 1 ! 1e-3 <- use this with more QM segments and train on the complete dataset real, allocatable, dimension(:) :: input_data, matching_data @@ -109,13 +109,19 @@ subroutine test_mapping(input_data, matching_data, name) print*, "To match:",sum(matching_data)/n print*, " min/max",minval(matching_data),maxval(matching_data) print*, "---------------------------" + if (n < 20) then + print*, input_data + print*, "---------------------------" + print*, matching_data + print*, "---------------------------" + endif endif call system_clock(start_time) !------------------------------- ! Develop the Quantile mapping !------------------------------- - call develop_qm(input_data(1:size(input_data)/2), matching_data, qm, 300)! min(size(input_data)/2, size(matching_data)/2)) + call develop_qm(input_data(1:size(input_data)/2), matching_data, qm, min(300,min(size(input_data)/2, size(matching_data)/2))) call system_clock(end_time, COUNT_RATE, COUNT_MAX) if (start_time>end_time) end_time=end_time+COUNT_MAX @@ -185,6 +191,10 @@ subroutine show_results(qmdata, original, name, time) print*, "Q-Mapped Min/Max = ",minval(qmdata), maxval(qmdata) print*, "Original Min/Max = ",minval(original), maxval(original) print*, "---------------------------" + if (n < 20) then + print*, qmdata + print*, "---------------------------" + endif print*, " Elapsed time = ", time endif call pass_fail(error, " Stddev:") diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 index c1dbf5b..43d9fe0 100644 --- a/src/tests/test_random.f90 +++ b/src/tests/test_random.f90 @@ -45,7 +45,7 @@ subroutine test_uniform_to_normal() ! 
iterate over all values to test, if expected value is more than 2e-3 away from the actual result it fails do i = 1, ntest_values - normal = get_random_from_cdf( cdf_values(i) ) + normal = get_normal_from_cdf( cdf_values(i) ) if ((abs(normal_values(i) - normal ) > 1e-2) .and. (abs(normal_values(i) - normal )/abs(normal_values(i)) > 2e-1)) then passing = .False. print*, "" From 9cdcda9a9ca5a8e4b0612808ade6eaeac93edbcb Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Tue, 14 Feb 2017 16:51:07 -0700 Subject: [PATCH 17/23] bug fixes (wouldn't compile after merge) --- src/config/configuration.f90 | 4 ++-- src/utilities/basic_stats.f90 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/config/configuration.f90 b/src/config/configuration.f90 index 27477c5..e8616f4 100644 --- a/src/config/configuration.f90 +++ b/src/config/configuration.f90 @@ -158,13 +158,13 @@ subroutine read_base_options(options) ! this is the time period to use when calculating e.g. quantile mapping transformations call options%transform_start%init("gregorian") if (start_transform==end_transform) then - Print* "WARNING: If you are using any transforms start and end date should not be the same" + write(*,*) "WARNING: If you are using any transforms start and end date should not be the same" endif call options%transform_start%set(start_transform) call options%transform_stop%init("gregorian") call options%transform_stop%set(end_transform) ! this is the time period to use when calculating e.g. quantile mapping transformations for post processing - if (maxval(post_correction_Xform) /= kNO_TRANSFORM) then + if (maxval(post_correction_transform) /= kNO_TRANSFORM) then call options%post_start%init("gregorian") if (start_post=="") then stop "ERROR must set a post-processing start date" diff --git a/src/utilities/basic_stats.f90 b/src/utilities/basic_stats.f90 index f5bd5a4..79e03d7 100644 --- a/src/utilities/basic_stats.f90 +++ b/src/utilities/basic_stats.f90 @@ -100,7 +100,7 @@ subroutine time_stddev(input, stddev, mean_in) do x=1,nx difference = input(:,x,y) - mean(x,y) ! in case there are bad input data, prevents a floating point overflow - where(difference > 1e10) = 1e10 + where(difference > 1e10) difference = 1e10 stddev(x,y) = sqrt( sum( difference**2 ) / (nt-1) ) end do end do From 08a33fb6ad4e722e15e80e7f352454be5c3ef890 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Tue, 14 Feb 2017 21:48:47 -0700 Subject: [PATCH 18/23] added an error check to setup timing Now checks that obs and atm training data have the same number of time steps available in the training period. --- src/downscaling_stats/downscaling.f90 | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/downscaling_stats/downscaling.f90 b/src/downscaling_stats/downscaling.f90 index e2eb3bd..92c8925 100644 --- a/src/downscaling_stats/downscaling.f90 +++ b/src/downscaling_stats/downscaling.f90 @@ -19,7 +19,7 @@ module downscaling_mod integer*8, dimension(10) :: master_timers real :: random_sample(N_RANDOM_SAMPLES) - + logical :: post_process_errors = .False. 
contains @@ -1121,6 +1121,7 @@ subroutine setup_timing(training_atm, training_obs, predictors, options) type(atm), intent(inout) :: training_atm, predictors type(obs), intent(inout) :: training_obs type(config), intent(in) :: options + integer :: n_atm_train, n_obs_train write(*,*) "-------------------" write(*,*) "Training ATM" @@ -1132,6 +1133,13 @@ subroutine setup_timing(training_atm, training_obs, predictors, options) write(*,*) "Predictors" call setup_time_indices(predictors, options) + n_obs_train = training_obs%training_end - training_obs%training_start + n_atm_train = training_atm%training_end - training_atm%training_start + + if (n_obs_train /= n_atm_train) then + stop "ERROR Inconsistent time periods in training atm and obs data" + endif + end subroutine setup_timing subroutine allocate_data(output, n_obs_variables, noutput, nx, ny, n_atm_variables, tr_size, options) From 7d98e0df912cf95659f267ba36bcf5bcec4df9c6 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Tue, 14 Feb 2017 21:49:59 -0700 Subject: [PATCH 19/23] minor bux fix for timing check --- src/downscaling_stats/downscaling.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/downscaling_stats/downscaling.f90 b/src/downscaling_stats/downscaling.f90 index 92c8925..3265d5b 100644 --- a/src/downscaling_stats/downscaling.f90 +++ b/src/downscaling_stats/downscaling.f90 @@ -1133,8 +1133,8 @@ subroutine setup_timing(training_atm, training_obs, predictors, options) write(*,*) "Predictors" call setup_time_indices(predictors, options) - n_obs_train = training_obs%training_end - training_obs%training_start - n_atm_train = training_atm%training_end - training_atm%training_start + n_obs_train = training_obs%training_stop - training_obs%training_start + n_atm_train = training_atm%training_stop - training_atm%training_start if (n_obs_train /= n_atm_train) then stop "ERROR Inconsistent time periods in training atm and obs data" From c3317cbf9cac6fa6366f2c3105005ae5fe585683 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Wed, 15 Feb 2017 10:08:57 -0700 Subject: [PATCH 20/23] modified lapack regression to test for exceedingly large input values --- src/downscaling_stats/regression.f90 | 56 +++++++++++++++------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/src/downscaling_stats/regression.f90 b/src/downscaling_stats/regression.f90 index 65c232b..ae37fc9 100644 --- a/src/downscaling_stats/regression.f90 +++ b/src/downscaling_stats/regression.f90 @@ -171,33 +171,39 @@ subroutine lapack_least_squares(X, Y, B, working_space, info) nrhs = 1 - if (present(working_space)) then - ! First find the optimal work size (LWORK) - LWORK = -1 - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) - LWORK = MIN( size(working_space), INT( working_space( 1 ) ) ) - - ! Now solve the equations X*B = Y - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) + if (max(maxval(X), maxval(Y)) > 1e8) then + print*, "ERROR regression inputs out of range" + print*, maxval(X), maxval(Y) + innerINFO = -1 else - ! First find the optimal work size (LWORK) - LWORK = -1 - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) - LWORK = MIN( 1000, INT( WORK( 1 ) ) ) + if (present(working_space)) then + ! First find the optimal work size (LWORK) + LWORK = -1 + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) + LWORK = MIN( size(working_space), INT( working_space( 1 ) ) ) + + ! 
Now solve the equations X*B = Y + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) + else + ! First find the optimal work size (LWORK) + LWORK = -1 + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) + LWORK = MIN( 1000, INT( WORK( 1 ) ) ) - ! Now solve the equations X*B = Y - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) - endif - if (innerINFO/=0) then - ! !$omp critical (print_lock) - ! if (innerINFO<0) then - ! write(*,*) "ERROR in SGELS with argument:", 0-innerINFO - ! else - ! write(*,*) "ERROR in SGELS A does not have full rank, position:",innerINFO - ! endif - ! !$omp end critical (print_lock) - Y(1) = sum(Y)/size(Y) - Y(2:)= 0 + ! Now solve the equations X*B = Y + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) + endif + if (innerINFO/=0) then + ! !$omp critical (print_lock) + ! if (innerINFO<0) then + ! write(*,*) "ERROR in SGELS with argument:", 0-innerINFO + ! else + ! write(*,*) "ERROR in SGELS A does not have full rank, position:",innerINFO + ! endif + ! !$omp end critical (print_lock) + Y(1) = sum(Y)/size(Y) + Y(2:)= 0 + endif endif if (present(info)) info = innerINFO From 3fb615c830958d0edb406982f29fe2acaf364f25 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Wed, 15 Feb 2017 16:33:51 -0700 Subject: [PATCH 21/23] add NaN checks for logistic regression, requires ieee_arithmetic --- src/downscaling_stats/downscaling.f90 | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/downscaling_stats/downscaling.f90 b/src/downscaling_stats/downscaling.f90 index 3265d5b..116bcd1 100644 --- a/src/downscaling_stats/downscaling.f90 +++ b/src/downscaling_stats/downscaling.f90 @@ -1,5 +1,6 @@ module downscaling_mod + USE ieee_arithmetic use data_structures use string, only : str use regression_mod, only : compute_regression, compute_logistic_regression @@ -873,12 +874,18 @@ subroutine downscale_analog_regression(x, atm, obs, output_coeff, output, error, call System_Clock(timetwo) timers(9) = timers(9) + (timetwo-timeone) - ! compute the logistic as the probability of threshold exceedance in the analog population call System_Clock(timeone) - logistic = compute_analog_exceedance(obs, analogs(1:options%n_log_analogs), logistic_threshold) + if (options%analog_weights) then + ! compute the logistic as the weighted probability of threshold exceedance in the analog population + logistic = compute_analog_exceedance(obs, analogs(1:options%n_log_analogs), logistic_threshold, weights) + else + ! compute the logistic as the probability of threshold exceedance in the analog population + logistic = compute_analog_exceedance(obs, analogs(1:options%n_log_analogs), logistic_threshold) + endif else if (options%analog_weights) then + ! compute the logistic as the weighted probability of threshold exceedance in the analog population logistic = compute_analog_exceedance(obs, analogs, logistic_threshold, weights) else ! compute the logistic as the probability of threshold exceedance in the analog population @@ -888,8 +895,18 @@ subroutine downscale_analog_regression(x, atm, obs, output_coeff, output, error, else logistic = compute_logistic_regression(x, regression_data, obs_analogs, coefficients, logistic_threshold) - where( coefficients > 1e20 ) coefficients = 1e20 - where( coefficients < -1e20 ) coefficients = -1e20 + + ! Check for severe errors in the logistic regression. This can happen with some input data. + ! 
If it fails, fall back to the analog exceedence calculation + if (ieee_is_nan(logistic)) then + if (options%analog_weights) then + logistic = compute_analog_exceedance(obs, analogs, logistic_threshold, weights) + else + ! compute the logistic as the probability of threshold exceedance in the analog population + logistic = compute_analog_exceedance(obs, analogs, logistic_threshold) + endif + + endif if (options%debug) then do j = 1,nvars output_coeff(j+nvars) = coefficients(j) From 6785edc9d832f836ca9059582d2da23cc8c0c436 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Wed, 15 Feb 2017 16:34:23 -0700 Subject: [PATCH 22/23] check size of input analog array in find_analogs --- src/downscaling_stats/analogs.f90 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/downscaling_stats/analogs.f90 b/src/downscaling_stats/analogs.f90 index b4d274d..a73ada4 100644 --- a/src/downscaling_stats/analogs.f90 +++ b/src/downscaling_stats/analogs.f90 @@ -43,6 +43,10 @@ subroutine find_analogs(analogs, match, input, n, threshold, weights, skip_analo ! fast selection (O(n_inputs log(nanalogs)) worst case, typically closer to O(n_inputs)) if (n>0) then if (.not.allocated(analogs)) allocate(analogs(n)) + if (size(analogs) < n) then + deallocate(analogs) + allocate(analogs(n)) + endif call top_n_analogs(distances, n, analogs) elseif (threshold>0) then distances = distances / nvars ! normalize by the number of variables so the supplied threshold doesn't have to changes From ce88de065abfa538a59508af188e338012ae9fab Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Wed, 15 Feb 2017 16:35:15 -0700 Subject: [PATCH 23/23] removed a test for crazy inputs in regression, added omp print_lock --- src/downscaling_stats/regression.f90 | 62 ++++++++++++---------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/src/downscaling_stats/regression.f90 b/src/downscaling_stats/regression.f90 index ae37fc9..1a38c8d 100644 --- a/src/downscaling_stats/regression.f90 +++ b/src/downscaling_stats/regression.f90 @@ -33,10 +33,10 @@ function compute_regression(x, training_x, training_y, coefficients, y_test, err allocate(varlist(nvars)) varlist = -1 - varlist(1) = 1 ! this assumes there is a constant passed in for the first variable... - useable_vars = 1 ! the constant - - ! find all vars that have some variability through time. + varlist(1) = 1 ! this assumes there is a constant passed in for the first variable... + useable_vars = 1 ! the constant + + ! find all vars that have some variability through time. do i=2,nvars nonzero_count = 0 do j=1,ntimes @@ -45,7 +45,7 @@ function compute_regression(x, training_x, training_y, coefficients, y_test, err endif enddo ! need at least two data points that are different from the first data point - ! to ensure that the regression on that variable might be useful. + ! to ensure that the regression on that variable might be useful. 
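! (A predictor that is constant, or nearly so, over the training period duplicates the
! intercept column and leaves the design matrix rank deficient; SGELS would then flag the
! lack of full rank and the routine would fall back to predicting the training-period mean,
! so such variables are screened out here before the regression is attempted.)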
if ( nonzero_count > 2 ) then useable_vars = useable_vars + 1 varlist(i) = i @@ -116,8 +116,10 @@ function compute_regression(x, training_x, training_y, coefficients, y_test, err if (present(weights)) then if (allocated(weights)) then if (size(weights)/=size(training_y_lp)) then + !$omp critical (print_lock) write(*,*) "ERROR size of weights /= data" write(*,*), shape(weights), shape(training_y_lp) + !$omp end critical (print_lock) endif if (present(y_test)) then error = sqrt( sum(((training_y_lp - y_test)**2)*weights) / sum(weights) ) @@ -171,39 +173,27 @@ subroutine lapack_least_squares(X, Y, B, working_space, info) nrhs = 1 - if (max(maxval(X), maxval(Y)) > 1e8) then - print*, "ERROR regression inputs out of range" - print*, maxval(X), maxval(Y) - innerINFO = -1 + ! passing working_space in and out *could* be more efficient. Only used in tests right now + if (present(working_space)) then + ! First find the optimal work size (LWORK) + LWORK = -1 + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) + LWORK = MIN( size(working_space), INT( working_space( 1 ) ) ) + + ! Now solve the equations X*B = Y + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) else - if (present(working_space)) then - ! First find the optimal work size (LWORK) - LWORK = -1 - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) - LWORK = MIN( size(working_space), INT( working_space( 1 ) ) ) - - ! Now solve the equations X*B = Y - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) - else - ! First find the optimal work size (LWORK) - LWORK = -1 - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) - LWORK = MIN( 1000, INT( WORK( 1 ) ) ) + ! First find the optimal work size (LWORK) + LWORK = -1 + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) + LWORK = MIN( 1000, INT( WORK( 1 ) ) ) - ! Now solve the equations X*B = Y - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) - endif - if (innerINFO/=0) then - ! !$omp critical (print_lock) - ! if (innerINFO<0) then - ! write(*,*) "ERROR in SGELS with argument:", 0-innerINFO - ! else - ! write(*,*) "ERROR in SGELS A does not have full rank, position:",innerINFO - ! endif - ! !$omp end critical (print_lock) - Y(1) = sum(Y)/size(Y) - Y(2:)= 0 - endif + ! Now solve the equations X*B = Y + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) + endif + if (innerINFO/=0) then + Y(1) = sum(Y)/size(Y) + Y(2:)= 0 endif if (present(info)) info = innerINFO
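The workspace-query pattern above (calling SGELS once with LWORK = -1 to obtain the optimal
workspace size, then again with an allocated workspace to solve) is the standard LAPACK idiom.
A minimal stand-alone sketch of that idiom, separate from GARD itself, is below; the program
name and data are made up for illustration, and it assumes a single-precision LAPACK is linked
(e.g. -llapack):

program sgels_workspace_demo
    ! Minimal sketch of the LWORK = -1 workspace-query idiom used by lapack_least_squares.
    implicit none
    integer, parameter :: m = 6, n = 2        ! 6 observations, intercept + one predictor
    real    :: A(m, n), b(m), work_query(1)
    real, allocatable :: work(:)
    integer :: lwork, info, i

    ! Design matrix [1, x] and a response that is roughly y = 2 + 3x
    do i = 1, m
        A(i, 1) = 1.0
        A(i, 2) = real(i)
        b(i)    = 2.0 + 3.0 * real(i) + 0.01 * real(mod(i, 3))
    end do

    ! First call: LWORK = -1 asks SGELS only for the optimal workspace size,
    ! which it returns in work_query(1)
    call sgels('N', m, n, 1, A, m, b, m, work_query, -1, info)
    lwork = int(work_query(1))
    allocate(work(lwork))

    ! Second call: actually solve min ||A*coeff - b||; on exit the first n
    ! entries of b hold the least-squares coefficients
    call sgels('N', m, n, 1, A, m, b, m, work, lwork, info)

    if (info /= 0) then
        print *, "SGELS returned info = ", info
    else
        print *, "intercept, slope = ", b(1:n)
    end if
end program sgels_workspace_demo

A nonzero INFO is exactly the condition the routine above handles by falling back to
Y(1) = mean, Y(2:) = 0, i.e. predicting the training mean when the solve cannot be trusted.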