From a1996b445d31657595f6eb82916f399285209194 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Thu, 9 Feb 2017 10:59:02 -0700 Subject: [PATCH 01/23] modifying travis to use gfortran 5 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1b74a82..b80f5ae 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ addons: source: - ubuntu-toolchain-r-test packages: - - gfortran + - gfortran-5 - libnetcdf-dev - liblapack-dev script: From 936ddfe6ba5cbccbb235e073a68b8dafc27009f9 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Thu, 9 Feb 2017 11:10:48 -0700 Subject: [PATCH 02/23] modifying travis to use gfortran 5 take 2 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b80f5ae..0cef0ed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ sudo: false language: fortran compiler: - - gfortran + - gfortran-5 os: - linux addons: From e96c2d4b6121c3731db9f2dae38ab2a8ecf78c7b Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Thu, 9 Feb 2017 11:13:46 -0700 Subject: [PATCH 03/23] modifying travis to use gfortran 5 take 3 --- .travis.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0cef0ed..1da524c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,16 @@ sudo: false language: fortran compiler: - - gfortran-5 + - gfortran-6 os: - linux +env: TESTID='GARD_linux' addons: apt: source: - ubuntu-toolchain-r-test packages: - - gfortran-5 + - gfortran-6 - libnetcdf-dev - liblapack-dev script: From 3a6cf3fe3982b47437d3ef2bc6e71051499c79ab Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Thu, 9 Feb 2017 13:42:05 -0700 Subject: [PATCH 04/23] Updating travis take 4 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1da524c..fd946a9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ os: env: TESTID='GARD_linux' addons: apt: - source: + sources: - ubuntu-toolchain-r-test packages: - gfortran-6 From ad63ff5cfef468cda5478ac8c230bf9edc0261ae Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 22:44:19 -0700 Subject: [PATCH 05/23] Moved random routines into random module, removed dependency on gfortran>5 --- src/downscaling_stats/downscaling.f90 | 45 ++---------- src/makefile | 6 +- src/utilities/random.f90 | 100 ++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 42 deletions(-) create mode 100644 src/utilities/random.f90 diff --git a/src/downscaling_stats/downscaling.f90 b/src/downscaling_stats/downscaling.f90 index 5244305..07bf46c 100644 --- a/src/downscaling_stats/downscaling.f90 +++ b/src/downscaling_stats/downscaling.f90 @@ -9,6 +9,7 @@ module downscaling_mod use time_util, only : setup_time_indices use basic_stats_mod, only : stddev use io_routines, only : file_exists, io_read + use random_mod, only : box_muller_random implicit none integer*8, dimension(10) :: master_timers @@ -560,46 +561,6 @@ subroutine transform_data(transform_type, pred_data, & end select end subroutine transform_data - !>------------------------------------------------ - !! Use the Box-Muller Transform to convert uniform to normal random deviates - !! - !! Note random_sample should be an allocated 1D real array - !! On return, random_sample will be filled with random normal (0,1) data - !! - !! Caveat: this transform is unable to generate extremely high values (>6.66) - !! Having switched to double precision it may do better now. - !! 
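!! For reference, the transform implemented below: given independent uniform deviates
!! u1, u2 in (0,1],
!!     z1 = sqrt(-2*log(u1)) * cos(2*pi*u2)
!!     z2 = sqrt(-2*log(u1)) * sin(2*pi*u2)
!! are a pair of independent standard normal (mean 0, variance 1) deviates.  The 6.66 figure
!! quoted above is consistent with 32-bit uniform inputs, since sqrt(-2*log(2.0**(-32))) ~= 6.66
!! bounds the largest magnitude that can be produced; double-precision uniforms push that
!! bound to roughly sqrt(-2*log(2.0**(-52))) ~= 8.5.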
- !------------------------------------------------- - subroutine box_muller_random(random_sample) - implicit none - real, intent(inout) :: random_sample(:) - integer :: n,i - - double precision :: u1, u2, s - double precision, allocatable :: double_random(:) - - n = size(random_sample) - allocate(double_random(n)) - call random_number(double_random) - - do i=1,n,2 - u1 = double_random(i) - if (i 0) then do i=1, n start = max(1, i - options%time_smooth) diff --git a/src/makefile b/src/makefile index b67a281..c1d231d 100644 --- a/src/makefile +++ b/src/makefile @@ -282,6 +282,7 @@ OBJS = \ $(BUILD)obs_io.o \ $(BUILD)geo_reader.o \ $(BUILD)basic_stats.o \ + $(BUILD)random.o \ $(BUILD)sorting.o \ $(BUILD)quantile_mapping.o \ $(BUILD)analogs.o \ @@ -399,6 +400,9 @@ $(BUILD)quantile_mapping.o: utilities/quantile_map.f90 $(BUILD)data_structures.o $(BUILD)sorting.o: utilities/sorting.f90 utilities/qsort_inline.inc $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ +$(BUILD)random.o: utilities/random.f90 $(BUILD)model_constants.o + $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ + $(BUILD)basic_stats.o: utilities/basic_stats.f90 $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ @@ -454,7 +458,7 @@ $(BUILD)atmosphere_io.o: io/atmosphere_io.f90 $(BUILD)data_structures.o \ $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ $(BUILD)downscaling.o: downscaling_stats/downscaling.f90 $(BUILD)data_structures.o $(BUILD)model_constants.o \ - $(BUILD)quantile_mapping.o $(BUILD)regression.o $(BUILD)analogs.o + $(BUILD)quantile_mapping.o $(BUILD)regression.o $(BUILD)analogs.o $(BUILD)random.o $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ $(BUILD)analogs.o: downscaling_stats/analogs.f90 $(BUILD)string.o diff --git a/src/utilities/random.f90 b/src/utilities/random.f90 new file mode 100644 index 0000000..4c27c31 --- /dev/null +++ b/src/utilities/random.f90 @@ -0,0 +1,100 @@ +module random_mod + + use model_constants + + implicit none + + double precision, parameter :: root_pi_over_8 = 0.626657068658 ! sqrt(kPI/8) + +contains + + !>------------------------------------------------ + !! Use the Box-Muller Transform to convert uniform to normal random deviates + !! + !! Note random_sample should be an allocated 1D real array + !! On return, random_sample will be filled with random normal (0,1) data + !! + !! Caveat: this transform is unable to generate extremely high values (>6.66) + !! Having switched to double precision it may do better now. + !! + !------------------------------------------------- + subroutine box_muller_random(random_sample) + implicit none + real, intent(inout) :: random_sample(:) + integer :: n,i + + double precision :: u1, u2, s + double precision, allocatable :: double_random(:) + + n = size(random_sample) + allocate(double_random(n)) + call random_number(double_random) + + do i=1,n,2 + u1 = double_random(i) + if (i------------------------------------------------ + !! Computes an approximation to the normal CDF based on Aludaat and Alodat (2008) + !! + !! Aludaat, K.M. and Alodat, M.T. (2008). A note on approximating the normal distribution function. Applied Mathematical Sciences, Vol 2, no 9, pgs 425-429. + !! 
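!! For x >= 0 the approximation is
!!     CDF(x) ~= 0.5 * (1 + sqrt(1 - exp(-sqrt(pi/8) * x**2)))
!! and symmetry (CDF(-x) = 1 - CDF(x)) handles negative arguments; root_pi_over_8 above is
!! sqrt(pi/8) ~= 0.6266571.  The reported maximum absolute error of this approximation is on
!! the order of 2e-3, which is why the unit tests added later in this series only require
!! agreement with scipy.stats.norm.cdf to within 2e-3.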
+ !!------------------------------------------------ + function normal_cdf(normal) result(cdf) + implicit none + real :: normal + real :: cdf + + cdf = 0.5 * (1+ sqrt((1- exp(0-(root_pi_over_8 * normal**2))) )) + + if (normal < 0) then + cdf = 1 - cdf + endif + + end function normal_cdf + + + + subroutine sample_distribution(output, mean_values, error_term, uniform_random_values, exceedence_probability, threshold) + implicit none + real, intent(out) :: output(:) + real, intent(in) :: mean_values(:), error_term(:), uniform_random_values(:) + real, intent(in), optional :: exceedence_probability(:) + real, intent(in), optional :: threshold + + integer :: i,n + real :: random_normal, poe, threshold_internal + + threshold_internal = 0 + if (present(threshold)) threshold_internal = threshold + + n = size(mean_values) + + do i=1,n + if (present(exceedence_probability)) then + if (exceedence_probability(i) > (1-uniform_random_values(i))) then + + ! random_normal = get_random_from_cdf(uniform_random_values(i) - exceedence_probability(i) / (1-exceedence_probability(i)) ) + + endif + endif + + enddo + output = 1 + end subroutine sample_distribution + +end module random_mod From 4d61c736b62b93c2d2d0ce585eb15ebed598fb21 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 22:47:14 -0700 Subject: [PATCH 06/23] moving travis build back to gfortran default version --- .travis.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index fd946a9..2c5b194 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,15 @@ sudo: false language: fortran compiler: - - gfortran-6 + - gfortran os: - linux -env: TESTID='GARD_linux' addons: apt: sources: - ubuntu-toolchain-r-test packages: - - gfortran-6 + - gfortran - libnetcdf-dev - liblapack-dev script: From 47da3291292bf15f619451ca614c2592002cb620 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 22:52:54 -0700 Subject: [PATCH 07/23] changed parameter from int to real for gfortran (standards?) 
compatibility --- src/downscaling_stats/downscaling.f90 | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/downscaling_stats/downscaling.f90 b/src/downscaling_stats/downscaling.f90 index 07bf46c..f91b556 100644 --- a/src/downscaling_stats/downscaling.f90 +++ b/src/downscaling_stats/downscaling.f90 @@ -12,10 +12,11 @@ module downscaling_mod use random_mod, only : box_muller_random implicit none + real, parameter :: LOG_FILL_VALUE = 1e-30 + real, parameter :: MAX_ALLOWED_SIGMA = 20 + integer, parameter :: N_RANDOM_SAMPLES = 10000 + integer*8, dimension(10) :: master_timers - real, parameter :: LOG_FILL_VALUE = 1e-30 - real, parameter :: MAX_ALLOWED_SIGMA = 20 - real, parameter :: N_RANDOM_SAMPLES = 10000 real :: random_sample(N_RANDOM_SAMPLES) contains From a8b430398b761e9a05040a63aeb47bed8193aef5 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 23:35:11 -0700 Subject: [PATCH 08/23] Added test for random number module --- .gitignore | 1 + src/makefile | 13 +++++++++- src/tests/test_random.f90 | 53 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 src/tests/test_random.f90 diff --git a/.gitignore b/.gitignore index 73894b5..1996925 100644 --- a/.gitignore +++ b/.gitignore @@ -35,4 +35,5 @@ test_regression test_calendar test_qm test_config +test_random gard diff --git a/src/makefile b/src/makefile index c1d231d..ed3f33e 100644 --- a/src/makefile +++ b/src/makefile @@ -320,6 +320,10 @@ REGRESSION_TEST_OBJS = \ $(BUILD)regression.o \ $(BUILD)test_regression.o +RANDOM_TEST_OBJS = \ + $(BUILD)test_random.o \ + $(BUILD)random.o \ + $(BUILD)model_constants.o # These are the primary user facing rules # @@ -330,7 +334,7 @@ model: gard all: gard test -test: test_calendar test_sort test_qm test_config test_regression +test: test_calendar test_sort test_qm test_config test_regression test_random install: gard cp gard $(INSTALL_DIR)/ @@ -359,10 +363,14 @@ test_qm : $(QM_TEST_OBJS) test_sort : $(SORT_TEST_OBJS) $(FC) -o $@ $^ $(LDFLAGS) +test_random : $(RANDOM_TEST_OBJS) + $(FC) -o $@ $^ $(LDFLAGS) + test_config : $(CONFIG_TEST_OBJS) $(FC) -o $@ $^ $(LDFLAGS) + # specify specific builds and dependencies. # unit tests @@ -381,6 +389,9 @@ $(BUILD)test_qm.o: tests/test_qm.f90 $(BUILD)data_structures.o $(BUILD)quantile_ $(BUILD)test_config.o: tests/test_config.f90 $(BUILD)data_structures.o $(BUILD)model_constants.o $(BUILD)configuration.o $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ +$(BUILD)test_random.o: tests/test_random.f90 $(BUILD)random.o $(BUILD)model_constants.o + $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ + # Date and time relevant objects $(BUILD)time.o: timing/time.f90 $(BUILD)model_constants.o diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 new file mode 100644 index 0000000..7970d8f --- /dev/null +++ b/src/tests/test_random.f90 @@ -0,0 +1,53 @@ +program test_random + + use random_mod + + implicit none + + print*, "test_normal_to_cdf_conversion" + call test_normal_to_cdf_conversion() + + +contains + + + subroutine test_normal_to_cdf_conversion() + + implicit none + integer, parameter :: ntest_values = 23 + + ! 
hard coded expected CDF values for given normal value from scipy.stats.norm.cdf + real :: normal_values(ntest_values) = [-5.50, -5.00, -4.50, -4.00, -3.50, -3.00, -2.50, -2.00, & + -1.50, -1.00, -0.50, 0.000, 0.500, 1.00, 1.50, 2.00, & + 2.50, 3.00, 3.50, 4.00, 4.50, 5.00, 5.50] + + real :: cdf_values(ntest_values) = [0.000000019, 0.0000002, 0.0000033, 0.0000316, 0.0002326, & + 0.0013498, 0.0062096, 0.0227501, 0.0668072, 0.1586552, & + 0.3085375, 0.5000000, 0.6914624, 0.8413447, 0.9331927, & + 0.9772498, 0.9937903, 0.9986501, 0.9997673, 0.9999683, & + 0.9999966, 0.9999997, 0.9999999810] + logical :: passing + integer :: i + + passing = .True. + + ! iterate over all values to test, if expected value is more than 2e-3 away from the actual result it fails + do i = 1, ntest_values + if (abs(cdf_values(i) - normal_cdf( normal_values(i) ) ) > 2e-3) then + passing = .False. + print*, "" + print*, " Error = ",cdf_values(i) - normal_cdf( normal_values(i)) + print*, " Expected = ", cdf_values(i) + print*, " Got: ",normal_cdf( normal_values(i)), " for:",normal_values(i) + endif + end do + + if (passing) then + print*, " PASSED" + else + print*, " FAILED" + endif + end subroutine test_normal_to_cdf_conversion + + +end program test_random From 4b0db27a2672c5cebcbeb73ef2300073ea5fa3d3 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 23:46:28 -0700 Subject: [PATCH 09/23] Added test for box-muller transform --- src/tests/test_random.f90 | 50 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 index 7970d8f..2abfa7c 100644 --- a/src/tests/test_random.f90 +++ b/src/tests/test_random.f90 @@ -7,10 +7,11 @@ program test_random print*, "test_normal_to_cdf_conversion" call test_normal_to_cdf_conversion() + print*, "test_box_muller_random" + call test_box_muller_random() contains - subroutine test_normal_to_cdf_conversion() implicit none @@ -49,5 +50,52 @@ subroutine test_normal_to_cdf_conversion() endif end subroutine test_normal_to_cdf_conversion + subroutine test_box_muller_random() + implicit none + + integer, parameter :: n_random_values = 10000 + + real :: random_values(n_random_values) + real :: sigma, mean + integer :: i + logical :: passing + + passing = .True. + + call box_muller_random(random_values) + + mean = sum(random_values) / size(random_values) + if (abs(mean) > 1e-2) then + print*, " Error failed computing mean: ",mean + passing = .False. + endif + + sigma = 0 + do i = 1, n_random_values + sigma = sigma + (random_values(i) - mean)**2 + enddo + sigma = sqrt(sigma / (n_random_values - 1)) + if (abs(sigma - 1) > 1e-2) then + print*, " Error failed computing standard deviation: ", sigma + passing = .False. + endif + + + if (minval(random_values) > (-3)) then + print*, " Error failed computing minimum value: ", minval(random_values) + passing = .False. + endif + + if (maxval(random_values) < (3)) then + print*, " Error failed computing maximum value: ", maxval(random_values) + passing = .False. 
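! The +/-3 bounds are loose on purpose: P(|Z| > 3) ~= 0.0027 for a standard normal, so with
! n on the order of 10^4 samples the probability that no draw falls below -3 (or that none
! exceeds +3) is about (1 - 0.00135)**10000 ~= exp(-13.5), roughly 1e-6, meaning a correct
! generator essentially never fails these checks by chance.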
+ endif + + if (passing) then + print*, " PASSED" + else + print*, " FAILED" + endif + end subroutine test_box_muller_random end program test_random From ac6524c5cafd8e318a7405304e84a870d7309289 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 23:50:29 -0700 Subject: [PATCH 10/23] Added test for box-muller transform --- src/tests/test_random.f90 | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 index 2abfa7c..ca5d8c3 100644 --- a/src/tests/test_random.f90 +++ b/src/tests/test_random.f90 @@ -7,8 +7,10 @@ program test_random print*, "test_normal_to_cdf_conversion" call test_normal_to_cdf_conversion() - print*, "test_box_muller_random" - call test_box_muller_random() + print*, "test_box_muller_random(10000)" + call test_box_muller_random(10000) + print*, "test_box_muller_random(999)" + call test_box_muller_random(999) contains @@ -50,10 +52,10 @@ subroutine test_normal_to_cdf_conversion() endif end subroutine test_normal_to_cdf_conversion - subroutine test_box_muller_random() + subroutine test_box_muller_random(n_random_values) implicit none - integer, parameter :: n_random_values = 10000 + integer, intent(in) :: n_random_values real :: random_values(n_random_values) real :: sigma, mean From fdfa28820fbca100632dbc72865af6889405d5f4 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 23:51:07 -0700 Subject: [PATCH 11/23] Added test for box-muller transform --- src/tests/test_random.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 index ca5d8c3..9e7a500 100644 --- a/src/tests/test_random.f90 +++ b/src/tests/test_random.f90 @@ -10,7 +10,7 @@ program test_random print*, "test_box_muller_random(10000)" call test_box_muller_random(10000) print*, "test_box_muller_random(999)" - call test_box_muller_random(999) + call test_box_muller_random(9999) contains From df46a9fc2c6f338df930c34e6019584b0da3d067 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Fri, 10 Feb 2017 23:52:28 -0700 Subject: [PATCH 12/23] Added (passing) test for box-muller transform with odd number of values --- src/tests/test_random.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 index 9e7a500..63baa44 100644 --- a/src/tests/test_random.f90 +++ b/src/tests/test_random.f90 @@ -77,7 +77,7 @@ subroutine test_box_muller_random(n_random_values) sigma = sigma + (random_values(i) - mean)**2 enddo sigma = sqrt(sigma / (n_random_values - 1)) - if (abs(sigma - 1) > 1e-2) then + if (abs(sigma - 1) > 1e-1) then print*, " Error failed computing standard deviation: ", sigma passing = .False. 
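! The relaxed 1e-1 tolerance is appropriate here: the sampling error of a standard deviation
! estimated from n independent normal deviates is roughly 1/sqrt(2n), i.e. about 0.007 for
! n on the order of 10^4, so the previous 1e-2 bound sat within about 1.5 standard errors and
! could fail by chance fairly often, whereas 1e-1 lies far outside the expected sampling noise.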
endif From 733373ccc3408875046289839818c5bd7bd5fb51 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Sat, 11 Feb 2017 02:06:47 -0700 Subject: [PATCH 13/23] Added capability to compute a corresponding random normal from a uniform random, and tests --- src/tests/test_random.f90 | 49 +++++++++++++ src/utilities/random.f90 | 150 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 194 insertions(+), 5 deletions(-) diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 index 63baa44..c1dbf5b 100644 --- a/src/tests/test_random.f90 +++ b/src/tests/test_random.f90 @@ -4,16 +4,65 @@ program test_random implicit none + print*, "-------------------------------------" print*, "test_normal_to_cdf_conversion" call test_normal_to_cdf_conversion() + print*, "-------------------------------------" print*, "test_box_muller_random(10000)" call test_box_muller_random(10000) + print*, "-------------------------------------" print*, "test_box_muller_random(999)" call test_box_muller_random(9999) + print*, "-------------------------------------" + print*, "test_uniform_to_normal" + call test_uniform_to_normal() + contains + subroutine test_uniform_to_normal() + + implicit none + integer, parameter :: ntest_values = 23 + + ! hard coded expected CDF values for given normal value from scipy.stats.norm.cdf + real :: normal_values(ntest_values) = [-5.50, -5.00, -4.50, -4.00, -3.50, -3.00, -2.50, -2.00, & + -1.50, -1.00, -0.50, 0.000, 0.500, 1.00, 1.50, 2.00, & + 2.50, 3.00, 3.50, 4.00, 4.50, 5.00, 5.50] + + real :: cdf_values(ntest_values) = [0.000000019, 0.0000002, 0.0000033, 0.0000316, 0.0002326, & + 0.0013498, 0.0062096, 0.0227501, 0.0668072, 0.1586552, & + 0.3085375, 0.5000000, 0.6914624, 0.8413447, 0.9331927, & + 0.9772498, 0.9937903, 0.9986501, 0.9997673, 0.9999683, & + 0.9999966, 0.9999997, 0.9999999810] + logical :: passing + integer :: i + real :: normal + + passing = .True. + print*, "WARNING: Passing criteria may not be strict enough, errors can be up to 20%" + + ! iterate over all values to test, if expected value is more than 2e-3 away from the actual result it fails + do i = 1, ntest_values + normal = get_random_from_cdf( cdf_values(i) ) + if ((abs(normal_values(i) - normal ) > 1e-2) .and. (abs(normal_values(i) - normal )/abs(normal_values(i)) > 2e-1)) then + passing = .False. + print*, "" + print*, " Error = ",normal_values(i) - normal, (normal_values(i) - normal)/normal_values(i) + print*, " Expected = ", normal_values(i) + print*, " Got: ",normal, " for:",cdf_values(i) + endif + end do + + if (passing) then + print*, " PASSED" + else + print*, " FAILED" + endif + end subroutine test_uniform_to_normal + + subroutine test_normal_to_cdf_conversion() implicit none diff --git a/src/utilities/random.f90 b/src/utilities/random.f90 index 4c27c31..6b9ef4e 100644 --- a/src/utilities/random.f90 +++ b/src/utilities/random.f90 @@ -5,6 +5,13 @@ module random_mod implicit none double precision, parameter :: root_pi_over_8 = 0.626657068658 ! sqrt(kPI/8) + + integer, parameter :: size_of_normal_cdf_lookup_table = 1000000 + double precision, allocatable :: fast_cdf_lookup_table(:), cdf_lookup_table(:,:) + + interface normal_cdf + module procedure real_normal_cdf, double_normal_cdf + end interface contains @@ -54,20 +61,153 @@ end subroutine box_muller_random !! Aludaat, K.M. and Alodat, M.T. (2008). A note on approximating the normal distribution function. Applied Mathematical Sciences, Vol 2, no 9, pgs 425-429. !! 
!!------------------------------------------------ - function normal_cdf(normal) result(cdf) + function real_normal_cdf(normal) result(cdf) implicit none real :: normal real :: cdf - cdf = 0.5 * (1+ sqrt((1- exp(0-(root_pi_over_8 * normal**2))) )) + cdf = 0.5 * (1 + sqrt((1 - exp(0 - (root_pi_over_8 * normal**2))) )) if (normal < 0) then cdf = 1 - cdf endif - end function normal_cdf + end function real_normal_cdf - + function double_normal_cdf(normal) result(cdf) + implicit none + double precision :: normal + double precision :: cdf + + cdf = 0.5 * (1 + sqrt((1.d0 - exp(0.d0 - (root_pi_over_8 * normal**2))) )) + + if (normal < 0) then + cdf = 1 - cdf + endif + + end function double_normal_cdf + + subroutine create_cdf_lookup_table() + implicit none + + integer :: i + real :: normal_random_value, normal_step + + ! in case another thread beat this one to the lock after the if + if (.not.allocated(fast_cdf_lookup_table)) then + + allocate(cdf_lookup_table(size_of_normal_cdf_lookup_table, 2)) + + normal_random_value = (-5.5) + normal_step = 11.0 / (size_of_normal_cdf_lookup_table - 1) + + do i = 1, size_of_normal_cdf_lookup_table + cdf_lookup_table(i,1) = normal_random_value + cdf_lookup_table(i,2) = normal_cdf(normal_random_value) + + normal_random_value = normal_random_value + normal_step + enddo + + ! now invert this slow lookup table to create a fast lookup table + allocate(fast_cdf_lookup_table(size_of_normal_cdf_lookup_table)) + + do i = 1,size_of_normal_cdf_lookup_table + fast_cdf_lookup_table(i) = look_up(dble(i) / size_of_normal_cdf_lookup_table, cdf_lookup_table) + enddo + + deallocate(cdf_lookup_table) + endif + + end subroutine create_cdf_lookup_table + + ! perform a binary search through LUT to find value, then interpolate between the two closest entries. + function look_up(value, LUT) result(output) + implicit none + double precision, intent(in) :: value + double precision, intent(in) :: LUT(:,:) + real :: output + + real :: fraction, denominator + integer :: i,n + integer :: step + + n = size(LUT, 1) + i = n/2 + step = n/2 + + ! perform a binary search + do while(step/2.0 >= 1) + step = (step+1) / 2 + if (LUT(i,2) > value) then + i = max(1, i - step) + else + i = min(n, i + step) + endif + + enddo + + ! figure out which side of this index we fall on and interpolate + if (value > LUT(i,2)) then + if (i==n) then + output = LUT(i,1) + else + denominator = max(1e-20, (LUT(i+1,2) - LUT(i,2))) + fraction = (value - LUT(i,2)) / denominator + output = fraction * LUT(i+1,1) + (1-fraction) * LUT(i,1) + endif + else + if (i==1) then + output = LUT(i,1) + else + denominator = max(1e-20, (LUT(i,2) - LUT(i-1,2))) + fraction = (LUT(i,2) - value) / denominator + + output = fraction * LUT(i-1,1) + (1-fraction) * LUT(i,1) + endif + endif + end function look_up + + + ! calculate index into precomputed LUT to find cdf_value, then interpolate between the two closest entries. + function get_random_from_cdf(cdf_value) result(random_normal) + implicit none + real, intent(in) :: cdf_value + real :: random_normal + + integer :: index + double precision :: real_index + double precision :: fraction + + if (.not.allocated(fast_cdf_lookup_table)) then + !$omp critical(cdf_lookup) + call create_cdf_lookup_table() + !$omp end critical(cdf_lookup) + endif + + ! in the fast look up table we can just compute the optimal index + real_index = cdf_value * 1.0d0 * size_of_normal_cdf_lookup_table + index = min(size_of_normal_cdf_lookup_table, max(1, nint( real_index)) ) + + ! 
then interpolate between this index and the next closest + if (index > real_index) then + if (index == 1) then + random_normal = fast_cdf_lookup_table(index) + else + fraction = (index - real_index) + random_normal = (1-fraction) * fast_cdf_lookup_table(index) & + + fraction * fast_cdf_lookup_table(max(1,index-1)) + endif + else + if (index == size_of_normal_cdf_lookup_table) then + random_normal = fast_cdf_lookup_table(index) + else + fraction = (real_index - index) + random_normal = (1-fraction) * fast_cdf_lookup_table(index) & + + fraction * fast_cdf_lookup_table(min(size_of_normal_cdf_lookup_table,index+1)) + endif + endif + + end function get_random_from_cdf subroutine sample_distribution(output, mean_values, error_term, uniform_random_values, exceedence_probability, threshold) implicit none @@ -88,7 +228,7 @@ subroutine sample_distribution(output, mean_values, error_term, uniform_random_v if (present(exceedence_probability)) then if (exceedence_probability(i) > (1-uniform_random_values(i))) then - ! random_normal = get_random_from_cdf(uniform_random_values(i) - exceedence_probability(i) / (1-exceedence_probability(i)) ) + random_normal = get_random_from_cdf(uniform_random_values(i) - exceedence_probability(i) / (1-exceedence_probability(i)) ) endif endif From 71a571485c9064e8250a47fa1e96fe2228efae3c Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Sat, 11 Feb 2017 02:08:39 -0700 Subject: [PATCH 14/23] Added comments to random --- src/utilities/random.f90 | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/utilities/random.f90 b/src/utilities/random.f90 index 6b9ef4e..0453942 100644 --- a/src/utilities/random.f90 +++ b/src/utilities/random.f90 @@ -74,6 +74,12 @@ function real_normal_cdf(normal) result(cdf) end function real_normal_cdf + !>------------------------------------------------ + !! Computes an approximation to the normal CDF based on Aludaat and Alodat (2008) + !! + !! Aludaat, K.M. and Alodat, M.T. (2008). A note on approximating the normal distribution function. Applied Mathematical Sciences, Vol 2, no 9, pgs 425-429. + !! + !!------------------------------------------------ function double_normal_cdf(normal) result(cdf) implicit none double precision :: normal @@ -87,6 +93,10 @@ function double_normal_cdf(normal) result(cdf) end function double_normal_cdf + !>------------------------------------------------ + !! Create a look up table to convert from Uniform random number to Normal random + !! + !!------------------------------------------------ subroutine create_cdf_lookup_table() implicit none @@ -120,7 +130,10 @@ subroutine create_cdf_lookup_table() end subroutine create_cdf_lookup_table - ! perform a binary search through LUT to find value, then interpolate between the two closest entries. + !>------------------------------------------------ + !! Perform a binary search through LUT to find value, then interpolate between the two closest entries. + !! + !!------------------------------------------------ function look_up(value, LUT) result(output) implicit none double precision, intent(in) :: value @@ -168,7 +181,10 @@ function look_up(value, LUT) result(output) end function look_up - ! calculate index into precomputed LUT to find cdf_value, then interpolate between the two closest entries. + !>------------------------------------------------ + !! Calculate index into precomputed LUT to find cdf_value, then interpolate between the two closest entries. + !! 
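!! In effect this is inverse transform sampling: entry i of fast_cdf_lookup_table holds the
!! normal deviate z with CDF(z) ~= i / size_of_normal_cdf_lookup_table, so a uniform value u
!! maps to the (real-valued) index u * size_of_normal_cdf_lookup_table, and linear
!! interpolation between the two neighbouring entries gives z ~= CDF^-1(u).  Because the
!! table is only built over z in [-5.5, 5.5], more extreme quantiles saturate at those limits.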
+ !!------------------------------------------------ function get_random_from_cdf(cdf_value) result(random_normal) implicit none real, intent(in) :: cdf_value From ea1eb5fed1d735aced18782fbfdcbee006ef1332 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Sun, 12 Feb 2017 23:28:51 -0700 Subject: [PATCH 15/23] added code to process the random error and POP terms This needs to be done before the qq_normal remapping, or any post quantile mapping correction --- src/downscaling_stats/downscaling.f90 | 16 +++++ src/downscaling_stats/sampling.f90 | 86 +++++++++++++++++++++++++++ src/makefile | 4 ++ src/utilities/random.f90 | 34 ++--------- 4 files changed, 111 insertions(+), 29 deletions(-) create mode 100644 src/downscaling_stats/sampling.f90 diff --git a/src/downscaling_stats/downscaling.f90 b/src/downscaling_stats/downscaling.f90 index f91b556..e2eb3bd 100644 --- a/src/downscaling_stats/downscaling.f90 +++ b/src/downscaling_stats/downscaling.f90 @@ -10,6 +10,7 @@ module downscaling_mod use basic_stats_mod, only : stddev use io_routines, only : file_exists, io_read use random_mod, only : box_muller_random + use sampling_mod, only : sample_distribution implicit none real, parameter :: LOG_FILL_VALUE = 1e-30 @@ -18,6 +19,8 @@ module downscaling_mod integer*8, dimension(10) :: master_timers real :: random_sample(N_RANDOM_SAMPLES) + + logical :: post_process_errors = .False. contains subroutine downscale(training_atm, training_obs, predictors, output, options) @@ -270,6 +273,19 @@ subroutine downscale(training_atm, training_obs, predictors, output, options) output%variables(v)%logistic ( :, i, j), & current_threshold, options, timers, i,j) + if (post_process_errors) then + if (current_threshold /= kFILL_VALUE) then + call sample_distribution(output%variables(v)%data(:,i,j), & + output%variables(v)%data(:,i,j), & + output%variables(v)%errors(:,i, j), & + exceedence_probability=output%variables(v)%logistic(:,i,j), & + threshold = current_threshold) + else + call sample_distribution(output%variables(v)%data(:,i,j), & + output%variables(v)%data(:,i,j), & + output%variables(v)%errors(:,i, j)) + endif + endif ! if the input data were transformed with e.g. a cube root or log transform, then reverse that transformation for the output call System_Clock(timeone) call transform_data(options%obs%input_Xforms(v), output%variables(v)%data(:,i,j), 1, noutput, & diff --git a/src/downscaling_stats/sampling.f90 b/src/downscaling_stats/sampling.f90 new file mode 100644 index 0000000..6a40819 --- /dev/null +++ b/src/downscaling_stats/sampling.f90 @@ -0,0 +1,86 @@ +module sampling_mod + + use random_mod, only : normal_cdf, box_muller_random, get_normal_from_cdf + implicit none +contains + + subroutine sample_distribution(output, mean_values, error_term, & + normal_random_values, uniform_random_values, & + exceedence_probability, threshold) + implicit none + real, intent(out) :: output(:) + real, intent(in) :: mean_values(:), error_term(:) + real, intent(in), optional :: normal_random_values(:) + real, intent(in), optional :: exceedence_probability(:) + real, intent(in), optional :: uniform_random_values(:) + real, intent(in), optional :: threshold + + integer :: i,n + real :: random_normal, poe, threshold_internal + real, allocatable :: uniform(:), normal(:) + + threshold_internal = 0 + if (present(threshold)) threshold_internal = threshold + + n = size(mean_values) + ! the following section just ensures that optional variables have the necessary internal variables created. 
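! In outline, the sampling below draws each output value as
!     output(i) = mean_values(i) + error_term(i) * z
! with z a standard normal deviate.  When exceedence_probability is present, the uniform
! deviate u(i) first decides whether the threshold is exceeded at all (p = exceedence_probability(i)):
! u(i) <= 1 - p yields the threshold itself, while u(i) > 1 - p is rescaled onto (0,1),
! converted to a normal deviate via get_normal_from_cdf, and the resulting value is clamped
! at the threshold.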
+ if (present(exceedence_probability)) then + allocate(uniform(n)) + if (present(uniform_random_values)) then + uniform = uniform_random_values + else + if (present(normal_random_values)) then + ! if we were given normal random values, but not uniform, then create uniform from normal + do i=1,n + uniform(i) = normal_cdf( normal_random_values(i) ) + enddo + else + ! While this will generate n uniform random numbers, there will not be any spatio-temporal + ! auto-correlation to them. + call random_number(uniform) + endif + endif + else + allocate(normal(n)) + if (present(normal_random_values)) then + normal = normal_random_values + else + if (present(uniform_random_values)) then + normal = uniform_random_values + else + call random_number(normal) + endif + call box_muller_random(normal) + endif + endif + + ! this is the heart of the distribution sampling code + do i=1,n + if (present(exceedence_probability)) then + if (uniform(i) > (1-exceedence_probability(i)) ) then + + ! if random number is greater than the non-exceedence probability, then we have exceeded it... + ! rescale the uniform random number for the region of exceedence and convert to a normal random number + random_normal = get_normal_from_cdf((uniform(i) - (1 - exceedence_probability(i))) / (exceedence_probability(i)) ) + + ! ultimately this is the same equation as below for variables without an exceedence threshold + output(i) = mean_values(i) + error_term(i) * random_normal + + ! also check, if the random term has pushed the value below the threshold anyway, then just set to the threshold + if (output(i) < threshold_internal) then + output(i) = threshold_internal + endif + else + ! if this value did not exceed the threshold, then just set it to the threshold. + output(i) = threshold_internal + endif + else + ! standard approach is to take the residuals multiply by a normal random variate and add to the mean prediction + output(i) = mean_values(i) + error_term(i) * normal(i) + endif + enddo + + end subroutine sample_distribution + + +end module sampling_mod diff --git a/src/makefile b/src/makefile index ed3f33e..247e3a3 100644 --- a/src/makefile +++ b/src/makefile @@ -283,6 +283,7 @@ OBJS = \ $(BUILD)geo_reader.o \ $(BUILD)basic_stats.o \ $(BUILD)random.o \ + $(BUILD)sampling.o \ $(BUILD)sorting.o \ $(BUILD)quantile_mapping.o \ $(BUILD)analogs.o \ @@ -414,6 +415,9 @@ $(BUILD)sorting.o: utilities/sorting.f90 utilities/qsort_inline.inc $(BUILD)random.o: utilities/random.f90 $(BUILD)model_constants.o $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ +$(BUILD)sampling.o: downscaling_stats/sampling.f90 $(BUILD)random.o + $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ + $(BUILD)basic_stats.o: utilities/basic_stats.f90 $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@ diff --git a/src/utilities/random.f90 b/src/utilities/random.f90 index 0453942..628811e 100644 --- a/src/utilities/random.f90 +++ b/src/utilities/random.f90 @@ -4,6 +4,7 @@ module random_mod implicit none + private double precision, parameter :: root_pi_over_8 = 0.626657068658 ! sqrt(kPI/8) integer, parameter :: size_of_normal_cdf_lookup_table = 1000000 @@ -13,6 +14,8 @@ module random_mod module procedure real_normal_cdf, double_normal_cdf end interface + public :: normal_cdf, box_muller_random, get_normal_from_cdf + contains !>------------------------------------------------ @@ -185,7 +188,7 @@ end function look_up !! Calculate index into precomputed LUT to find cdf_value, then interpolate between the two closest entries. !! 
!!------------------------------------------------ - function get_random_from_cdf(cdf_value) result(random_normal) + function get_normal_from_cdf(cdf_value) result(random_normal) implicit none real, intent(in) :: cdf_value real :: random_normal @@ -223,34 +226,7 @@ function get_random_from_cdf(cdf_value) result(random_normal) endif endif - end function get_random_from_cdf - - subroutine sample_distribution(output, mean_values, error_term, uniform_random_values, exceedence_probability, threshold) - implicit none - real, intent(out) :: output(:) - real, intent(in) :: mean_values(:), error_term(:), uniform_random_values(:) - real, intent(in), optional :: exceedence_probability(:) - real, intent(in), optional :: threshold - - integer :: i,n - real :: random_normal, poe, threshold_internal - - threshold_internal = 0 - if (present(threshold)) threshold_internal = threshold - - n = size(mean_values) - - do i=1,n - if (present(exceedence_probability)) then - if (exceedence_probability(i) > (1-uniform_random_values(i))) then - - random_normal = get_random_from_cdf(uniform_random_values(i) - exceedence_probability(i) / (1-exceedence_probability(i)) ) + end function get_normal_from_cdf - endif - endif - - enddo - output = 1 - end subroutine sample_distribution end module random_mod From f728eed763e3c31676d3777dd686b9370dfb9944 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Sun, 12 Feb 2017 23:40:31 -0700 Subject: [PATCH 16/23] updates to random and qm tests --- src/tests/test_qm.f90 | 14 ++++++++++++-- src/tests/test_random.f90 | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/tests/test_qm.f90 b/src/tests/test_qm.f90 index a871fbe..6852a3b 100644 --- a/src/tests/test_qm.f90 +++ b/src/tests/test_qm.f90 @@ -17,7 +17,7 @@ program test_qm implicit none logical :: verbose - integer*8, parameter :: n = 100000 + integer*8, parameter :: n = 10000 real, parameter :: acceptable_error = 1 ! 1e-3 <- use this with more QM segments and train on the complete dataset real, allocatable, dimension(:) :: input_data, matching_data @@ -109,13 +109,19 @@ subroutine test_mapping(input_data, matching_data, name) print*, "To match:",sum(matching_data)/n print*, " min/max",minval(matching_data),maxval(matching_data) print*, "---------------------------" + if (n < 20) then + print*, input_data + print*, "---------------------------" + print*, matching_data + print*, "---------------------------" + endif endif call system_clock(start_time) !------------------------------- ! Develop the Quantile mapping !------------------------------- - call develop_qm(input_data(1:size(input_data)/2), matching_data, qm, 300)! min(size(input_data)/2, size(matching_data)/2)) + call develop_qm(input_data(1:size(input_data)/2), matching_data, qm, min(300,min(size(input_data)/2, size(matching_data)/2))) call system_clock(end_time, COUNT_RATE, COUNT_MAX) if (start_time>end_time) end_time=end_time+COUNT_MAX @@ -185,6 +191,10 @@ subroutine show_results(qmdata, original, name, time) print*, "Q-Mapped Min/Max = ",minval(qmdata), maxval(qmdata) print*, "Original Min/Max = ",minval(original), maxval(original) print*, "---------------------------" + if (n < 20) then + print*, qmdata + print*, "---------------------------" + endif print*, " Elapsed time = ", time endif call pass_fail(error, " Stddev:") diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90 index c1dbf5b..43d9fe0 100644 --- a/src/tests/test_random.f90 +++ b/src/tests/test_random.f90 @@ -45,7 +45,7 @@ subroutine test_uniform_to_normal() ! 
iterate over all values to test, if expected value is more than 2e-3 away from the actual result it fails do i = 1, ntest_values - normal = get_random_from_cdf( cdf_values(i) ) + normal = get_normal_from_cdf( cdf_values(i) ) if ((abs(normal_values(i) - normal ) > 1e-2) .and. (abs(normal_values(i) - normal )/abs(normal_values(i)) > 2e-1)) then passing = .False. print*, "" From 9cdcda9a9ca5a8e4b0612808ade6eaeac93edbcb Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Tue, 14 Feb 2017 16:51:07 -0700 Subject: [PATCH 17/23] bug fixes (wouldn't compile after merge) --- src/config/configuration.f90 | 4 ++-- src/utilities/basic_stats.f90 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/config/configuration.f90 b/src/config/configuration.f90 index 27477c5..e8616f4 100644 --- a/src/config/configuration.f90 +++ b/src/config/configuration.f90 @@ -158,13 +158,13 @@ subroutine read_base_options(options) ! this is the time period to use when calculating e.g. quantile mapping transformations call options%transform_start%init("gregorian") if (start_transform==end_transform) then - Print* "WARNING: If you are using any transforms start and end date should not be the same" + write(*,*) "WARNING: If you are using any transforms start and end date should not be the same" endif call options%transform_start%set(start_transform) call options%transform_stop%init("gregorian") call options%transform_stop%set(end_transform) ! this is the time period to use when calculating e.g. quantile mapping transformations for post processing - if (maxval(post_correction_Xform) /= kNO_TRANSFORM) then + if (maxval(post_correction_transform) /= kNO_TRANSFORM) then call options%post_start%init("gregorian") if (start_post=="") then stop "ERROR must set a post-processing start date" diff --git a/src/utilities/basic_stats.f90 b/src/utilities/basic_stats.f90 index f5bd5a4..79e03d7 100644 --- a/src/utilities/basic_stats.f90 +++ b/src/utilities/basic_stats.f90 @@ -100,7 +100,7 @@ subroutine time_stddev(input, stddev, mean_in) do x=1,nx difference = input(:,x,y) - mean(x,y) ! in case there are bad input data, prevents a floating point overflow - where(difference > 1e10) = 1e10 + where(difference > 1e10) difference = 1e10 stddev(x,y) = sqrt( sum( difference**2 ) / (nt-1) ) end do end do From 08a33fb6ad4e722e15e80e7f352454be5c3ef890 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Tue, 14 Feb 2017 21:48:47 -0700 Subject: [PATCH 18/23] added an error check to setup timing Now checks that obs and atm training data have the same number of time steps available in the training period. --- src/downscaling_stats/downscaling.f90 | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/downscaling_stats/downscaling.f90 b/src/downscaling_stats/downscaling.f90 index e2eb3bd..92c8925 100644 --- a/src/downscaling_stats/downscaling.f90 +++ b/src/downscaling_stats/downscaling.f90 @@ -19,7 +19,7 @@ module downscaling_mod integer*8, dimension(10) :: master_timers real :: random_sample(N_RANDOM_SAMPLES) - + logical :: post_process_errors = .False. 
contains @@ -1121,6 +1121,7 @@ subroutine setup_timing(training_atm, training_obs, predictors, options) type(atm), intent(inout) :: training_atm, predictors type(obs), intent(inout) :: training_obs type(config), intent(in) :: options + integer :: n_atm_train, n_obs_train write(*,*) "-------------------" write(*,*) "Training ATM" @@ -1132,6 +1133,13 @@ subroutine setup_timing(training_atm, training_obs, predictors, options) write(*,*) "Predictors" call setup_time_indices(predictors, options) + n_obs_train = training_obs%training_end - training_obs%training_start + n_atm_train = training_atm%training_end - training_atm%training_start + + if (n_obs_train /= n_atm_train) then + stop "ERROR Inconsistent time periods in training atm and obs data" + endif + end subroutine setup_timing subroutine allocate_data(output, n_obs_variables, noutput, nx, ny, n_atm_variables, tr_size, options) From 7d98e0df912cf95659f267ba36bcf5bcec4df9c6 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Tue, 14 Feb 2017 21:49:59 -0700 Subject: [PATCH 19/23] minor bux fix for timing check --- src/downscaling_stats/downscaling.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/downscaling_stats/downscaling.f90 b/src/downscaling_stats/downscaling.f90 index 92c8925..3265d5b 100644 --- a/src/downscaling_stats/downscaling.f90 +++ b/src/downscaling_stats/downscaling.f90 @@ -1133,8 +1133,8 @@ subroutine setup_timing(training_atm, training_obs, predictors, options) write(*,*) "Predictors" call setup_time_indices(predictors, options) - n_obs_train = training_obs%training_end - training_obs%training_start - n_atm_train = training_atm%training_end - training_atm%training_start + n_obs_train = training_obs%training_stop - training_obs%training_start + n_atm_train = training_atm%training_stop - training_atm%training_start if (n_obs_train /= n_atm_train) then stop "ERROR Inconsistent time periods in training atm and obs data" From c3317cbf9cac6fa6366f2c3105005ae5fe585683 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Wed, 15 Feb 2017 10:08:57 -0700 Subject: [PATCH 20/23] modified lapack regression to test for exceedingly large input values --- src/downscaling_stats/regression.f90 | 56 +++++++++++++++------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/src/downscaling_stats/regression.f90 b/src/downscaling_stats/regression.f90 index 65c232b..ae37fc9 100644 --- a/src/downscaling_stats/regression.f90 +++ b/src/downscaling_stats/regression.f90 @@ -171,33 +171,39 @@ subroutine lapack_least_squares(X, Y, B, working_space, info) nrhs = 1 - if (present(working_space)) then - ! First find the optimal work size (LWORK) - LWORK = -1 - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) - LWORK = MIN( size(working_space), INT( working_space( 1 ) ) ) - - ! Now solve the equations X*B = Y - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) + if (max(maxval(X), maxval(Y)) > 1e8) then + print*, "ERROR regression inputs out of range" + print*, maxval(X), maxval(Y) + innerINFO = -1 else - ! First find the optimal work size (LWORK) - LWORK = -1 - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) - LWORK = MIN( 1000, INT( WORK( 1 ) ) ) + if (present(working_space)) then + ! First find the optimal work size (LWORK) + LWORK = -1 + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) + LWORK = MIN( size(working_space), INT( working_space( 1 ) ) ) + + ! 
Now solve the equations X*B = Y + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) + else + ! First find the optimal work size (LWORK) + LWORK = -1 + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) + LWORK = MIN( 1000, INT( WORK( 1 ) ) ) - ! Now solve the equations X*B = Y - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) - endif - if (innerINFO/=0) then - ! !$omp critical (print_lock) - ! if (innerINFO<0) then - ! write(*,*) "ERROR in SGELS with argument:", 0-innerINFO - ! else - ! write(*,*) "ERROR in SGELS A does not have full rank, position:",innerINFO - ! endif - ! !$omp end critical (print_lock) - Y(1) = sum(Y)/size(Y) - Y(2:)= 0 + ! Now solve the equations X*B = Y + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) + endif + if (innerINFO/=0) then + ! !$omp critical (print_lock) + ! if (innerINFO<0) then + ! write(*,*) "ERROR in SGELS with argument:", 0-innerINFO + ! else + ! write(*,*) "ERROR in SGELS A does not have full rank, position:",innerINFO + ! endif + ! !$omp end critical (print_lock) + Y(1) = sum(Y)/size(Y) + Y(2:)= 0 + endif endif if (present(info)) info = innerINFO From 3fb615c830958d0edb406982f29fe2acaf364f25 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Wed, 15 Feb 2017 16:33:51 -0700 Subject: [PATCH 21/23] add NaN checks for logistic regression, requires ieee_arithmetic --- src/downscaling_stats/downscaling.f90 | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/downscaling_stats/downscaling.f90 b/src/downscaling_stats/downscaling.f90 index 3265d5b..116bcd1 100644 --- a/src/downscaling_stats/downscaling.f90 +++ b/src/downscaling_stats/downscaling.f90 @@ -1,5 +1,6 @@ module downscaling_mod + USE ieee_arithmetic use data_structures use string, only : str use regression_mod, only : compute_regression, compute_logistic_regression @@ -873,12 +874,18 @@ subroutine downscale_analog_regression(x, atm, obs, output_coeff, output, error, call System_Clock(timetwo) timers(9) = timers(9) + (timetwo-timeone) - ! compute the logistic as the probability of threshold exceedance in the analog population call System_Clock(timeone) - logistic = compute_analog_exceedance(obs, analogs(1:options%n_log_analogs), logistic_threshold) + if (options%analog_weights) then + ! compute the logistic as the weighted probability of threshold exceedance in the analog population + logistic = compute_analog_exceedance(obs, analogs(1:options%n_log_analogs), logistic_threshold, weights) + else + ! compute the logistic as the probability of threshold exceedance in the analog population + logistic = compute_analog_exceedance(obs, analogs(1:options%n_log_analogs), logistic_threshold) + endif else if (options%analog_weights) then + ! compute the logistic as the weighted probability of threshold exceedance in the analog population logistic = compute_analog_exceedance(obs, analogs, logistic_threshold, weights) else ! compute the logistic as the probability of threshold exceedance in the analog population @@ -888,8 +895,18 @@ subroutine downscale_analog_regression(x, atm, obs, output_coeff, output, error, else logistic = compute_logistic_regression(x, regression_data, obs_analogs, coefficients, logistic_threshold) - where( coefficients > 1e20 ) coefficients = 1e20 - where( coefficients < -1e20 ) coefficients = -1e20 + + ! Check for severe errors in the logistic regression. This can happen with some input data. + ! 
If it fails, fall back to the analog exceedence calculation + if (ieee_is_nan(logistic)) then + if (options%analog_weights) then + logistic = compute_analog_exceedance(obs, analogs, logistic_threshold, weights) + else + ! compute the logistic as the probability of threshold exceedance in the analog population + logistic = compute_analog_exceedance(obs, analogs, logistic_threshold) + endif + + endif if (options%debug) then do j = 1,nvars output_coeff(j+nvars) = coefficients(j) From 6785edc9d832f836ca9059582d2da23cc8c0c436 Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Wed, 15 Feb 2017 16:34:23 -0700 Subject: [PATCH 22/23] check size of input analog array in find_analogs --- src/downscaling_stats/analogs.f90 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/downscaling_stats/analogs.f90 b/src/downscaling_stats/analogs.f90 index b4d274d..a73ada4 100644 --- a/src/downscaling_stats/analogs.f90 +++ b/src/downscaling_stats/analogs.f90 @@ -43,6 +43,10 @@ subroutine find_analogs(analogs, match, input, n, threshold, weights, skip_analo ! fast selection (O(n_inputs log(nanalogs)) worst case, typically closer to O(n_inputs)) if (n>0) then if (.not.allocated(analogs)) allocate(analogs(n)) + if (size(analogs) < n) then + deallocate(analogs) + allocate(analogs(n)) + endif call top_n_analogs(distances, n, analogs) elseif (threshold>0) then distances = distances / nvars ! normalize by the number of variables so the supplied threshold doesn't have to changes From ce88de065abfa538a59508af188e338012ae9fab Mon Sep 17 00:00:00 2001 From: Ethan Gutmann Date: Wed, 15 Feb 2017 16:35:15 -0700 Subject: [PATCH 23/23] removed a test for crazy inputs in regression, added omp print_lock --- src/downscaling_stats/regression.f90 | 62 ++++++++++++---------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/src/downscaling_stats/regression.f90 b/src/downscaling_stats/regression.f90 index ae37fc9..1a38c8d 100644 --- a/src/downscaling_stats/regression.f90 +++ b/src/downscaling_stats/regression.f90 @@ -33,10 +33,10 @@ function compute_regression(x, training_x, training_y, coefficients, y_test, err allocate(varlist(nvars)) varlist = -1 - varlist(1) = 1 ! this assumes there is a constant passed in for the first variable... - useable_vars = 1 ! the constant - - ! find all vars that have some variability through time. + varlist(1) = 1 ! this assumes there is a constant passed in for the first variable... + useable_vars = 1 ! the constant + + ! find all vars that have some variability through time. do i=2,nvars nonzero_count = 0 do j=1,ntimes @@ -45,7 +45,7 @@ function compute_regression(x, training_x, training_y, coefficients, y_test, err endif enddo ! need at least two data points that are different from the first data point - ! to ensure that the regression on that variable might be useful. + ! to ensure that the regression on that variable might be useful. 
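! (A predictor that is constant, or nearly so, over the training period duplicates the
! intercept column and leaves the design matrix rank deficient; SGELS would then flag the
! lack of full rank and the routine would fall back to predicting the training-period mean,
! so such variables are screened out here before the regression is attempted.)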
if ( nonzero_count > 2 ) then useable_vars = useable_vars + 1 varlist(i) = i @@ -116,8 +116,10 @@ function compute_regression(x, training_x, training_y, coefficients, y_test, err if (present(weights)) then if (allocated(weights)) then if (size(weights)/=size(training_y_lp)) then + !$omp critical (print_lock) write(*,*) "ERROR size of weights /= data" write(*,*), shape(weights), shape(training_y_lp) + !$omp end critical (print_lock) endif if (present(y_test)) then error = sqrt( sum(((training_y_lp - y_test)**2)*weights) / sum(weights) ) @@ -171,39 +173,27 @@ subroutine lapack_least_squares(X, Y, B, working_space, info) nrhs = 1 - if (max(maxval(X), maxval(Y)) > 1e8) then - print*, "ERROR regression inputs out of range" - print*, maxval(X), maxval(Y) - innerINFO = -1 + ! passing working_space in and out *could* be more efficient. Only used in tests right now + if (present(working_space)) then + ! First find the optimal work size (LWORK) + LWORK = -1 + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) + LWORK = MIN( size(working_space), INT( working_space( 1 ) ) ) + + ! Now solve the equations X*B = Y + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) else - if (present(working_space)) then - ! First find the optimal work size (LWORK) - LWORK = -1 - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) - LWORK = MIN( size(working_space), INT( working_space( 1 ) ) ) - - ! Now solve the equations X*B = Y - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, working_space, LWORK, innerINFO ) - else - ! First find the optimal work size (LWORK) - LWORK = -1 - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) - LWORK = MIN( 1000, INT( WORK( 1 ) ) ) + ! First find the optimal work size (LWORK) + LWORK = -1 + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) + LWORK = MIN( 1000, INT( WORK( 1 ) ) ) - ! Now solve the equations X*B = Y - CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) - endif - if (innerINFO/=0) then - ! !$omp critical (print_lock) - ! if (innerINFO<0) then - ! write(*,*) "ERROR in SGELS with argument:", 0-innerINFO - ! else - ! write(*,*) "ERROR in SGELS A does not have full rank, position:",innerINFO - ! endif - ! !$omp end critical (print_lock) - Y(1) = sum(Y)/size(Y) - Y(2:)= 0 - endif + ! Now solve the equations X*B = Y + CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO ) + endif + if (innerINFO/=0) then + Y(1) = sum(Y)/size(Y) + Y(2:)= 0 endif if (present(info)) info = innerINFO
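The workspace-query pattern above (calling SGELS once with LWORK = -1 to obtain the optimal
workspace size, then again with an allocated workspace to solve) is the standard LAPACK idiom.
A minimal stand-alone sketch of that idiom, separate from GARD itself, is below; the program
name and data are made up for illustration, and it assumes a single-precision LAPACK is linked
(e.g. -llapack):

program sgels_workspace_demo
    ! Minimal sketch of the LWORK = -1 workspace-query idiom used by lapack_least_squares.
    implicit none
    integer, parameter :: m = 6, n = 2        ! 6 observations, intercept + one predictor
    real    :: A(m, n), b(m), work_query(1)
    real, allocatable :: work(:)
    integer :: lwork, info, i

    ! Design matrix [1, x] and a response that is roughly y = 2 + 3x
    do i = 1, m
        A(i, 1) = 1.0
        A(i, 2) = real(i)
        b(i)    = 2.0 + 3.0 * real(i) + 0.01 * real(mod(i, 3))
    end do

    ! First call: LWORK = -1 asks SGELS only for the optimal workspace size,
    ! which it returns in work_query(1)
    call sgels('N', m, n, 1, A, m, b, m, work_query, -1, info)
    lwork = int(work_query(1))
    allocate(work(lwork))

    ! Second call: actually solve min ||A*coeff - b||; on exit the first n
    ! entries of b hold the least-squares coefficients
    call sgels('N', m, n, 1, A, m, b, m, work, lwork, info)

    if (info /= 0) then
        print *, "SGELS returned info = ", info
    else
        print *, "intercept, slope = ", b(1:n)
    end if
end program sgels_workspace_demo

A nonzero INFO is exactly the condition the routine above handles by falling back to
Y(1) = mean, Y(2:) = 0, i.e. predicting the training mean when the solve cannot be trusted.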