diff --git a/.gitignore b/.gitignore
index 73894b5..1996925 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,4 +35,5 @@ test_regression
 test_calendar
 test_qm
 test_config
+test_random
 gard
diff --git a/.travis.yml b/.travis.yml
index 1b74a82..2c5b194 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,7 +6,7 @@ os:
     - linux
 addons:
   apt:
-    source:
+    sources:
     - ubuntu-toolchain-r-test
     packages:
     - gfortran
diff --git a/src/config/configuration.f90 b/src/config/configuration.f90
index 68b0484..e8616f4 100644
--- a/src/config/configuration.f90
+++ b/src/config/configuration.f90
@@ -97,10 +97,10 @@ subroutine read_base_options(options)
     end_date        = ""
     start_train     = ""
    end_train       = ""
-    start_transform = ""
-    end_transform   = ""
-    post_start      = ""
-    post_end        = ""
+    start_transform = "1980-01-01 00:00:00"
+    end_transform   = "1980-01-01 00:00:00"
+    start_post      = ""
+    end_post        = ""
 
     output_file     = "gard_out_"
     n_analogs       = -1
@@ -157,27 +157,24 @@ subroutine read_base_options(options)
     ! this is the time period to use when calculating e.g. quantile mapping transformations
     call options%transform_start%init("gregorian")
-    if (start_transform=="") then
-        stop "ERROR must set a transform start date"
+    if (start_transform==end_transform) then
+        write(*,*) "WARNING: If you are using any transforms, the start and end dates should not be the same"
     endif
     call options%transform_start%set(start_transform)
 
     call options%transform_stop%init("gregorian")
-    if (end_transform=="") then
-        stop "ERROR must set a transform end date"
-    endif
     call options%transform_stop%set(end_transform)
 
     ! this is the time period to use when calculating e.g. quantile mapping transformations for post processing
-    if (maxval(post_correction_Xform) /= kNO_TRANSFORM) then
+    if (maxval(post_correction_transform) /= kNO_TRANSFORM) then
         call options%post_start%init("gregorian")
-        if (post_start=="") then
+        if (start_post=="") then
             stop "ERROR must set a post-processing start date"
         endif
-        call options%post_start%set(post_start)
+        call options%post_start%set(start_post)
 
         call options%post_end%init("gregorian")
-        if (post_end=="") then
+        if (end_post=="") then
             stop "ERROR must set a post-processing end date"
         endif
-        call options%post_end%set(post_end)
+        call options%post_end%set(end_post)
     endif
 
     options%training_file = training_file
diff --git a/src/downscaling_stats/analogs.f90 b/src/downscaling_stats/analogs.f90
index b4d274d..a73ada4 100644
--- a/src/downscaling_stats/analogs.f90
+++ b/src/downscaling_stats/analogs.f90
@@ -43,6 +43,10 @@ subroutine find_analogs(analogs, match, input, n, threshold, weights, skip_analo
     ! fast selection (O(n_inputs log(nanalogs)) worst case, typically closer to O(n_inputs))
     if (n>0) then
         if (.not.allocated(analogs)) allocate(analogs(n))
+        if (size(analogs) < n) then
+            deallocate(analogs)
+            allocate(analogs(n))
+        endif
         call top_n_analogs(distances, n, analogs)
     elseif (threshold>0) then
         distances = distances / nvars ! normalize by the number of variables so the supplied threshold doesn't have to change
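
The new guard in find_analogs only grows the analog buffer when a larger request arrives, so repeated calls can reuse the existing allocation. A minimal sketch of the same pattern as a standalone helper; ensure_capacity is an illustrative name, not part of this change:

    ! Grow-only reallocation, as used by find_analogs above: allocate on first use,
    ! reallocate only when the requested size exceeds the current buffer.
    subroutine ensure_capacity(buffer, n)
        implicit none
        integer, allocatable, intent(inout) :: buffer(:)
        integer, intent(in) :: n

        if (.not. allocated(buffer)) then
            allocate(buffer(n))
        elseif (size(buffer) < n) then
            deallocate(buffer)
            allocate(buffer(n))
        endif
    end subroutine ensure_capacity

A buffer larger than n is left as-is, which is presumably why top_n_analogs takes n explicitly rather than using size(analogs).
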
diff --git a/src/downscaling_stats/downscaling.f90 b/src/downscaling_stats/downscaling.f90
index 5244305..8858512 100644
--- a/src/downscaling_stats/downscaling.f90
+++ b/src/downscaling_stats/downscaling.f90
@@ -1,5 +1,6 @@
 module downscaling_mod
 
+    USE ieee_arithmetic
     use data_structures
     use string,         only : str
     use regression_mod, only : compute_regression, compute_logistic_regression
@@ -9,14 +10,20 @@ module downscaling_mod
     use time_util,       only : setup_time_indices
     use basic_stats_mod, only : stddev
     use io_routines,     only : file_exists, io_read
+    use random_mod,      only : box_muller_random
+    use sampling_mod,    only : sample_distribution
 
     implicit none
 
+    real,    parameter :: LOG_FILL_VALUE = 1e-30
+    real,    parameter :: MAX_ALLOWED_SIGMA = 20
+    integer, parameter :: N_RANDOM_SAMPLES = 10000
+
     integer*8, dimension(10) :: master_timers
-    real, parameter :: LOG_FILL_VALUE = 1e-30
-    real, parameter :: MAX_ALLOWED_SIGMA = 20
-    real, parameter :: N_RANDOM_SAMPLES = 10000
+    real    :: random_sample(N_RANDOM_SAMPLES)
+    logical :: post_process_errors = .False.
+
 contains
 
 subroutine downscale(training_atm, training_obs, predictors, output, options)
     implicit none
@@ -268,6 +275,21 @@ subroutine downscale(training_atm, training_obs, predictors, output, options)
                                           output%variables(v)%logistic  ( :, i, j), &
                                           current_threshold, options, timers, i,j)
 
+
+                    if (post_process_errors) then
+                        if (current_threshold /= kFILL_VALUE) then
+                            call sample_distribution(output%variables(v)%data(:,i,j),    &
+                                                     output%variables(v)%data(:,i,j),    &
+                                                     output%variables(v)%errors(:,i, j), &
+                                                     exceedence_probability=output%variables(v)%logistic(:,i,j), &
+                                                     threshold = current_threshold)
+                        else
+                            call sample_distribution(output%variables(v)%data(:,i,j),    &
+                                                     output%variables(v)%data(:,i,j),    &
+                                                     output%variables(v)%errors(:,i, j))
+                        endif
+                    endif
+
                     ! if the input data were transformed with e.g. a cube root or log transform, then reverse that transformation for the output
                     call System_Clock(timeone)
                     call transform_data(options%obs%input_Xforms(v), output%variables(v)%data(:,i,j), 1, noutput, &
@@ -560,46 +582,6 @@
     end select
 
 end subroutine transform_data
 
-!>------------------------------------------------
-!!  Use the Box-Muller Transform to convert uniform to normal random deviates
-!!
-!!  Note random_sample should be an allocated 1D real array
-!!  On return, random_sample will be filled with random normal (0,1) data
-!!
-!!  Caveat: this transform is unable to generate extremely high values (>6.66)
-!!  Having switched to double precision it may do better now.
-!!
-!-------------------------------------------------
-subroutine box_muller_random(random_sample)
-    implicit none
-    real, intent(inout) :: random_sample(:)
-    integer :: n,i
-
-    double precision :: u1, u2, s
-    double precision, allocatable :: double_random(:)
-
-    n = size(random_sample)
-    allocate(double_random(n))
-    call random_number(double_random)
-
-    do i=1,n,2
-        u1 = double_random(i)
-        if (i<n) then
-            u2 = double_random(i+1)
-        else
-            call random_number(u2)
-        endif
-
-        s = sqrt(-2 * log(u1))
-        random_sample(i) = s * cos(2 * kPI * u2)
-        if (i<n) random_sample(i+1) = s * sin(2 * kPI * u2)
-    enddo
-
-end subroutine box_muller_random
 
     if (options%time_smooth > 0) then
         do i=1, n
             start = max(1, i - options%time_smooth)
@@ -893,12 +877,18 @@ subroutine downscale_analog_regression(x, atm, obs, output_coeff, output, error,
         call System_Clock(timetwo)
         timers(9) = timers(9) + (timetwo-timeone)
 
-        ! compute the logistic as the probability of threshold exceedance in the analog population
         call System_Clock(timeone)
-        logistic = compute_analog_exceedance(obs, analogs(1:options%n_log_analogs), logistic_threshold)
+        if (options%analog_weights) then
+            ! compute the logistic as the weighted probability of threshold exceedance in the analog population
+            logistic = compute_analog_exceedance(obs, analogs(1:options%n_log_analogs), logistic_threshold, weights)
+        else
+            ! compute the logistic as the probability of threshold exceedance in the analog population
+            logistic = compute_analog_exceedance(obs, analogs(1:options%n_log_analogs), logistic_threshold)
+        endif
     else
         if (options%analog_weights) then
+            ! compute the logistic as the weighted probability of threshold exceedance in the analog population
             logistic = compute_analog_exceedance(obs, analogs, logistic_threshold, weights)
         else
             ! compute the logistic as the probability of threshold exceedance in the analog population
@@ -908,8 +898,18 @@
     else
         logistic = compute_logistic_regression(x, regression_data, obs_analogs, coefficients, logistic_threshold)
 
-        where( coefficients >  1e20 ) coefficients =  1e20
-        where( coefficients < -1e20 ) coefficients = -1e20
+
+        ! Check for severe errors in the logistic regression. This can happen with some input data.
+        ! If it fails, fall back to the analog exceedance calculation.
+        if (ieee_is_nan(logistic)) then
+            if (options%analog_weights) then
+                logistic = compute_analog_exceedance(obs, analogs, logistic_threshold, weights)
+            else
+                ! compute the logistic as the probability of threshold exceedance in the analog population
+                logistic = compute_analog_exceedance(obs, analogs, logistic_threshold)
+            endif
+
+        endif
 
         if (options%debug) then
             do j = 1,nvars
                 output_coeff(j+nvars) = coefficients(j)
@@ -1141,6 +1141,7 @@ subroutine setup_timing(training_atm, training_obs, predictors, options)
     type(atm),    intent(inout) :: training_atm, predictors
     type(obs),    intent(inout) :: training_obs
     type(config), intent(in)    :: options
+    integer :: n_atm_train, n_obs_train
 
     write(*,*) "-------------------"
     write(*,*) "Training ATM"
@@ -1152,6 +1153,13 @@
     write(*,*) "Predictors"
     call setup_time_indices(predictors, options)
 
+    n_obs_train = training_obs%training_stop - training_obs%training_start
+    n_atm_train = training_atm%training_stop - training_atm%training_start
+
+    if (n_obs_train /= n_atm_train) then
+        stop "ERROR Inconsistent time periods in training atm and obs data"
+    endif
+
 end subroutine setup_timing
 
 subroutine allocate_data(output, n_obs_variables, noutput, nx, ny, n_atm_variables, tr_size, options)
diff --git a/src/downscaling_stats/regression.f90 b/src/downscaling_stats/regression.f90
index 65c232b..06348e0 100644
--- a/src/downscaling_stats/regression.f90
+++ b/src/downscaling_stats/regression.f90
@@ -33,10 +33,10 @@ function compute_regression(x, training_x, training_y, coefficients, y_test, err
     allocate(varlist(nvars))
     varlist = -1
 
-    varlist(1) = 1   ! this assumes there is a constant passed in for the first variable...
-    useable_vars = 1 ! the constant
-
-    ! find all vars that have some variability through time.
+    varlist(1) = 1   ! this assumes there is a constant passed in for the first variable...
+    useable_vars = 1 ! the constant
+
+    ! find all vars that have some variability through time.
 
     do i=2,nvars
         nonzero_count = 0
         do j=1,ntimes
@@ -45,7 +45,7 @@ function compute_regression(x, training_x, training_y, coefficients, y_test, err
         endif
     enddo
     ! need at least two data points that are different from the first data point
-    ! to ensure that the regression on that variable might be useful.
+    ! to ensure that the regression on that variable might be useful.
     if ( nonzero_count > 2 ) then
         useable_vars = useable_vars + 1
         varlist(i) = i
@@ -116,14 +116,17 @@ function compute_regression(x, training_x, training_y, coefficients, y_test, err
     if (present(weights)) then
         if (allocated(weights)) then
             if (size(weights)/=size(training_y_lp)) then
+                !$omp critical (print_lock)
                 write(*,*) "ERROR size of weights /= data"
                 write(*,*), shape(weights), shape(training_y_lp)
+                !$omp end critical (print_lock)
             endif
             if (present(y_test)) then
                 error = sqrt( sum(((training_y_lp - y_test)**2)*weights) / sum(weights) )
             else
                 error = sqrt( sum(((training_y_lp - training_y)**2)*weights) / sum(weights) )
             endif
+
         else
             if (present(y_test)) then
                 error = sqrt( sum((training_y_lp - y_test)**2) / ntimes )
@@ -171,6 +174,7 @@ subroutine lapack_least_squares(X, Y, B, working_space, info)
 
     nrhs = 1
 
+    ! passing working_space in and out *could* be more efficient. Only used in tests right now
     if (present(working_space)) then
         ! First find the optimal work size (LWORK)
         LWORK = -1
@@ -189,13 +193,6 @@
         CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO )
     endif
     if (innerINFO/=0) then
-        ! !$omp critical (print_lock)
-        ! if (innerINFO<0) then
-        !     write(*,*) "ERROR in SGELS with argument:", 0-innerINFO
-        ! else
-        !     write(*,*) "ERROR in SGELS A does not have full rank, position:",innerINFO
-        ! endif
-        ! !$omp end critical (print_lock)
         Y(1) = sum(Y)/size(Y)
         Y(2:)= 0
     endif
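
The LWORK = -1 call in lapack_least_squares uses the standard LAPACK workspace-query idiom: a first SGELS call with LWORK = -1 only reports the optimal workspace size in WORK(1), and a second call performs the actual solve. A self-contained sketch; the 100x5 system and the variable names are illustrative, not GARD's:

    program sgels_query_demo
        implicit none
        integer, parameter :: M = 100, N = 5
        real :: A(M,N), B(M,1), work_query(1)
        real, allocatable :: work(:)
        integer :: lwork, info

        call random_number(A)
        call random_number(B)

        ! workspace query: LWORK = -1 computes nothing, it only sizes the workspace
        call SGELS('N', M, N, 1, A, M, B, M, work_query, -1, info)
        lwork = nint(work_query(1))
        allocate(work(lwork))

        ! actual least-squares solve; on exit B(1:N,1) holds the coefficients
        call SGELS('N', M, N, 1, A, M, B, M, work, lwork, info)
        if (info /= 0) print*, "SGELS returned info =", info
    end program sgels_query_demo

Caching the allocated workspace between calls (the working_space argument above) avoids repeating the query and allocation inside tight loops.
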
diff --git a/src/downscaling_stats/sampling.f90 b/src/downscaling_stats/sampling.f90
new file mode 100644
index 0000000..6a40819
--- /dev/null
+++ b/src/downscaling_stats/sampling.f90
@@ -0,0 +1,86 @@
+module sampling_mod
+
+    use random_mod, only : normal_cdf, box_muller_random, get_normal_from_cdf
+    implicit none
+contains
+
+    subroutine sample_distribution(output, mean_values, error_term,             &
+                                   normal_random_values, uniform_random_values, &
+                                   exceedence_probability, threshold)
+        implicit none
+        real, intent(out) :: output(:)
+        real, intent(in)  :: mean_values(:), error_term(:)
+        real, intent(in), optional :: normal_random_values(:)
+        real, intent(in), optional :: exceedence_probability(:)
+        real, intent(in), optional :: uniform_random_values(:)
+        real, intent(in), optional :: threshold
+
+        integer :: i,n
+        real    :: random_normal, poe, threshold_internal
+        real, allocatable :: uniform(:), normal(:)
+
+        threshold_internal = 0
+        if (present(threshold)) threshold_internal = threshold
+
+        n = size(mean_values)
+        ! the following section just ensures that the optional variables have the necessary internal variables created
+        if (present(exceedence_probability)) then
+            allocate(uniform(n))
+            if (present(uniform_random_values)) then
+                uniform = uniform_random_values
+            else
+                if (present(normal_random_values)) then
+                    ! if we were given normal random values, but not uniform, then create uniform from normal
+                    do i=1,n
+                        uniform(i) = normal_cdf( normal_random_values(i) )
+                    enddo
+                else
+                    ! While this will generate n uniform random numbers, there will not be any spatio-temporal
+                    ! auto-correlation to them.
+                    call random_number(uniform)
+                endif
+            endif
+        else
+            allocate(normal(n))
+            if (present(normal_random_values)) then
+                normal = normal_random_values
+            else
+                if (present(uniform_random_values)) then
+                    normal = uniform_random_values
+                else
+                    call random_number(normal)
+                endif
+                call box_muller_random(normal)
+            endif
+        endif
+
+        ! this is the heart of the distribution sampling code
+        do i=1,n
+            if (present(exceedence_probability)) then
+                if (uniform(i) > (1-exceedence_probability(i)) ) then
+
+                    ! if the random number is greater than the non-exceedance probability, then we have exceeded it...
+                    ! rescale the uniform random number for the region of exceedance and convert it to a normal random number
+                    random_normal = get_normal_from_cdf((uniform(i) - (1 - exceedence_probability(i))) / (exceedence_probability(i)) )
+
+                    ! ultimately this is the same equation as below for variables without an exceedance threshold
+                    output(i) = mean_values(i) + error_term(i) * random_normal
+
+                    ! also check: if the random term has pushed the value below the threshold anyway, then just set it to the threshold
+                    if (output(i) < threshold_internal) then
+                        output(i) = threshold_internal
+                    endif
+                else
+                    ! if this value did not exceed the threshold, then just set it to the threshold
+                    output(i) = threshold_internal
+                endif
+            else
+                ! the standard approach is to take the residuals, multiply by a normal random variate, and add to the mean prediction
+                output(i) = mean_values(i) + error_term(i) * normal(i)
+            endif
+        enddo
+
+    end subroutine sample_distribution
+
+
+end module sampling_mod
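
For a value with exceedance probability p = 0.25, a uniform draw u = 0.85 lands in the exceedance region (u > 1 - p = 0.75) and is rescaled to (0.85 - 0.75) / 0.25 = 0.4 before being converted to a normal deviate through get_normal_from_cdf. A minimal driver for sample_distribution; the array contents are made up:

    program sampling_demo
        use sampling_mod, only : sample_distribution
        implicit none
        real :: mean_values(4) = [0.0, 1.2, 3.5, 0.4]
        real :: error_term(4)  = [0.5, 0.5, 1.0, 0.2]
        real :: poe(4)         = [0.1, 0.6, 0.9, 0.3] ! per-value exceedance probabilities
        real :: output(4)

        ! with exceedence_probability and a threshold: values that do not exceed the
        ! threshold are set to it; exceeding values draw noise from the upper tail
        call sample_distribution(output, mean_values, error_term, &
                                 exceedence_probability=poe, threshold=0.0)
        print*, output

        ! without them: every value is simply mean + error * N(0,1)
        call sample_distribution(output, mean_values, error_term)
        print*, output
    end program sampling_demo
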
diff --git a/src/makefile b/src/makefile
index b67a281..247e3a3 100644
--- a/src/makefile
+++ b/src/makefile
@@ -282,6 +282,8 @@ OBJS = \
        $(BUILD)obs_io.o        \
        $(BUILD)geo_reader.o    \
        $(BUILD)basic_stats.o   \
+       $(BUILD)random.o        \
+       $(BUILD)sampling.o      \
        $(BUILD)sorting.o       \
        $(BUILD)quantile_mapping.o \
        $(BUILD)analogs.o       \
@@ -319,6 +321,10 @@ REGRESSION_TEST_OBJS = \
        $(BUILD)regression.o \
        $(BUILD)test_regression.o
 
+RANDOM_TEST_OBJS = \
+       $(BUILD)test_random.o \
+       $(BUILD)random.o \
+       $(BUILD)model_constants.o
 
 # These are the primary user facing rules
 #
@@ -329,7 +335,7 @@ model: gard
 
 all: gard test
 
-test: test_calendar test_sort test_qm test_config test_regression
+test: test_calendar test_sort test_qm test_config test_regression test_random
 
 install: gard
        cp gard $(INSTALL_DIR)/
@@ -358,10 +364,14 @@ test_qm : $(QM_TEST_OBJS)
 test_sort : $(SORT_TEST_OBJS)
        $(FC) -o $@ $^ $(LDFLAGS)
 
+test_random : $(RANDOM_TEST_OBJS)
+       $(FC) -o $@ $^ $(LDFLAGS)
+
 test_config : $(CONFIG_TEST_OBJS)
        $(FC) -o $@ $^ $(LDFLAGS)
 
+
 # specify specific builds and dependencies.
 # unit tests
@@ -380,6 +390,9 @@ $(BUILD)test_qm.o: tests/test_qm.f90 $(BUILD)data_structures.o $(BUILD)quantile_
 $(BUILD)test_config.o: tests/test_config.f90 $(BUILD)data_structures.o $(BUILD)model_constants.o $(BUILD)configuration.o
        $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@
 
+$(BUILD)test_random.o: tests/test_random.f90 $(BUILD)random.o $(BUILD)model_constants.o
+       $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@
+
 # Date and time relevant objects
 $(BUILD)time.o: timing/time.f90 $(BUILD)model_constants.o
@@ -399,6 +412,12 @@ $(BUILD)quantile_mapping.o: utilities/quantile_map.f90 $(BUILD)data_structures.o
 $(BUILD)sorting.o: utilities/sorting.f90 utilities/qsort_inline.inc
        $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@
 
+$(BUILD)random.o: utilities/random.f90 $(BUILD)model_constants.o
+       $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@
+
+$(BUILD)sampling.o: downscaling_stats/sampling.f90 $(BUILD)random.o
+       $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@
+
 $(BUILD)basic_stats.o: utilities/basic_stats.f90
        $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@
 
@@ -454,7 +473,7 @@ $(BUILD)atmosphere_io.o: io/atmosphere_io.f90 $(BUILD)data_structures.o \
        $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@
 
 $(BUILD)downscaling.o: downscaling_stats/downscaling.f90 $(BUILD)data_structures.o $(BUILD)model_constants.o \
-       $(BUILD)quantile_mapping.o $(BUILD)regression.o $(BUILD)analogs.o
+       $(BUILD)quantile_mapping.o $(BUILD)regression.o $(BUILD)analogs.o $(BUILD)random.o
        $(FC) $(FCFLAGS) -c $< $(MODOUTPUT) -o $@
 
 $(BUILD)analogs.o: downscaling_stats/analogs.f90 $(BUILD)string.o
diff --git a/src/tests/test_qm.f90 b/src/tests/test_qm.f90
index a871fbe..6852a3b 100644
--- a/src/tests/test_qm.f90
+++ b/src/tests/test_qm.f90
@@ -17,7 +17,7 @@ program test_qm
     implicit none
 
     logical :: verbose
-    integer*8, parameter :: n = 100000
+    integer*8, parameter :: n = 10000
     real,      parameter :: acceptable_error = 1 ! 1e-3 <- use this with more QM segments and train on the complete dataset
 
     real, allocatable, dimension(:) :: input_data, matching_data
@@ -109,13 +109,19 @@ subroutine test_mapping(input_data, matching_data, name)
         print*, "To match:",sum(matching_data)/n
         print*, "  min/max",minval(matching_data),maxval(matching_data)
         print*, "---------------------------"
+        if (n < 20) then
+            print*, input_data
+            print*, "---------------------------"
+            print*, matching_data
+            print*, "---------------------------"
+        endif
     endif
 
     call system_clock(start_time)
     !-------------------------------
     !  Develop the Quantile mapping
     !-------------------------------
-    call develop_qm(input_data(1:size(input_data)/2), matching_data, qm, 300)! min(size(input_data)/2, size(matching_data)/2))
+    call develop_qm(input_data(1:size(input_data)/2), matching_data, qm, min(300, min(size(input_data)/2, size(matching_data)/2)))
 
     call system_clock(end_time, COUNT_RATE, COUNT_MAX)
     if (start_time>end_time) end_time=end_time+COUNT_MAX
@@ -185,6 +191,10 @@ subroutine show_results(qmdata, original, name, time)
         print*, "Q-Mapped Min/Max = ",minval(qmdata), maxval(qmdata)
         print*, "Original Min/Max = ",minval(original), maxval(original)
         print*, "---------------------------"
+        if (n < 20) then
+            print*, qmdata
+            print*, "---------------------------"
+        endif
         print*, "   Elapsed time = ", time
     endif
     call pass_fail(error, "   Stddev:")
diff --git a/src/tests/test_random.f90 b/src/tests/test_random.f90
new file mode 100644
index 0000000..43d9fe0
--- /dev/null
+++ b/src/tests/test_random.f90
@@ -0,0 +1,152 @@
+program test_random
+
+    use random_mod
+
+    implicit none
+
+    print*, "-------------------------------------"
+    print*, "test_normal_to_cdf_conversion"
+    call test_normal_to_cdf_conversion()
+
+    print*, "-------------------------------------"
+    print*, "test_box_muller_random(10000)"
+    call test_box_muller_random(10000)
+    print*, "-------------------------------------"
+    print*, "test_box_muller_random(9999)"
+    call test_box_muller_random(9999)
+
+    print*, "-------------------------------------"
+    print*, "test_uniform_to_normal"
+    call test_uniform_to_normal()
+
+contains
+
+    subroutine test_uniform_to_normal()
+
+        implicit none
+        integer, parameter :: ntest_values = 23
+
+        ! hard coded expected CDF values for a given normal value from scipy.stats.norm.cdf
+        real :: normal_values(ntest_values) = [-5.50, -5.00, -4.50, -4.00, -3.50, -3.00, -2.50, -2.00, &
+                                               -1.50, -1.00, -0.50, 0.000, 0.500,  1.00,  1.50,  2.00, &
+                                                2.50,  3.00,  3.50,  4.00,  4.50,  5.00,  5.50]
+
+        real :: cdf_values(ntest_values) = [0.000000019, 0.0000002, 0.0000033, 0.0000316, 0.0002326, &
+                                            0.0013498, 0.0062096, 0.0227501, 0.0668072, 0.1586552,   &
+                                            0.3085375, 0.5000000, 0.6914624, 0.8413447, 0.9331927,   &
+                                            0.9772498, 0.9937903, 0.9986501, 0.9997673, 0.9999683,   &
+                                            0.9999966, 0.9999997, 0.9999999810]
+        logical :: passing
+        integer :: i
+        real    :: normal
+
+        passing = .True.
+        print*, "WARNING: Passing criteria may not be strict enough, errors can be up to 20%"
+
+        ! iterate over all values to test; a value fails if the result is more than 1e-2 from
+        ! the expected value in absolute terms AND more than 20% away in relative terms
+        do i = 1, ntest_values
+            normal = get_normal_from_cdf( cdf_values(i) )
+            if ((abs(normal_values(i) - normal ) > 1e-2) .and. (abs(normal_values(i) - normal )/abs(normal_values(i)) > 2e-1)) then
+                passing = .False.
+                print*, ""
+                print*, "   Error =    ",normal_values(i) - normal, (normal_values(i) - normal)/normal_values(i)
+                print*, "   Expected = ", normal_values(i)
+                print*, "   Got:      ",normal, "  for:",cdf_values(i)
+            endif
+        end do
+
+        if (passing) then
+            print*, "  PASSED"
+        else
+            print*, "  FAILED"
+        endif
+    end subroutine test_uniform_to_normal
+
+
+    subroutine test_normal_to_cdf_conversion()
+
+        implicit none
+        integer, parameter :: ntest_values = 23
+
+        ! hard coded expected CDF values for a given normal value from scipy.stats.norm.cdf
+        real :: normal_values(ntest_values) = [-5.50, -5.00, -4.50, -4.00, -3.50, -3.00, -2.50, -2.00, &
+                                               -1.50, -1.00, -0.50, 0.000, 0.500,  1.00,  1.50,  2.00, &
+                                                2.50,  3.00,  3.50,  4.00,  4.50,  5.00,  5.50]
+
+        real :: cdf_values(ntest_values) = [0.000000019, 0.0000002, 0.0000033, 0.0000316, 0.0002326, &
+                                            0.0013498, 0.0062096, 0.0227501, 0.0668072, 0.1586552,   &
+                                            0.3085375, 0.5000000, 0.6914624, 0.8413447, 0.9331927,   &
+                                            0.9772498, 0.9937903, 0.9986501, 0.9997673, 0.9999683,   &
+                                            0.9999966, 0.9999997, 0.9999999810]
+        logical :: passing
+        integer :: i
+
+        passing = .True.
+
+        ! iterate over all values to test; a value fails if the result is more than 2e-3 away from the expected value
+        do i = 1, ntest_values
+            if (abs(cdf_values(i) - normal_cdf( normal_values(i) ) ) > 2e-3) then
+                passing = .False.
+                print*, ""
+                print*, "   Error =    ",cdf_values(i) - normal_cdf( normal_values(i))
+                print*, "   Expected = ", cdf_values(i)
+                print*, "   Got:      ",normal_cdf( normal_values(i)), "  for:",normal_values(i)
+            endif
+        end do
+
+        if (passing) then
+            print*, "  PASSED"
+        else
+            print*, "  FAILED"
+        endif
+    end subroutine test_normal_to_cdf_conversion
+
+    subroutine test_box_muller_random(n_random_values)
+        implicit none
+
+        integer, intent(in) :: n_random_values
+
+        real    :: random_values(n_random_values)
+        real    :: sigma, mean
+        integer :: i
+        logical :: passing
+
+        passing = .True.
+
+        call box_muller_random(random_values)
+
+        mean = sum(random_values) / size(random_values)
+        if (abs(mean) > 1e-2) then
+            print*, "  Error failed computing mean: ",mean
+            passing = .False.
+        endif
+
+        sigma = 0
+        do i = 1, n_random_values
+            sigma = sigma + (random_values(i) - mean)**2
+        enddo
+        sigma = sqrt(sigma / (n_random_values - 1))
+        if (abs(sigma - 1) > 1e-1) then
+            print*, "  Error failed computing standard deviation: ", sigma
+            passing = .False.
+        endif
+
+
+        if (minval(random_values) > (-3)) then
+            print*, "  Error failed computing minimum value: ", minval(random_values)
+            passing = .False.
+        endif
+
+        if (maxval(random_values) < (3)) then
+            print*, "  Error failed computing maximum value: ", maxval(random_values)
+            passing = .False.
+        endif
+
+        if (passing) then
+            print*, "  PASSED"
+        else
+            print*, "  FAILED"
+        endif
+    end subroutine test_box_muller_random
+
+end program test_random
diff --git a/src/utilities/basic_stats.f90 b/src/utilities/basic_stats.f90
index f5bd5a4..79e03d7 100644
--- a/src/utilities/basic_stats.f90
+++ b/src/utilities/basic_stats.f90
@@ -100,7 +100,7 @@ subroutine time_stddev(input, stddev, mean_in)
         do x=1,nx
             difference = input(:,x,y) - mean(x,y)
             ! in case there are bad input data, prevents a floating point overflow
-            where(difference > 1e10) = 1e10
+            where(difference > 1e10) difference = 1e10
             stddev(x,y) = sqrt( sum( difference**2 ) / (nt-1) )
         end do
     end do
diff --git a/src/utilities/random.f90 b/src/utilities/random.f90
new file mode 100644
index 0000000..628811e
--- /dev/null
+++ b/src/utilities/random.f90
@@ -0,0 +1,232 @@
+module random_mod
+
+    use model_constants
+
+    implicit none
+
+    private
+    double precision, parameter :: root_pi_over_8 = 0.626657068658 ! sqrt(kPI/8)
+
+    integer, parameter :: size_of_normal_cdf_lookup_table = 1000000
+    double precision, allocatable :: fast_cdf_lookup_table(:), cdf_lookup_table(:,:)
+
+    interface normal_cdf
+        module procedure real_normal_cdf, double_normal_cdf
+    end interface
+
+    public :: normal_cdf, box_muller_random, get_normal_from_cdf
+
+contains
+
+    !>------------------------------------------------
+    !!  Use the Box-Muller Transform to convert uniform to normal random deviates
+    !!
+    !!  Note random_sample should be an allocated 1D real array
+    !!  On return, random_sample will be filled with random normal (0,1) data
+    !!
+    !!  Caveat: this transform is unable to generate extremely high values (>6.66)
+    !!  Having switched to double precision it may do better now.
+    !!
+    !-------------------------------------------------
+    subroutine box_muller_random(random_sample)
+        implicit none
+        real, intent(inout) :: random_sample(:)
+        integer :: n,i
+
+        double precision :: u1, u2, s
+        double precision, allocatable :: double_random(:)
+
+        n = size(random_sample)
+        allocate(double_random(n))
+        call random_number(double_random)
+
+        do i=1,n,2
+            u1 = double_random(i)
+            if (i<n) then
+                u2 = double_random(i+1)
+            else
+                call random_number(u2)
+            endif
+
+            s = sqrt(-2 * log(u1))
+            random_sample(i) = s * cos(2 * kPI * u2)
+            if (i<n) random_sample(i+1) = s * sin(2 * kPI * u2)
+        enddo
+
+    end subroutine box_muller_random
+
+    !>------------------------------------------------
+    !!  Computes an approximation to the normal CDF based on Aludaat and Alodat (2008)
+    !!
+    !!  Aludaat, K.M. and Alodat, M.T. (2008). A note on approximating the normal distribution function. Applied Mathematical Sciences, Vol 2, no 9, pgs 425-429.
+    !!
+    !!------------------------------------------------
+    function real_normal_cdf(normal) result(cdf)
+        implicit none
+        real :: normal
+        real :: cdf
+
+        cdf = 0.5 * (1 + sqrt((1 - exp(0 - (root_pi_over_8 * normal**2))) ))
+
+        if (normal < 0) then
+            cdf = 1 - cdf
+        endif
+
+    end function real_normal_cdf
+
+    !>------------------------------------------------
+    !!  Computes an approximation to the normal CDF based on Aludaat and Alodat (2008)
+    !!
+    !!  Aludaat, K.M. and Alodat, M.T. (2008). A note on approximating the normal distribution function. Applied Mathematical Sciences, Vol 2, no 9, pgs 425-429.
+    !!
+    !!------------------------------------------------
+    function double_normal_cdf(normal) result(cdf)
+        implicit none
+        double precision :: normal
+        double precision :: cdf
+
+        cdf = 0.5 * (1 + sqrt((1.d0 - exp(0.d0 - (root_pi_over_8 * normal**2))) ))
+
+        if (normal < 0) then
+            cdf = 1 - cdf
+        endif
+
+    end function double_normal_cdf
+
+    !>------------------------------------------------
+    !!  Create a lookup table to convert from a uniform random number to a normal random number
+    !!
+    !!------------------------------------------------
+    subroutine create_cdf_lookup_table()
+        implicit none
+
+        integer :: i
+        real :: normal_random_value, normal_step
+
+        ! in case another thread beat this one to the lock after the if
+        if (.not.allocated(fast_cdf_lookup_table)) then
+
+            allocate(cdf_lookup_table(size_of_normal_cdf_lookup_table, 2))
+
+            normal_random_value = (-5.5)
+            normal_step = 11.0 / (size_of_normal_cdf_lookup_table - 1)
+
+            do i = 1, size_of_normal_cdf_lookup_table
+                cdf_lookup_table(i,1) = normal_random_value
+                cdf_lookup_table(i,2) = normal_cdf(normal_random_value)
+
+                normal_random_value = normal_random_value + normal_step
+            enddo
+
+            ! now invert this slow lookup table to create a fast lookup table
+            allocate(fast_cdf_lookup_table(size_of_normal_cdf_lookup_table))
+
+            do i = 1,size_of_normal_cdf_lookup_table
+                fast_cdf_lookup_table(i) = look_up(dble(i) / size_of_normal_cdf_lookup_table, cdf_lookup_table)
+            enddo
+
+            deallocate(cdf_lookup_table)
+        endif
+
+    end subroutine create_cdf_lookup_table
+
+    !>------------------------------------------------
+    !!  Perform a binary search through the LUT to find value, then interpolate between the two closest entries
+    !!
+    !!------------------------------------------------
+    function look_up(value, LUT) result(output)
+        implicit none
+        double precision, intent(in) :: value
+        double precision, intent(in) :: LUT(:,:)
+        real :: output
+
+        real :: fraction, denominator
+        integer :: i,n
+        integer :: step
+
+        n = size(LUT, 1)
+        i = n/2
+        step = n/2
+
+        ! perform a binary search
+        do while(step/2.0 >= 1)
+            step = (step+1) / 2
+
+            if (LUT(i,2) > value) then
+                i = max(1, i - step)
+            else
+                i = min(n, i + step)
+            endif
+
+        enddo
+
+        ! figure out which side of this index we fall on and interpolate
+        if (value > LUT(i,2)) then
+            if (i==n) then
+                output = LUT(i,1)
+            else
+                denominator = max(1e-20, (LUT(i+1,2) - LUT(i,2)))
+                fraction = (value - LUT(i,2)) / denominator
+                output = fraction * LUT(i+1,1) + (1-fraction) * LUT(i,1)
+            endif
+        else
+            if (i==1) then
+                output = LUT(i,1)
+            else
+                denominator = max(1e-20, (LUT(i,2) - LUT(i-1,2)))
+                fraction = (LUT(i,2) - value) / denominator
+
+                output = fraction * LUT(i-1,1) + (1-fraction) * LUT(i,1)
+            endif
+        endif
+    end function look_up
+
+
+    !>------------------------------------------------
+    !!  Calculate the index into the precomputed LUT to find cdf_value, then interpolate between the two closest entries
+    !!
+    !!------------------------------------------------
+    function get_normal_from_cdf(cdf_value) result(random_normal)
+        implicit none
+        real, intent(in) :: cdf_value
+        real :: random_normal
+
+        integer :: index
+        double precision :: real_index
+        double precision :: fraction
+
+        if (.not.allocated(fast_cdf_lookup_table)) then
+            !$omp critical(cdf_lookup)
+            call create_cdf_lookup_table()
+            !$omp end critical(cdf_lookup)
+        endif
+
+        ! in the fast lookup table we can just compute the optimal index
+        real_index = cdf_value * 1.0d0 * size_of_normal_cdf_lookup_table
+        index = min(size_of_normal_cdf_lookup_table, max(1, nint( real_index)) )
+
+        ! then interpolate between this index and the next closest
+        if (index > real_index) then
+            if (index == 1) then
+                random_normal = fast_cdf_lookup_table(index)
+            else
+                fraction = (index - real_index)
+                random_normal = (1-fraction) * fast_cdf_lookup_table(index) &
+                              +    fraction  * fast_cdf_lookup_table(max(1,index-1))
+            endif
+        else
+            if (index == size_of_normal_cdf_lookup_table) then
+                random_normal = fast_cdf_lookup_table(index)
+            else
+                fraction = (real_index - index)
+                random_normal = (1-fraction) * fast_cdf_lookup_table(index) &
+                              +    fraction  * fast_cdf_lookup_table(min(size_of_normal_cdf_lookup_table,index+1))
+            endif
+        endif
+
+    end function get_normal_from_cdf
+
+
+end module random_mod
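
A quick round trip through random_mod's public interface ties the pieces together: normal_cdf maps a deviate to its CDF value, get_normal_from_cdf inverts it through the precomputed lookup table, and box_muller_random fills an array with normal(0,1) deviates. A minimal check; the expected values noted in the comments are approximate:

    program random_roundtrip
        use random_mod, only : normal_cdf, get_normal_from_cdf, box_muller_random
        implicit none
        real :: z, cdf, z_back
        real :: sample(1000)

        z = 1.5
        cdf = normal_cdf(z)               ! roughly 0.93 from the Aludaat & Alodat approximation
        z_back = get_normal_from_cdf(cdf) ! should recover roughly 1.5 via the lookup table
        print*, z, cdf, z_back

        ! fill an array with normal(0,1) deviates using the Box-Muller transform
        call box_muller_random(sample)
        print*, "sample mean (should be near 0):", sum(sample)/size(sample)
    end program random_roundtrip
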