Skip to content

Commit

Permalink
Merge pull request #52 from gutmann/qq_normal
Browse files Browse the repository at this point in the history
Qq normal from gutmann
  • Loading branch information
Joe Hamman authored Mar 9, 2017
2 parents a12802f + 94c0ccc commit fd6b2d9
Show file tree
Hide file tree
Showing 12 changed files with 586 additions and 80 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ test_regression
test_calendar
test_qm
test_config
test_random
gard
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ os:
- linux
addons:
apt:
source:
sources:
- ubuntu-toolchain-r-test
packages:
- gfortran
Expand Down
25 changes: 11 additions & 14 deletions src/config/configuration.f90
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,10 @@ subroutine read_base_options(options)
end_date = ""
start_train = ""
end_train = ""
start_transform = ""
end_transform = ""
post_start = ""
post_end = ""
start_transform = "1980-01-01 00:00:00"
end_transform = "1980-01-01 00:00:00"
start_post = ""
end_post = ""

output_file = "gard_out_"
n_analogs = -1
Expand Down Expand Up @@ -157,27 +157,24 @@ subroutine read_base_options(options)

! this is the time period to use when calculating e.g. quantile mapping transformations
call options%transform_start%init("gregorian")
if (start_transform=="") then
stop "ERROR must set a transform start date"
if (start_transform==end_transform) then
write(*,*) "WARNING: If you are using any transforms start and end date should not be the same"
endif
call options%transform_start%set(start_transform)
call options%transform_stop%init("gregorian")
if (end_transform=="") then
stop "ERROR must set a transform end date"
endif
call options%transform_stop%set(end_transform)
! this is the time period to use when calculating e.g. quantile mapping transformations for post processing
if (maxval(post_correction_Xform) /= kNO_TRANSFORM) then
if (maxval(post_correction_transform) /= kNO_TRANSFORM) then
call options%post_start%init("gregorian")
if (post_start=="") then
if (start_post=="") then
stop "ERROR must set a post-processing start date"
endif
call options%post_start%set(post_start)
call options%post_start%set(start_post)
call options%post_end%init("gregorian")
if (post_end=="") then
if (end_post=="") then
stop "ERROR must set a post-processing end date"
endif
call options%post_end%set(post_end)
call options%post_end%set(end_post)
endif

options%training_file = training_file
Expand Down
4 changes: 4 additions & 0 deletions src/downscaling_stats/analogs.f90
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ subroutine find_analogs(analogs, match, input, n, threshold, weights, skip_analo
! fast selection (O(n_inputs log(nanalogs)) worst case, typically closer to O(n_inputs))
if (n>0) then
if (.not.allocated(analogs)) allocate(analogs(n))
if (size(analogs) < n) then
deallocate(analogs)
allocate(analogs(n))
endif
call top_n_analogs(distances, n, analogs)
elseif (threshold>0) then
distances = distances / nvars ! normalize by the number of variables so the supplied threshold doesn't have to changes
Expand Down
104 changes: 56 additions & 48 deletions src/downscaling_stats/downscaling.f90
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module downscaling_mod

USE ieee_arithmetic
use data_structures
use string, only : str
use regression_mod, only : compute_regression, compute_logistic_regression
Expand All @@ -9,14 +10,20 @@ module downscaling_mod
use time_util, only : setup_time_indices
use basic_stats_mod, only : stddev
use io_routines, only : file_exists, io_read
use random_mod, only : box_muller_random
use sampling_mod, only : sample_distribution
implicit none

real, parameter :: LOG_FILL_VALUE = 1e-30
real, parameter :: MAX_ALLOWED_SIGMA = 20
integer, parameter :: N_RANDOM_SAMPLES = 10000

integer*8, dimension(10) :: master_timers
real, parameter :: LOG_FILL_VALUE = 1e-30
real, parameter :: MAX_ALLOWED_SIGMA = 20
real, parameter :: N_RANDOM_SAMPLES = 10000

real :: random_sample(N_RANDOM_SAMPLES)

logical :: post_process_errors = .False.

contains
subroutine downscale(training_atm, training_obs, predictors, output, options)
implicit none
Expand Down Expand Up @@ -268,6 +275,21 @@ subroutine downscale(training_atm, training_obs, predictors, output, options)
output%variables(v)%logistic ( :, i, j), &
current_threshold, options, timers, i,j)


if (post_process_errors) then
if (current_threshold /= kFILL_VALUE) then
call sample_distribution(output%variables(v)%data(:,i,j), &
output%variables(v)%data(:,i,j), &
output%variables(v)%errors(:,i, j), &
exceedence_probability=output%variables(v)%logistic(:,i,j), &
threshold = current_threshold)
else
call sample_distribution(output%variables(v)%data(:,i,j), &
output%variables(v)%data(:,i,j), &
output%variables(v)%errors(:,i, j))
endif
endif

! if the input data were transformed with e.g. a cube root or log transform, then reverse that transformation for the output
call System_Clock(timeone)
call transform_data(options%obs%input_Xforms(v), output%variables(v)%data(:,i,j), 1, noutput, &
Expand Down Expand Up @@ -560,46 +582,6 @@ subroutine transform_data(transform_type, pred_data, &
end select
end subroutine transform_data

!>------------------------------------------------
!! Use the Box-Muller Transform to convert uniform to normal random deviates
!!
!! Note random_sample should be an allocated 1D real array
!! On return, random_sample will be filled with random normal (0,1) data
!!
!! Caveat: this transform is unable to generate extremely high values (>6.66)
!! Having switched to double precision it may do better now.
!!
!-------------------------------------------------
subroutine box_muller_random(random_sample)
implicit none
real, intent(inout) :: random_sample(:)
integer :: n,i

double precision :: u1, u2, s
double precision, allocatable :: double_random(:)

n = size(random_sample)
allocate(double_random(n))
call random_number(double_random)

do i=1,n,2
u1 = double_random(i)
if (i<n) then
u2 = double_random(i+1)
else
call random_number(u2)
endif

s = sqrt(-2 * log(u1))
random_sample(i) = s * cos(2 * kPI * u2)
if (i<n) then
random_sample(i+1) = s * sin(2 * kPI * u2)
endif

enddo

end subroutine box_muller_random

function downscale_point(predictor, atm, obs_in, errors, output_coeff, logistic, logistic_threshold, options, timers, xpnt, ypnt) result(output)
implicit none
real, dimension(:,:), intent(inout):: predictor, atm ! (ntimes, nvars)
Expand Down Expand Up @@ -645,7 +627,9 @@ function downscale_point(predictor, atm, obs_in, errors, output_coeff, logistic,
return
endif

allocate(obs, source=obs_in)
allocate(obs(size(obs_in)))
obs = obs_in

if (options%time_smooth > 0) then
do i=1, n
start = max(1, i - options%time_smooth)
Expand Down Expand Up @@ -893,12 +877,18 @@ subroutine downscale_analog_regression(x, atm, obs, output_coeff, output, error,
call System_Clock(timetwo)
timers(9) = timers(9) + (timetwo-timeone)

! compute the logistic as the probability of threshold exceedance in the analog population
call System_Clock(timeone)
logistic = compute_analog_exceedance(obs, analogs(1:options%n_log_analogs), logistic_threshold)
if (options%analog_weights) then
! compute the logistic as the weighted probability of threshold exceedance in the analog population
logistic = compute_analog_exceedance(obs, analogs(1:options%n_log_analogs), logistic_threshold, weights)
else
! compute the logistic as the probability of threshold exceedance in the analog population
logistic = compute_analog_exceedance(obs, analogs(1:options%n_log_analogs), logistic_threshold)
endif
else

if (options%analog_weights) then
! compute the logistic as the weighted probability of threshold exceedance in the analog population
logistic = compute_analog_exceedance(obs, analogs, logistic_threshold, weights)
else
! compute the logistic as the probability of threshold exceedance in the analog population
Expand All @@ -908,8 +898,18 @@ subroutine downscale_analog_regression(x, atm, obs, output_coeff, output, error,

else
logistic = compute_logistic_regression(x, regression_data, obs_analogs, coefficients, logistic_threshold)
where( coefficients > 1e20 ) coefficients = 1e20
where( coefficients < -1e20 ) coefficients = -1e20

! Check for severe errors in the logistic regression. This can happen with some input data.
! If it fails, fall back to the analog exceedence calculation
if (ieee_is_nan(logistic)) then
if (options%analog_weights) then
logistic = compute_analog_exceedance(obs, analogs, logistic_threshold, weights)
else
! compute the logistic as the probability of threshold exceedance in the analog population
logistic = compute_analog_exceedance(obs, analogs, logistic_threshold)
endif

endif
if (options%debug) then
do j = 1,nvars
output_coeff(j+nvars) = coefficients(j)
Expand Down Expand Up @@ -1141,6 +1141,7 @@ subroutine setup_timing(training_atm, training_obs, predictors, options)
type(atm), intent(inout) :: training_atm, predictors
type(obs), intent(inout) :: training_obs
type(config), intent(in) :: options
integer :: n_atm_train, n_obs_train

write(*,*) "-------------------"
write(*,*) "Training ATM"
Expand All @@ -1152,6 +1153,13 @@ subroutine setup_timing(training_atm, training_obs, predictors, options)
write(*,*) "Predictors"
call setup_time_indices(predictors, options)

n_obs_train = training_obs%training_stop - training_obs%training_start
n_atm_train = training_atm%training_stop - training_atm%training_start

if (n_obs_train /= n_atm_train) then
stop "ERROR Inconsistent time periods in training atm and obs data"
endif

end subroutine setup_timing

subroutine allocate_data(output, n_obs_variables, noutput, nx, ny, n_atm_variables, tr_size, options)
Expand Down
21 changes: 9 additions & 12 deletions src/downscaling_stats/regression.f90
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ function compute_regression(x, training_x, training_y, coefficients, y_test, err

allocate(varlist(nvars))
varlist = -1
varlist(1) = 1 ! this assumes there is a constant passed in for the first variable...
useable_vars = 1 ! the constant
! find all vars that have some variability through time.
varlist(1) = 1 ! this assumes there is a constant passed in for the first variable...
useable_vars = 1 ! the constant

! find all vars that have some variability through time.
do i=2,nvars
nonzero_count = 0
do j=1,ntimes
Expand All @@ -45,7 +45,7 @@ function compute_regression(x, training_x, training_y, coefficients, y_test, err
endif
enddo
! need at least two data points that are different from the first data point
! to ensure that the regression on that variable might be useful.
! to ensure that the regression on that variable might be useful.
if ( nonzero_count > 2 ) then
useable_vars = useable_vars + 1
varlist(i) = i
Expand Down Expand Up @@ -116,14 +116,17 @@ function compute_regression(x, training_x, training_y, coefficients, y_test, err
if (present(weights)) then
if (allocated(weights)) then
if (size(weights)/=size(training_y_lp)) then
!$omp critical (print_lock)
write(*,*) "ERROR size of weights /= data"
write(*,*), shape(weights), shape(training_y_lp)
!$omp end critical (print_lock)
endif
if (present(y_test)) then
error = sqrt( sum(((training_y_lp - y_test)**2)*weights) / sum(weights) )
else
error = sqrt( sum(((training_y_lp - training_y)**2)*weights) / sum(weights) )
endif

else
if (present(y_test)) then
error = sqrt( sum((training_y_lp - y_test)**2) / ntimes )
Expand Down Expand Up @@ -171,6 +174,7 @@ subroutine lapack_least_squares(X, Y, B, working_space, info)

nrhs = 1

! passing working_space in and out *could* be more efficient. Only used in tests right now
if (present(working_space)) then
! First find the optimal work size (LWORK)
LWORK = -1
Expand All @@ -189,13 +193,6 @@ subroutine lapack_least_squares(X, Y, B, working_space, info)
CALL SGELS( 'N', M, N, NRHS, X, LDX, Y, LDY, WORK, LWORK, innerINFO )
endif
if (innerINFO/=0) then
! !$omp critical (print_lock)
! if (innerINFO<0) then
! write(*,*) "ERROR in SGELS with argument:", 0-innerINFO
! else
! write(*,*) "ERROR in SGELS A does not have full rank, position:",innerINFO
! endif
! !$omp end critical (print_lock)
Y(1) = sum(Y)/size(Y)
Y(2:)= 0
endif
Expand Down
Loading

0 comments on commit fd6b2d9

Please sign in to comment.