-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_coef_signif_test.R
59 lines (50 loc) · 1.68 KB
/
run_coef_signif_test.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
source('config/config.R')
source('config/file_paths.R')
library(foreach)
library(doParallel)
library(tidyverse)
library(data.table)
setDTthreads(1)
library(vroom)
library(Biostrings)
library(mclogit)
library(matrixcalc)
library(RhpcBLASctl)
omp_set_num_threads(1)
blas_set_num_threads(1)
args = commandArgs(trailingOnly=TRUE)
PARAM_GROUP <<- args[1]
source(paste0(MOD_PROJECT_PATH, '/scripts/param_groups/', PARAM_GROUP, '.R'))
NCPU <<- as.numeric(args[2])
# 5' motif nucleotide count
LEFT_NUC_MOTIF_COUNT <<- as.numeric(args[3])
# 3' motif nucleotide count
RIGHT_NUC_MOTIF_COUNT <<- as.numeric(args[4])
MODEL_TYPE <<- args[5]
L2 <<- args[6]
ANNOTATION_TYPE <<- args[7]
source(paste0(MOD_PROJECT_PATH,'/scripts/data_compilation_functions.R'))
# get full trained coefficients
coef_path = get_model_coef_file_path(L2 = L2)
full_coefs = fread(coef_path)
# get all trained coefficients
ex_boot_coef_path = get_bootstrap_model_coef_file_path(iteration = 1, L2 = L2)
boot_coef_paths = list.files(dirname(ex_boot_coef_path), full.names = TRUE, pattern = 'trained_coefs')
boot_coefs = data.table()
for (path in boot_coef_paths){
temp = fread(path)
boot_coefs = rbind(boot_coefs, temp)
}
# get bootstrap error
cols = c('coefficient', 'base', 'position', 'side', 'trim_type')
boot_coefs_cond = boot_coefs[, sd(value), by = cols]
setnames(boot_coefs_cond, 'V1', 'bootstrap_sd')
# combine data and compute zscores/pvalues
tog = merge(full_coefs, boot_coefs_cond)
tog[, zscore := value/bootstrap_sd]
tog[, pvalue := 2 * (1 - pnorm(abs(zscore)))]
test_count = nrow(tog)
tog[, adjusted_pvalue := pvalue * test_count]
# write results
path = get_bootstrap_pvalue_path(L2)
fwrite(tog[, -c('error')], path, sep = '\t')