diff --git a/.RData b/.RData new file mode 100644 index 0000000..b887e0b Binary files /dev/null and b/.RData differ diff --git a/.Rhistory b/.Rhistory index ff3593e..34764ca 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,512 +1,512 @@ -#store the sequence in a matrix -m.models <- rbind(m.models, c(1,s.index)) -j <- j + 1 -} -m.matrix <- burn.seq(m.models, len) -result <- result.GibbsSampler(m.matrix, y, x, k, gamma, -p, n.models, info, family) -v.prob <- colSums(m.matrix[,-1])/len -v.select <- x.predictors[v.prob > tau] -result$v.prob <- v.prob -result$v.select <- v.select -result$tau <- tau -result$x.predictors <- x.predictors -return(result) -} -m1.restrict <- GibbsSampler(y, x, n.vars = 50, perm = TRUE, len = 200, -info = "exBIC", family = "binomial") -plots.ichart(m1.restrict) -plots.ichart(m.block) -plots.ichart(m.restrict) -m.block$v.select -m.restrict$v.select -m1.restrict$v.select -devtools::check() -devtools::build() -m1.restrict <- GibbsSampler(y, x, n.vars = 50, perm = TRUE, -info = "exBIC", family = "binomial") -View(GibbsSampler) -plots.ichart(m1.restrict) -m1.restrict$v.select -load("H:/UbuntuRv2/STC/TCG/tcg_swio.RData") -plots.ichart(m.block) -plots.ichart(m.restrict) -plots.mf(m.block) -plots.mf(m.restrict) -plots.vr(m.block) -plots.vr(m.block, n.vars = 50) -plots.vr(m.restrict) -m.block$v.select -m.restrict$v.select -cor(data.train$P_VORT_500, data.train$P_VORT_500) -cor(data.train$P_VORT_500, data.train$P_VORT_400) -plots.vr(m.block) -plots.vr(m.restrict) -load("H:/UbuntuRv2/STC/TCG/tcg_swio_new.RData") -plots.ichart(m.block) -plots.mf(m.block) -plots.vr(m.block) -m.block$v.select -load("H:/UbuntuRv2/STC/TCG/tcg_swio_new.RData") -m.block$v.select -m.restrict$v.select -load("H:/UbuntuRv2/STC/TCG/tcg_swio.RData") -m.block$v.select -m.restrict$v.select -load("H:/UbuntuRv2/STC/TCG/tcg_swio_new.RData") -plots.ichart(m.block) -plots.ichart(m.restrict) -plots.mf(m.block) -plots.mf(m.restrict) -plots.vr(m.block) -plots.vr(m.block, n.vars = 50) -plots.vr(m.restrict) -plots.vr(m.block, n.vars = 50) -plots.vr(m.block) -load("H:/UbuntuRv2/STC/TCG/tcg_ar.RData") -plots.ichart(m.block) -plots.ichart(m.restrict) -plots.mf(m.block) -plots.mf(m.restrict) -m.block$v.select -m.restrict$v.select -load("H:/UbuntuRv2/STC/TCG/tcg_swio_new.RData") -plots.ichart(m.block) -plots.ichart(m.restrict) -plots.mf(m.block) -plots.mf(m.restrict) -plots.vr(m.block) -plots.vr(m.restrict) -m.block$v.select -m.restrict$v.select -2^{16} -load("H:/UbuntuRv2/STC/TCG/tcg_ar_new.RData") -plots.ichart(m.block) -plots.mf(m.block) -plots.vr(m.block) -m.block$v.select -load("H:/UbuntuRv2/STC/TCG/others/tcg_ar_new.RData") -m.block$v.select -m.restrict$v.select -install.packages("devtools") -devtools::install_github("fhebert/SNPSetSimulations",build_opts=NULL) -install.packages("devtools") -install.packages(c("BAS", "bslib", "cachem", "htmltools", "httpuv", "languageserver", "later", "memoise", "openssl", "progressr", "R.utils", "remotes", "rex", "rlang", "shiny", "styler", "tseries", "tsibble")) -install.packages(c("BAS", "bslib", "cachem", "htmltools", "httpuv", "languageserver", "later", "memoise", "openssl", "progressr", "R.utils", "remotes", "rex", "rlang", "shiny", "styler", "tseries", "tsibble")) -install.packages(c("BAS", "bslib", "cachem", "htmltools", "httpuv", "languageserver", "later", "memoise", "openssl", "progressr", "R.utils", "remotes", "rex", "rlang", "shiny", "styler", "tseries", "tsibble")) -install.packages(c("BAS", "bslib", "cachem", "htmltools", "httpuv", "languageserver", "later", "memoise", "openssl", "progressr", "R.utils", "remotes", "rex", "rlang", "shiny", "styler", "tseries", "tsibble")) -install.packages("rlang") -devtools::install_github("fhebert/SNPSetSimulations",build_opts=NULL) -install.packages("GenOrd") -devtools::install_github("fhebert/SNPSetSimulations",build_opts=NULL) -devtools::install_github("fhebert/SNPSetSimulations",build_opts=NULL, force = T) -library(SNPSetSimulations) -vignette("SNPSetSimulations") -?SNPSetSimulations -load("H:/UbuntuRv2/k610ern/VS_SNP_new.RData") -m.block$v.select -load("H:/UbuntuRv2/STC/TCG/tcg_ar_new.RData") -load("H:/UbuntuRv2/STC/TCG/tcg_ar_new.RData") -library(IBGS) -devtools::install() -load("H:/UbuntuRv2/STC/TCG/tcg_ar_new.RData") -plots.ichart(m.block) -library(IBGS) -plots.ichart(m.block) -plots.ichart(m.restrict) -plots.mf(m.block) -plots.mf(m.restrict) -plots.vr(m.block) -#plots.vr(m.block, n.vars = 50) -plots.vr(m.restrict) -m.block$v.select -m.restrict$v.select -m.block$c.models$models[[1]] -m.restrict$c.models$models[[1]] -setwd("H:/UbuntuRv2/k610ern") -load("k610ern.RData") -##omit NA -data_na <- na.omit(data1) -###xy -x1 <- data_na[,-1] +Stein <- function(w){ +Pi <- exp(Pred%*%w)/(1+exp(Pred%*%w)) +ss <- -sum( y*log(Pi)+(1-y)*log(1-Pi) ) +ss +} +model <- optim(w,fn=Stein,method="L-BFGS-B",lower=rep(0,len=m),upper=rep(1,len=m)) +w <- model$par +###################################### +Pred <- PRED%*%w +EProb <- exp(Pred)/(1+exp(Pred)) +plot(EProb,y,xlab="Estimated probability",ylab="Observed response") +print(as.numeric(cor(TProb,EProb))) +} +for(n.seed in 1:100){ +set.seed(n.seed) +n <- 100 +p <- 1000 +AX <- matrix(rnorm(n*p,0,1),ncol=p) +SSS <- 50 #Ture numb para +BETA <- rnorm(SSS,0,1) +z <- AX[,1:SSS]%*%BETA +TProb <- exp(z)/(1+exp(z)) +y <- as.numeric(runif(n,0,1) 0.95, na.rm = TRUE))] -x2 <- as.data.frame(lapply(x1.new, as.factor)) -View(x2) -y0 <- data_na[,1] -m1 <- glm(y0~x2[,1:3], family = "binomial") -x2[,1:3] -m1 <- glm(y0~x2[,1], family = "binomial") -summary(m1) -m1 <- glm(y0~x2[,1]+ x2[,2], family = "binomial") -summary(m1) -data <- as.data.frame(cbind(y0, x2)) -View(data) -m1 <- glm(y0~., data = data[,1:2] family = "binomial") -m1 <- glm(y0~., data = data[,1:2], family = "binomial") -summary(m1) -m1 <- glm(y0~., data = data[,1:3\], family = "binomial") -m1 <- glm(y0~., data = data[,1:3], family = "binomial") -summary(m1) -y0 <- data_na[,1] -data <- as.data.frame(cbind(y0, x2)) -n <- dim(x2)[1] -p <- dim(x2)[2] -#train & test -index <- sample(x = 2, size = n, replace = TRUE, prob = c(0.8,0.2)) -data.train <- data[index == 1,] -data.test <- data[index == 2,] -#train set -y <- data.train[,1] -x <- data.train[,-1] -load("k610ern.RData") -##omit NA -data_na <- na.omit(data1) -###xy -x1 <- data_na[,-1] +data <- tcg[, !apply(tmp, 2, function(x) any(abs(x) > 0.95, na.rm = TRUE))] +#AR ------------------------------------------ #remove highly correlated predictors -tmp <- cor(x1) +tmp <- cor(tcg) tmp[upper.tri(tmp)] <- 0 diag(tmp) <- 0 -x1.new <- x1[, !apply(tmp, 2, function(x) any(abs(x) > 0.95, na.rm = TRUE))] -x2 <- as.data.frame(lapply(x1.new, as.factor)) -y0 <- data_na[,1] -data <- as.data.frame(cbind(y0, x2)) -n <- dim(x2)[1] -p <- dim(x2)[2] +data <- tcg[, !apply(tmp, 2, function(x) any(abs(x) > 0.95, na.rm = TRUE))] +n <- dim(data)[1] +p <- dim(data)[2] #train & test -index <- sample(x = 2, size = n, replace = TRUE, prob = c(0.8,0.2)) -data.train <- data[index == 1,] -data.test <- data[index == 2,] +index <- c(154:238,2032:2504) +data.train <- data[-index,] +data.test <- data[index,] +data.train$TC_genesis <- as.factor(data.train$TC_genesis) +z.smote <- SMOTE(TC_genesis~., data.train) +library(DMwR) +z.smote <- SMOTE(TC_genesis~., data.train) #train set -y <- data.train[,1] -x <- data.train[,-1] -dim(x) -#block -m.block <- BlockGibbsSampler(y, x, info = "exBIC", family = "binomial") -x <- as.matrix(data.train[,-1]) -#block -m.block <- BlockGibbsSampler(y, x, info = "exBIC", family = "binomial") -#train set -y <- data.train[,1] -x <- data.train[,-1] -attr(x) -summary(x[,1]) -data <- as.data.frame(cbind(y, x)) -m1 <- glm(y~., data = data[,1:3], family = "binomial") -summary(m1) -x <- as.matrix(data.train[,-1]) -data <- as.data.frame(cbind(y, x)) -m1 <- glm(y~., data = data[,1:3], family = "binomial") -head(data) -summary(data[,2]) -summary(data[,1]) -summary(x[,1]) -summary(x2[,1]) -cbind(x2,vector()) -cbind(x2,data.frame()) -devtools::check() +y <- as.numeric(z.smote[,121])-1 +table(y) +x <- as.matrix(z.smote[,-121]) +load("H:/UbuntuRv2/STC/TCG/tcg_ar_method_1_1.RData") +Pred <- predicts.Gibbs(data.test, m.block) +binary.table <- function(y, y_pred, type = c("link", "response")){ +m <- matrix(rep(0,4), ncol = 2) +rownames(m) <- c("Positive", "Negative") +colnames(m) <- c("True", "False") +if(type == "link"){ +m[1,1] <- sum(y == 1 & y_pred > 0) +m[1,2] <- sum(y == 0 & y_pred > 0) +m[2,1] <- sum(y == 1 & y_pred < 0) +m[2,2] <- sum(y == 0 & y_pred < 0) +} +else{ +m[1,1] <- sum(y == 1 & y_pred > 0.5) +m[1,2] <- sum(y == 0 & y_pred > 0.5) +m[2,1] <- sum(y == 1 & y_pred < 0.5) +m[2,2] <- sum(y == 0 & y_pred < 0.5) +} +return(m) +} +binary.table(data.test$TC_genesis, Pred, type = "response") +stargazer::stargazer(binary.table(data.test$TC_genesis, Pred, type = "response")) +load("H:/UbuntuRv2/STC/TCG/tcg_ar_method_2_1.RData") +Pred <- predicts.Gibbs(data.test, m.block) +stargazer::stargazer(binary.table(data.test$TC_genesis, Pred, type = "response")) +#SMOTE training ---------------------------------- +data.train$TC_genesis <- as.factor(data.train$TC_genesis) +z.smote <- SMOTE(TC_genesis~., data.train) +PredT <- vector() +for(i in 1:length(m.block$c.models$weights)){ +fit <- m.block$c.models$models[[i]] +x.var <- as.numeric(colnames(fit$model[,-1])) +z.new <- z.smote[,c(x.var,p)] +fit0 <- glm(TC_genesis~., data = z.new, family = binomial()) +pred0 <- predict(fit0, data.test[,x.var], type = "link") +PredT <- cbind(PredT, pred0) +} +Pred.t <- PredT %*% m.block$c.models$weights +save.image("tcg_ar_method_3_1.RData") +m.block$c.models$weights +binary.table(data.test$TC_genesis, Pred.t, type = "link") +stargazer::stargazer(binary.table(data.test$TC_genesis, Pred.t, type = "link")) +load("H:/UbuntuRv2/STC/TCG/tcg_ar_method_1_1.RData") +load("H:/UbuntuRv2/STC/TCG/tcg_ar_method_1_1.RData") +fit <- m.block$c.models$models[[1]] +dim(fit$model) +head(fit$model) +P0 <- vector() +for(i in 1:length(m.block$c.models$weights)){ +fit <- m.block$c.models$models[[i]] +p1 <- dim(fit$model)-1 +PredT <- cbind(P0, p1) +} +p2 <- P0 %*% m.block$c.models$weights +P0 <- vector() +for(i in 1:length(m.block$c.models$weights)){ +fit <- m.block$c.models$models[[i]] +p1 <- dim(fit$model)-1 +PredT <- cbind(P0, p1) +} +P0 <- vector() +for(i in 1:length(m.block$c.models$weights)){ +fit <- m.block$c.models$models[[i]] +p1 <- dim(fit$model)-1 +P0 <- c(P0, p1) +} +P0 <- vector() +for(i in 1:length(m.block$c.models$weights)){ +fit <- m.block$c.models$models[[i]] +p1 <- dim(fit$model)[2]-1 +P0 <- c(P0, p1) +} +p2 <- P0 %*% m.block$c.models$weights +m.block$c.models$weights +load("H:/UbuntuRv2/STC/TCG/tcg_ar_method_2_1.RData") +P0 <- vector() +for(i in 1:length(m.block$c.models$weights)){ +fit <- m.block$c.models$models[[i]] +p1 <- dim(fit$model)[2]-1 +P0 <- c(P0, p1) +} +p2 <- P0 %*% m.block$c.models$weights +balanced.metric <- function(m){ +precision <- m[1,1]/(m[1,1] + m[1,2]) +recall <- m[1,1]/(m[1,1] + m[2,1]) +f.measure <- (2*precision*recall)/(precision + recall) +return(c(precision, recall, f.measure)) +} +load("H:/UbuntuRv2/STC/TCG/tcg_ar_method_1_1.RData") +Pred <- predicts.Gibbs(data.test, m.block) +m1 <- binary.table(data.test$TC_genesis, Pred, type = "response") +binary.table <- function(y, y_pred, type = c("link", "response")){ +m <- matrix(rep(0,4), ncol = 2) +rownames(m) <- c("Positive", "Negative") +colnames(m) <- c("True", "False") +if(type == "link"){ +m[1,1] <- sum(y == 1 & y_pred > 0) #TP +m[1,2] <- sum(y == 0 & y_pred > 0) #FP +m[2,1] <- sum(y == 1 & y_pred < 0) #FN +m[2,2] <- sum(y == 0 & y_pred < 0) #TN +} +else{ +m[1,1] <- sum(y == 1 & y_pred > 0.5) +m[1,2] <- sum(y == 0 & y_pred > 0.5) +m[2,1] <- sum(y == 1 & y_pred < 0.5) +m[2,2] <- sum(y == 0 & y_pred < 0.5) +} +return(m) +} +balanced.metric <- function(m){ +precision <- m[1,1]/(m[1,1] + m[1,2]) +recall <- m[1,1]/(m[1,1] + m[2,1]) +f.measure <- (2*precision*recall)/(precision + recall) +return(c(precision, recall, f.measure)) +} +m1 <- binary.table(data.test$TC_genesis, Pred, type = "response") +load("H:/UbuntuRv2/STC/TCG/tcg_ar_method_2_1.RData") +m2 <- binary.table(data.test$TC_genesis, Pred, type = "response") +Pred <- predicts.Gibbs(data.test, m.block) +m2 <- binary.table(data.test$TC_genesis, Pred, type = "response") +load("H:/UbuntuRv2/STC/TCG/tcg_ar_method_3_1.RData") +m3 <- binary.table(data.test$TC_genesis, Pred.t, type = "link") +imbalanced.metric <- function(m){ +precision <- m[1,1]/(m[1,1] + m[1,2]) +recall <- m[1,1]/(m[1,1] + m[2,1]) +f.measure <- (2*precision*recall)/(precision + recall) +return(c(precision, recall, f.measure)) +} +m4 <- rbind(imbalanced.metric(m1),imbalanced.metric(m2),imbalanced.metric(m3)) +m4 +stargazer::stargazer(m4) +sum((tcg$TC_genesis)) +2504-238 devtools::check() -devtools::build() -load("H:/UbuntuRv2/k610ern/VS_SNP_c.RData") -plots.ichart(m.block) -plots.mf(m.block) -plots.vr(m.block) -plots.vr(m.block, n.vars = 50) -m.block$v.select -m.block$c.models$models[[1]] -summary(m.block$c.models$models[[1]]) -AICc(m.block$c.models$models[[1]]) -AICcmodavg::AICc(m.block$c.models$models[[1]]) -exBIC(m.block$c.models$models[[1]]) -exBIC(m.block$c.models$models[[1]], gamma = 0.5) -?exBIC -exBIC(m.block$c.models$models[[1]], 0.5, 279) -BIC(m.block$c.models$models[[1]]) -load("H:/UbuntuRv2/k610ern/VS_SNP_new.RData") -summary(m.block$c.models$models[[1]]) -load("H:/UbuntuRv2/k610ern/VS_SNP.RData") -summary(m.block$c.models$models[[1]]) -setwd("H:/UbuntuRv2/STC/Final") -#data -STC <- read.csv("STC.csv") -#predictors -x <- as.matrix(STC[,10:45]) -colnames(x) <- c("DMSLP.Aug", "TMSLP.Aug", "DMI.Aug", "DMIE.Aug", "DMIW.Aug", "QBO.Aug", -"SOI.Aug", "N12.Aug", "N34.Aug", "N3.Aug", "N4.Aug", "EMI.Aug", -"DMSLP.Sep", "TMSLP.Sep", "DMI.Sep", "DMIE.Sep", "DMIW.Sep", "QBO.Sep" , -"SOI.Sep" , "N12.Sep" , "N34.Sep" , "N3.Sep" , "N4.Sep" , "EMI.Sep" , -"DMSLP.Oct", "TMSLP.Oct", "DMI.Oct" , "DMIE.Oct" , "DMIW.Oct", "QBO.Oct" , -"SOI.Oct" , "N12.Oct" , "N34.Oct", "N3.Oct" , "N4.Oct" , "EMI.Oct") -tmp <- cor(x) -tmp[upper.tri(tmp)] <- 0 -diag(tmp) <- 0 -x.new <- x[, !apply(tmp, 2, function(x) any(abs(x) > 0.95, na.rm = TRUE))] -View(x.new) -setwd("H:/UbuntuRv2/k610ern") -load("H:/UbuntuRv2/k610ern/VS_SNP_c.RData") -plots.ichart(m.block) -plots.mf(m.block) -plots.vr(m.block) -m.block$v.select -plots.vr(m.block, n.vars = 50) -summary(m.block$c.models$models[[1]]) -exBIC((m.block$c.models$models[[1]])) -exBIC((m.block$c.models$models[[1]]),0.5,267) -library(SNPSetSimulations) -m <- 100 -rho <- 0.8 -Sigma <- rho^abs(outer(1:m,1:m,"-")) -View(Sigma) -G <- PopulationSNPSet(1000,Sigma) -G <- PopulationSNPSet(1000,Sigma, p=rep(0.4,m)) -G <- PopulationSNPSet(n=1000,Sigma = Sigma, p=rep(0.4,m)) -library(SNPSetSimulations) -m <- 100 -rho <- 0.8 -S <- rho^abs(outer(1:m,1:m,"-")) -G <- PopulationSNPSet(n=1000,Sigma = S, p=rep(0.4,m)) -G <- PopulationSNPSet(n=1000,Sigma = S) -G <- PopulationSNPSet(n=100000,Sigma = S) -M <- cbind(rep(1/3,m), rep(2/3,m)) -M <- lapply(1:nrow(M), function(i){M[i,]}) -G <- PopulationSNPSet(n=1000,Sigma = S, marginal = M) -??`SNPSetSimulations-package` -Y <- SampleSNPPhenotype(G, -1, beta = rep(2,5), I = 50:54 ) -Y -View(Y) -save(Y, file = "STC.RData") -save(Y, file = "SNP_simu.RData") -load("SNP_simu.RData") -###xy -x1 <- Y$SNP -#remove highly correlated predictors -tmp <- cor(x1) -tmp[upper.tri(tmp)] <- 0 -diag(tmp) <- 0 -x1.new <- x1[, !apply(tmp, 2, function(x) any(abs(x) > 0.95, na.rm = TRUE))] -###xy -x1 <- Y$SNP -x <- as.data.frame(lapply(x1, as.factor)) -y <- Y$Phenotype -summary(x1[,1]) -View(x) -View(x1) -dim(x1) -x <- as.data.frame(lapply(as.matrix(x1), as.factor)) -as.matrix(x1) -load("k610ern.RData") -###xy -x1 <- data_na[,-1] -#remove highly correlated predictors -tmp <- cor(x1) -tmp[upper.tri(tmp)] <- 0 -diag(tmp) <- 0 -x1.new <- x1[, !apply(tmp, 2, function(x) any(abs(x) > 0.95, na.rm = TRUE))] -x2 <- as.data.frame(lapply(x1.new, as.factor)) -x2 <- as.data.frame(apply(x1.new, 1,as.factor)) -x <- as.data.frame(apply(x1,1, as.factor)) -View(x) -dim(x1) -x <- as.data.frame(apply(x1,2, as.factor)) -summary(x[,1]) -x[,1] -load("k610ern.RData") -##omit NA -data_na <- na.omit(data1) -###xy -x1 <- data_na[,-1] -x2 <- as.data.frame(lapply(x1, as.factor)) -x10 <- as.data.frame(x1) -x <- as.data.frame(lapply(x10, as.factor)) -load("SNP_simu.RData") -###xy -x1 <- as.data.frame(Y$SNP) -x <- as.data.frame(lapply(x1, as.factor)) -y <- Y$Phenotype -summary(x[,1]) -devtools::build() -devtools::build() -load("H:/UbuntuRv2/k610ern/VS_SNP_simu1.RData") -plots.ichart(m.block) -library(IBGS) -plots.ichart(m.block) -plots.mf(m.block) -plots.vr(m.block) -m.block$v.select -summary(m.block$c.models$models[[1]]) -m2 <- glm(y~x[,50:54], family = binomial()) -m2 <- glm(y~as.matrix(x[,50:54]), family = binomial()) -m2 <- glm(y~., data = as.data.frame(cbind(y,x[,50:54])), family = binomial()) -summary(m2) -m3 <- glm(y~., data = as.data.frame(cbind(y,x1[,50:54])), family = binomial()) -summary(m3) -load("H:/UbuntuRv2/k610ern/VS_SNP_simu2.RData") -plots.ichart(m.block) -plots.mf(m.block) -plots.vr(m.block) -m.block$v.select -summary(m.block$c.models$models[[1]]) -sum(cor(x) > 0.9) -sum(cor(x) > 0.9 - 100) -sum(cor(x) > 0.9)-100 -head(cor(x)) -m3 <- glm(y~., data = as.data.frame(cbind(y,x[,50:54]), binomial())) -m3 <- glm(y~., data = as.data.frame(cbind(y, x[,50:54])), binomial()) -summary(m3) -summary(m.block$c.models$models[[1]]) -load("k610ern.RData") -setwd("H:/UbuntuRv2/k610ern") -load("k610ern.RData") -##omit NA -data_na <- na.omit(data1) -###xy -x1 <- data_na[,-1] -#remove highly correlated predictors -tmp <- cor(x1) -tmp[upper.tri(tmp)] <- 0 -diag(tmp) <- 0 -x2 <- x1[, !apply(tmp, 2, function(x) any(abs(x) > 0.95, na.rm = TRUE))] -y0 <- data_na[,1] -data <- as.data.frame(cbind(y0, x2)) -summary(x[,1]) -load("H:/UbuntuRv2/k610ern/VS_SNP_c2.RData") -plots.ichart(m.block) -plots.mf(m.block) -plots.vr(m.block) -load("H:/UbuntuRv2/k610ern/VS_SNP_c2.RData") -m.block$v.select -plots.vr(m.block) -load("H:/UbuntuRv2/k610ern/VS_SNP_c2.RData") -load("H:/UbuntuRv2/k610ern/VS_SNP_c2.RData") -plots.ichart(m.block) -plots.mf(m.block) -plots.vr(m.block) -m.block$v.select -plots.vr(m.block, n.vars = 50) -summary(m.block$c.models$models[[1]]) -load("k610ern.RData") -library(IBGS) -library(doParallel) -registerDoParallel(12) -##omit NA -data_na <- na.omit(data1) -###xy -x1 <- data_na[,-1] -#remove highly correlated predictors -tmp <- cor(x1) -tmp[upper.tri(tmp)] <- 0 -diag(tmp) <- 0 -x2 <- x1[, !apply(tmp, 2, function(x) any(abs(x) > 0.95, na.rm = TRUE))] -y0 <- data_na[,1] -data <- as.data.frame(cbind(y0, x2)) -n <- dim(x2)[1] -p <- dim(x2)[2] -m.restrict <- GibbsSampler(y0, x2, info = "exBIC", family = "binomial") -m.restrict <- GibbsSampler(y0, x2, n.vars = 20, info = "exBIC", family = "binomial") -plots.ichart(m.restrict) -plots.mf(m.block) -plots.mf(m.restrict) -plots.vr(m.block) -plots.vr(m.block, n.vars = 50) -plots.vr(m.restrict) -m.block$v.select -m.restrict$v.select -summary(m.block$c.models$models[[1]]) -summary(m.restrict$c.models$models[[1]]) -m4 <- glm(y0~x2$rs1004984_A + x2$rs1131878_C + x2$rs1845557_C + x2$rs2300697_C + -x2$rs2476923_A, family = binomial()) -summary(m4) -m4 <- glm(y0~x2$rs1004984_A + x2$rs1131878_C + x2$rs1845557_C + x2$rs2300697_C + -x2$rs2476923_A + x2$rs248805_A + x2$rs2547231_C + x2$rs2758331_A + -x2$rs3760802_A + x2$rs4147581_G + x2$rs4702374_G + x2$rs4952220_C + -x2$rs6163_A, family = binomial()) -summary(m4) -exBIC(m4) -exBIC(m4,0.5,279) -m5 <- glm(y0~x1$rs1004984_A + x1$rs1131878_C + x1$rs12917295_G + x1$rs1845557_C + x1$rs2300697_C + -x1$rs2476923_A + x1$rs248805_A + x1$rs2547231_C + x1$rs2758331_A + -x1$rs3760802_A + x1$rs4147581_G + x1$rs4702374_G + x1$rs4952220_C + -x1$rs6163_A + x1$rs6902771_T + x1$rs7706809_T, family = binomial()) -summary(m5) -x4 <- x1 -x1 <- as.data.frame(lapply(x1, as.factor)) -m5 <- glm(y0~x1$rs1004984_A + x1$rs1131878_C + x1$rs12917295_G + x1$rs1845557_C + x1$rs2300697_C + -x1$rs2476923_A + x1$rs248805_A + x1$rs2547231_C + x1$rs2758331_A + -x1$rs3760802_A + x1$rs4147581_G + x1$rs4702374_G + x1$rs4952220_C + -x1$rs6163_A + x1$rs6902771_T + x1$rs7706809_T, family = binomial()) -summary(m5) -summary(m.restrict$c.models$models[[1]]) -summary(m.block$c.models$models[[1]]) -###remove highly correlation predictors -remove.highly.cor <- function(x, rho){ -tmp <- cor(x) -tmp[upper.tri(tmp)] <- 0 -diag(tmp) <- 0 -x1 <- x[, !apply(tmp, 2, function(x) any(abs(x) > 0.95, na.rm = TRUE))] -x2 <- x[, apply(tmp, 2, function(x) any(abs(x) > 0.95, na.rm = TRUE))] -x0 <- list() -x0$extracted <- x1 -x0$removed <- x2 -return(x0) -} -###remove highly correlation predictors -remove.highly.cor <- function(x, rho){ -tmp <- cor(x) -tmp[upper.tri(tmp)] <- 0 -diag(tmp) <- 0 -x1 <- x[, !apply(tmp, 2, function(x) any(abs(x) > 0.95, na.rm = TRUE))] -x2 <- x[, apply(tmp, 2, function(x) any(abs(x) > 0.95, na.rm = TRUE))] -x0 <- list() -x0$extracted <- x1 -x0$removed <- x2 -return(x0) -} -###remove highly correlation predictors -remove.highly.cor <- function(x, rho){ -tmp <- cor(x) -tmp[upper.tri(tmp)] <- 0 -diag(tmp) <- 0 -x1 <- x[, !apply(tmp, 2, function(x) any(abs(x) > rho, na.rm = TRUE))] -x2 <- x[, apply(tmp, 2, function(x) any(abs(x) > rho, na.rm = TRUE))] -x0 <- list() -x0$extracted <- x1 -x0$removed <- x2 -return(x0) -} -x9 <- remove.highly.cor(x1, 0.95) -x1 <- x4 -x9 <- remove.highly.cor(x1, 0.95) -View(x9) -divide <- function(x, prob = c(0.8,0.2)){ -n <- dim(x)[1] -index <- sample(x = 2, size = n, replace = TRUE, prob) -x0 <- list() -x0$train <- data[index == 1,] -x0$test <- data[index == 2,] -return(x0) -} -x10 <- divide(x1) -View(x10) -x1 <- as.data.frame(lapply(x1, as.factor)) -m5 <- glm(y0~x1$rs1004984_A + x1$rs1131878_C + x1$rs12917295_G + x1$rs1845557_C + x1$rs2300697_C + -x1$rs2476923_A + x1$rs248805_A + x1$rs2547231_C + x1$rs2758331_A + -x1$rs3760802_A + x1$rs4147581_G + x1$rs4702374_G + x1$rs4952220_C + -x1$rs6163_A + x1$rs6902771_T + x1$rs7706809_T, family = binomial()) -summary(m5) -load("H:/UbuntuRv2/k610ern/VS_SNP_c1.RData") -summary(m.block$c.models$models[[1]]) -load("H:/UbuntuRv2/k610ern/VS_SNP_simu2.RData") -plots.vr(m.block) -m.restrict <- GibbsSampler(y0, x2, n.vars = 20, info = "BIC", family = "binomial") -x <- matrix(rnorm(100*100), ncol = 100) -y <- rowSums(x[,1:10]) + rnorm(100) -bic.v <- rep(0,100) -for(i in 1:100){ -bic.v[i] <- BIC(glm(y~x[,1:i])) -} -plot(bic.v) -plot(bic.v[1:98]) diff --git a/DESCRIPTION b/DESCRIPTION index d74eba3..584d27d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: IBGS Type: Package Title: Variable selection in ultrahigh dimensions using Gibbs sampler -Version: 0.1.6 +Version: 0.1.7 Author: Lizhong Chen Maintainer: Lizhong Chen Description: This package provides the iterated block Gibbs sampler, called IBGS, to solve the variable selection problem in ultrahigh dimensions. diff --git a/NAMESPACE b/NAMESPACE index feb6d51..485d603 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,9 +9,11 @@ export(CV) export(GibbsSampler) export(GibbsSamplerStep) export(ac.ratio) +export(binary.table) export(burn.seq) export(divide) export(exBIC) +export(imbalanced.metric) export(plots.ichart) export(plots.mf) export(plots.vr) diff --git a/R/gibbs.R b/R/gibbs.R index f821ce9..a094561 100644 --- a/R/gibbs.R +++ b/R/gibbs.R @@ -32,7 +32,7 @@ GibbsSampler <- function(y, x, n.vars = ncol(x), perm = TRUE, n.models = 10, x.predictors <- colnames(x) colnames(x) <- 1:p - z <- as.data.frame(cbind(y,x)) + z <- as.data.frame(cbind(y,x), stringsAsFactors = TRUE) colnames(z)[1] <- "y" s.index <- c(rep(1,n.vars), rep(0, p - n.vars)) diff --git a/R/gibbs1.R b/R/gibbs1.R index 0a8615e..ed37c17 100644 --- a/R/gibbs1.R +++ b/R/gibbs1.R @@ -19,10 +19,10 @@ GibbsSamplerStep <- function(y, x1, x2, s.model, perm, len, k, gamma, p0, info, p1 <- dim(x1)[2] if(is.null(dim(x2))){ - z <- as.data.frame(cbind(y,x1)) + z <- as.data.frame(cbind(y,x1), stringsAsFactors = TRUE) p2 <- 0 }else{ - z <- as.data.frame(cbind(y,x1,x2)) + z <- as.data.frame(cbind(y,x1,x2), stringsAsFactors = TRUE) p2 <- dim(x2)[2] } colnames(z)[1] <- "y" diff --git a/R/imbalanced.R b/R/imbalanced.R new file mode 100644 index 0000000..162b6ce --- /dev/null +++ b/R/imbalanced.R @@ -0,0 +1,42 @@ +#' Binary table for imbalanced data +#' +#' @param y observations in the data +#' @param y_pred predictions of the model +#' @param type response type +#' +#' @return a binary table +#' @export +#' +binary.table <- function(y, y_pred, type = c("link", "response")){ + m <- matrix(rep(0,4), ncol = 2) + rownames(m) <- c("Positive", "Negative") + colnames(m) <- c("True", "False") + + if(type == "link"){ + m[1,1] <- sum(y == 1 & y_pred > 0) #TP + m[1,2] <- sum(y == 0 & y_pred > 0) #FP + m[2,1] <- sum(y == 1 & y_pred < 0) #FN + m[2,2] <- sum(y == 0 & y_pred < 0) #TN + } + else{ + m[1,1] <- sum(y == 1 & y_pred > 0.5) + m[1,2] <- sum(y == 0 & y_pred > 0.5) + m[2,1] <- sum(y == 1 & y_pred < 0.5) + m[2,2] <- sum(y == 0 & y_pred < 0.5) + } + return(m) +} + +#' Imbalanced metrics +#' +#' @param m a binary table +#' +#' @return a vector consisting of imbalanced metrics +#' @export +#' +imbalanced.metric <- function(m){ + precision <- m[1,1]/(m[1,1] + m[1,2]) + recall <- m[1,1]/(m[1,1] + m[2,1]) + f.measure <- (2*precision*recall)/(precision + recall) + return(c(precision, recall, f.measure)) +} diff --git a/man/binary.table.Rd b/man/binary.table.Rd new file mode 100644 index 0000000..2a5675d --- /dev/null +++ b/man/binary.table.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/imbalanced.R +\name{binary.table} +\alias{binary.table} +\title{Binary table for imbalanced data} +\usage{ +binary.table(y, y_pred, type = c("link", "response")) +} +\arguments{ +\item{y}{observations in the data} + +\item{y_pred}{predictions of the model} + +\item{type}{response type} +} +\value{ +a binary table +} +\description{ +Binary table for imbalanced data +} diff --git a/man/imbalanced.metric.Rd b/man/imbalanced.metric.Rd new file mode 100644 index 0000000..c4652cd --- /dev/null +++ b/man/imbalanced.metric.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/imbalanced.R +\name{imbalanced.metric} +\alias{imbalanced.metric} +\title{Imbalanced metrics} +\usage{ +imbalanced.metric(m) +} +\arguments{ +\item{m}{a binary table} +} +\value{ +a vector consisting of imbalanced metrics +} +\description{ +Imbalanced metrics +} diff --git a/man/plots.vr.Rd b/man/plots.vr.Rd index d6af03c..9ba9530 100644 --- a/man/plots.vr.Rd +++ b/man/plots.vr.Rd @@ -4,7 +4,7 @@ \alias{plots.vr} \title{plot the marginal probability for top predictors} \usage{ -plots.vr(result, n.vars = 20) +plots.vr(result, n.vars = 20, lwd = 2, side = 1, line = 0.25, las = 2, cex = 1) } \arguments{ \item{result}{the result from IBGS}