.Rhistory

#   xMat<-as.data.frame(xMat)
#   yMat<-as.data.frame(yMat)
#   zMat<-as.data.frame(zMat)
#   wMat<-NA
# }
###################
library(parallel)
# Integrate the X, Y and Z example tables with xMWAS (PLS, regression mode).
# NOTE(review): within the visible history this call comes before
# library(xMWAS) and before xMat/yMat/zMat/output are assigned - confirm they
# already exist in the session.
xmwas_res <- run_xmwas(
  Xome_data = xMat, Yome_data = yMat, Zome_data = zMat, Wome_data = NA,
  outloc = output, classlabels = NA, class_fname = NA,
  xmwasmethod = "pls", plsmode = "regression",
  # Cap on variables kept per dataset, ranked by relative standard deviation.
  # A proportion also works, e.g. round(nrow(xMat) * 0.3) for the top 30%.
  max_xvar = 10000, max_yvar = 10000, max_zvar = 10000, max_wvar = 10000,
  rsd.filt.thresh = 1,
  corthresh = 0.4, # absolute correlation threshold
  # Upper limit on variables selected per dataset in the sPLS model.
  keepX = 1000, keepY = 1000, keepZ = 1000, keepW = 1000,
  pairedanalysis = FALSE, # TRUE for a repeated-measures design
  # Optimal PLS component selection; FALSE is the setting for exact Pearson
  # correlation via PLS regression.
  optselect = FALSE,
  rawPthresh = 0.05, # correlation p-value threshold (Student's t-test)
  # Max PLS components; N - 1 (N = number of samples) gives exact Pearson
  # correlation via PLS regression.
  numcomps = 5,
  net_edge_colors = c("blue", "red"),
  net_node_colors = c("orange", "green", "cyan", "pink"),
  Xname = "Metab", Yname = "Gene", Zname = "Cytokine", Wname = "W",
  net_node_shape = c("square", "circle", "triangle", "star"),
  # Missing-value filter: NA turns it off; otherwise a value in [0, 1],
  # e.g. 0.8 requires non-missing values in at least 80% of samples.
  all.missing.thresh = 0,
  missing.val = 0,
  seednum = 100, label.cex = 0.2, vertex.size = 6,
  interactive = FALSE, max_connections = NA,
  centrality_method = "eigenvector",
  use.X.reference = FALSE, removeRda = TRUE,
  compare.classes = FALSE, class.comparison.allvar = TRUE,
  modularity.weighted = TRUE, globalcomparison = TRUE,
  plot.pairwise = FALSE,
  # TRUE would run sPLS variable selection during class-wise comparison.
  apply.sparse.class.comparison = FALSE,
  layout.type = "fr1"
)
# Install the forked xMWAS from GitHub and attach it.
library(devtools)
# library(xMWAS)
install_github("J-Anzules/xMWAS")
library(xMWAS)
ls()
# Close every open output diversion so results print to the console again.
# Looping on sink.number() avoids guessing how many sink() calls are needed
# (extra calls only produce "no sink to remove" warnings, too few leave output
# diverted).
while (sink.number() > 0L) {
  sink()
}
ls()
# Start from an empty workspace before loading the example data.
# NOTE(review): this wipes every object in the global environment.
rm(list = ls())
# Example data from the H1N1 mice study (Chandler 2016): metabolome,
# transcriptome and cytokine tables plus the class labels.
data(exh1n1)
xMat        <- exh1n1$metabolome
yMat        <- exh1n1$transcriptome
zMat        <- exh1n1$cytokine
classlabels <- exh1n1$classlabels
library(xMWAS)
# Output directory for the run (absolute, machine-specific path).
output <- "C:/Users/jonan/Documents/Tyseq/Data/xMWAS_data/T2D_xMWAS_outputLOG_mettran/"
# Integrate the X, Y and Z example tables with xMWAS (PLS, regression mode).
xmwas_res <- run_xmwas(
  Xome_data = xMat, Yome_data = yMat, Zome_data = zMat, Wome_data = NA,
  outloc = output, classlabels = NA, class_fname = NA,
  xmwasmethod = "pls", plsmode = "regression",
  # Cap on variables kept per dataset, ranked by relative standard deviation.
  # A proportion also works, e.g. round(nrow(xMat) * 0.3) for the top 30%.
  max_xvar = 10000, max_yvar = 10000, max_zvar = 10000, max_wvar = 10000,
  rsd.filt.thresh = 1,
  corthresh = 0.4, # absolute correlation threshold
  # Upper limit on variables selected per dataset in the sPLS model.
  keepX = 1000, keepY = 1000, keepZ = 1000, keepW = 1000,
  pairedanalysis = FALSE, # TRUE for a repeated-measures design
  # Optimal PLS component selection; FALSE is the setting for exact Pearson
  # correlation via PLS regression.
  optselect = FALSE,
  rawPthresh = 0.05, # correlation p-value threshold (Student's t-test)
  # Max PLS components; N - 1 (N = number of samples) gives exact Pearson
  # correlation via PLS regression.
  numcomps = 5,
  net_edge_colors = c("blue", "red"),
  net_node_colors = c("orange", "green", "cyan", "pink"),
  Xname = "Metab", Yname = "Gene", Zname = "Cytokine", Wname = "W",
  net_node_shape = c("square", "circle", "triangle", "star"),
  # Missing-value filter: NA turns it off; otherwise a value in [0, 1],
  # e.g. 0.8 requires non-missing values in at least 80% of samples.
  all.missing.thresh = 0,
  missing.val = 0,
  seednum = 100, label.cex = 0.2, vertex.size = 6,
  interactive = FALSE, max_connections = NA,
  centrality_method = "eigenvector",
  use.X.reference = FALSE, removeRda = TRUE,
  compare.classes = FALSE, class.comparison.allvar = TRUE,
  modularity.weighted = TRUE, globalcomparison = TRUE,
  plot.pairwise = FALSE,
  # TRUE would run sPLS variable selection during class-wise comparison.
  apply.sparse.class.comparison = FALSE,
  layout.type = "fr1"
)
# Files before sources
ls()
# Close every open output diversion so the checks below print to the console.
while (sink.number() > 0L) {
  sink()
}
# Missing-value checks on the Z table: the full mask, its first element, and
# whether that first element is non-missing.
is.na(zMat)
is.na(zMat)[1]
!is.na(zMat)[1]

# Reinstall the forked xMWAS from GitHub.
# The package is detached before it is removed (the history did it the other
# way round), and both steps are guarded so they do not error when the package
# is not attached / not installed.
library(devtools)
if ("package:xMWAS" %in% search()) {
  detach("package:xMWAS", unload = TRUE)
}
if (nzchar(system.file(package = "xMWAS"))) {
  remove.packages("xMWAS")
}
# upgrade = "never" replaces the interactive "select option 3 to not update"
# prompt, so dependencies are left as they are without manual input.
install_github("J-Anzules/xMWAS", upgrade = "never")
ls()
# Output directory for the run (absolute, machine-specific path).
output <- "C:/Users/jonan/Documents/Tyseq/Data/xMWAS_data/T2D_xMWAS_outputLOG_mettran/"
# Integrate the X, Y and Z example tables with xMWAS (PLS, regression mode).
# NOTE(review): the preceding history detaches and reinstalls xMWAS without
# re-attaching it before this call - confirm run_xmwas is available here.
xmwas_res <- run_xmwas(
  Xome_data = xMat, Yome_data = yMat, Zome_data = zMat, Wome_data = NA,
  outloc = output, classlabels = NA, class_fname = NA,
  xmwasmethod = "pls", plsmode = "regression",
  # Cap on variables kept per dataset, ranked by relative standard deviation.
  # A proportion also works, e.g. round(nrow(xMat) * 0.3) for the top 30%.
  max_xvar = 10000, max_yvar = 10000, max_zvar = 10000, max_wvar = 10000,
  rsd.filt.thresh = 1,
  corthresh = 0.4, # absolute correlation threshold
  # Upper limit on variables selected per dataset in the sPLS model.
  keepX = 1000, keepY = 1000, keepZ = 1000, keepW = 1000,
  pairedanalysis = FALSE, # TRUE for a repeated-measures design
  # Optimal PLS component selection; FALSE is the setting for exact Pearson
  # correlation via PLS regression.
  optselect = FALSE,
  rawPthresh = 0.05, # correlation p-value threshold (Student's t-test)
  # Max PLS components; N - 1 (N = number of samples) gives exact Pearson
  # correlation via PLS regression.
  numcomps = 5,
  net_edge_colors = c("blue", "red"),
  net_node_colors = c("orange", "green", "cyan", "pink"),
  Xname = "Metab", Yname = "Gene", Zname = "Cytokine", Wname = "W",
  net_node_shape = c("square", "circle", "triangle", "star"),
  # Missing-value filter: NA turns it off; otherwise a value in [0, 1],
  # e.g. 0.8 requires non-missing values in at least 80% of samples.
  all.missing.thresh = 0,
  missing.val = 0,
  seednum = 100, label.cex = 0.2, vertex.size = 6,
  interactive = FALSE, max_connections = NA,
  centrality_method = "eigenvector",
  use.X.reference = FALSE, removeRda = TRUE,
  compare.classes = FALSE, class.comparison.allvar = TRUE,
  modularity.weighted = TRUE, globalcomparison = TRUE,
  plot.pairwise = FALSE,
  # TRUE would run sPLS variable selection during class-wise comparison.
  apply.sparse.class.comparison = FALSE,
  layout.type = "fr1"
)
library(xMWAS)
# Integrate the X, Y and Z example tables with xMWAS (PLS, regression mode),
# now with the package attached.
xmwas_res <- run_xmwas(
  Xome_data = xMat, Yome_data = yMat, Zome_data = zMat, Wome_data = NA,
  outloc = output, classlabels = NA, class_fname = NA,
  xmwasmethod = "pls", plsmode = "regression",
  # Cap on variables kept per dataset, ranked by relative standard deviation.
  # A proportion also works, e.g. round(nrow(xMat) * 0.3) for the top 30%.
  max_xvar = 10000, max_yvar = 10000, max_zvar = 10000, max_wvar = 10000,
  rsd.filt.thresh = 1,
  corthresh = 0.4, # absolute correlation threshold
  # Upper limit on variables selected per dataset in the sPLS model.
  keepX = 1000, keepY = 1000, keepZ = 1000, keepW = 1000,
  pairedanalysis = FALSE, # TRUE for a repeated-measures design
  # Optimal PLS component selection; FALSE is the setting for exact Pearson
  # correlation via PLS regression.
  optselect = FALSE,
  rawPthresh = 0.05, # correlation p-value threshold (Student's t-test)
  # Max PLS components; N - 1 (N = number of samples) gives exact Pearson
  # correlation via PLS regression.
  numcomps = 5,
  net_edge_colors = c("blue", "red"),
  net_node_colors = c("orange", "green", "cyan", "pink"),
  Xname = "Metab", Yname = "Gene", Zname = "Cytokine", Wname = "W",
  net_node_shape = c("square", "circle", "triangle", "star"),
  # Missing-value filter: NA turns it off; otherwise a value in [0, 1],
  # e.g. 0.8 requires non-missing values in at least 80% of samples.
  all.missing.thresh = 0,
  missing.val = 0,
  seednum = 100, label.cex = 0.2, vertex.size = 6,
  interactive = FALSE, max_connections = NA,
  centrality_method = "eigenvector",
  use.X.reference = FALSE, removeRda = TRUE,
  compare.classes = FALSE, class.comparison.allvar = TRUE,
  modularity.weighted = TRUE, globalcomparison = TRUE,
  plot.pairwise = FALSE,
  # TRUE would run sPLS variable selection during class-wise comparison.
  apply.sparse.class.comparison = FALSE,
  layout.type = "fr1"
)
# Repeat of the X/Y/Z example integration with xMWAS (PLS, regression mode),
# using the same arguments as the previous call in this history.
xmwas_res <- run_xmwas(
  Xome_data = xMat, Yome_data = yMat, Zome_data = zMat, Wome_data = NA,
  outloc = output, classlabels = NA, class_fname = NA,
  xmwasmethod = "pls", plsmode = "regression",
  # Cap on variables kept per dataset, ranked by relative standard deviation.
  # A proportion also works, e.g. round(nrow(xMat) * 0.3) for the top 30%.
  max_xvar = 10000, max_yvar = 10000, max_zvar = 10000, max_wvar = 10000,
  rsd.filt.thresh = 1,
  corthresh = 0.4, # absolute correlation threshold
  # Upper limit on variables selected per dataset in the sPLS model.
  keepX = 1000, keepY = 1000, keepZ = 1000, keepW = 1000,
  pairedanalysis = FALSE, # TRUE for a repeated-measures design
  # Optimal PLS component selection; FALSE is the setting for exact Pearson
  # correlation via PLS regression.
  optselect = FALSE,
  rawPthresh = 0.05, # correlation p-value threshold (Student's t-test)
  # Max PLS components; N - 1 (N = number of samples) gives exact Pearson
  # correlation via PLS regression.
  numcomps = 5,
  net_edge_colors = c("blue", "red"),
  net_node_colors = c("orange", "green", "cyan", "pink"),
  Xname = "Metab", Yname = "Gene", Zname = "Cytokine", Wname = "W",
  net_node_shape = c("square", "circle", "triangle", "star"),
  # Missing-value filter: NA turns it off; otherwise a value in [0, 1],
  # e.g. 0.8 requires non-missing values in at least 80% of samples.
  all.missing.thresh = 0,
  missing.val = 0,
  seednum = 100, label.cex = 0.2, vertex.size = 6,
  interactive = FALSE, max_connections = NA,
  centrality_method = "eigenvector",
  use.X.reference = FALSE, removeRda = TRUE,
  compare.classes = FALSE, class.comparison.allvar = TRUE,
  modularity.weighted = TRUE, globalcomparison = TRUE,
  plot.pairwise = FALSE,
  # TRUE would run sPLS variable selection during class-wise comparison.
  apply.sparse.class.comparison = FALSE,
  layout.type = "fr1"
)
# Close every open output diversion, then show the call stack of the last
# error on the console. Looping on sink.number() avoids guessing how many
# sink() calls are needed.
while (sink.number() > 0L) {
  sink()
}
traceback()

# Reinstall the forked xMWAS from GitHub and attach the fresh build.
# The package is detached before it is removed (the history did it the other
# way round), and both steps are guarded so they do not error when the package
# is not attached / not installed.
library(devtools)
if ("package:xMWAS" %in% search()) {
  detach("package:xMWAS", unload = TRUE)
}
if (nzchar(system.file(package = "xMWAS"))) {
  remove.packages("xMWAS")
}
# upgrade = "never" replaces the interactive "select option 3 to not update"
# prompt.
install_github("J-Anzules/xMWAS", upgrade = "never")
library(xMWAS)
# Output directory for the run (absolute, machine-specific path).
output <- "C:/Users/jonan/Documents/Tyseq/Data/xMWAS_data/T2D_xMWAS_outputLOG_mettran/"
# Integrate the X, Y and Z example tables with the reinstalled xMWAS
# (PLS, regression mode).
xmwas_res <- run_xmwas(
  Xome_data = xMat, Yome_data = yMat, Zome_data = zMat, Wome_data = NA,
  outloc = output, classlabels = NA, class_fname = NA,
  xmwasmethod = "pls", plsmode = "regression",
  # Cap on variables kept per dataset, ranked by relative standard deviation.
  # A proportion also works, e.g. round(nrow(xMat) * 0.3) for the top 30%.
  max_xvar = 10000, max_yvar = 10000, max_zvar = 10000, max_wvar = 10000,
  rsd.filt.thresh = 1,
  corthresh = 0.4, # absolute correlation threshold
  # Upper limit on variables selected per dataset in the sPLS model.
  keepX = 1000, keepY = 1000, keepZ = 1000, keepW = 1000,
  pairedanalysis = FALSE, # TRUE for a repeated-measures design
  # Optimal PLS component selection; FALSE is the setting for exact Pearson
  # correlation via PLS regression.
  optselect = FALSE,
  rawPthresh = 0.05, # correlation p-value threshold (Student's t-test)
  # Max PLS components; N - 1 (N = number of samples) gives exact Pearson
  # correlation via PLS regression.
  numcomps = 5,
  net_edge_colors = c("blue", "red"),
  net_node_colors = c("orange", "green", "cyan", "pink"),
  Xname = "Metab", Yname = "Gene", Zname = "Cytokine", Wname = "W",
  net_node_shape = c("square", "circle", "triangle", "star"),
  # Missing-value filter: NA turns it off; otherwise a value in [0, 1],
  # e.g. 0.8 requires non-missing values in at least 80% of samples.
  all.missing.thresh = 0,
  missing.val = 0,
  seednum = 100, label.cex = 0.2, vertex.size = 6,
  interactive = FALSE, max_connections = NA,
  centrality_method = "eigenvector",
  use.X.reference = FALSE, removeRda = TRUE,
  compare.classes = FALSE, class.comparison.allvar = TRUE,
  modularity.weighted = TRUE, globalcomparison = TRUE,
  plot.pairwise = FALSE,
  # TRUE would run sPLS variable selection during class-wise comparison.
  apply.sparse.class.comparison = FALSE,
  layout.type = "fr1"
)
#load package
library(tidyverse)
library(xMWAS)
library(ggpubr)
library(fgsea)
library(data.table)
library(readxl)
# Raw input tables; all paths are relative to the current working directory.
tran <- read_csv("../Data/xMWAS_data/ExpressionData_filtered.csv") # transcriptomics - counts
met <- read_csv("../Data/xMWAS_data/FeatureTable_justprefiltered.csv") # metabolomics - prefiltered, raw values
prot <- read_csv("../Data/xMWAS_data/ExpressionTable_justprefiltered.csv") # proteomics - prefiltered, raw values
key <- read_csv("../Data/xMWAS_data/SampleKey.csv") # class labels
# transcriptomics: reshape to a feature-by-sample table keyed by EnsembleID
tran <- data.frame(tran)
# Drop the leading index column and the gene-name column in one step.
tran <- tran[, -1] %>% dplyr::select(-external_gene_name)
# Feature IDs become row names; the ID column (now first) is then dropped.
rownames(tran) <- tran$EnsembleID
tran <- tran[, -1]
# Strip the first "_C" and then the first "_D" from every sample name.
names(tran) <- str_replace(str_replace(names(tran), "_C", ""), "_D", "")
# write.csv(tran, "../Data/xMWAS_data/tran.csv")

# Imputation + log transform, kept in a separate copy (logtran).
# Zeros are treated as missing, and every missing value is replaced with half
# of the smallest remaining value in the whole table.
logtran <- tran
logtran[logtran == 0] <- NA
mintran <- as.numeric(min(as.matrix(logtran), na.rm = TRUE))
logtran[is.na(logtran)] <- mintran / 2
logtran <- log10(logtran)
# write.csv(logtran, "../Data/xMWAS_data/logtran.csv")
# metabolomics
met <- data.frame(met)
met <- met[-1, ] # remove row with class labels
rownames(met) <- met[, 1] # move features from first column to row names
met <- met[, -1]
# Keep the sample names in their original file order BEFORE reordering `met`.
# They are used below to label `logmet`; taking them after the reorder would
# attach the wrong sample name to each column whenever the file order differs
# from the transcriptomics order.
met_file_samples <- names(met)
# Order samples to match transcriptomics data set
met <- met[names(tran)]
# write.csv(met, "../Data/xMWAS_data/met.csv")

# metabolomics - prefiltered, missing data imputed, raw values for log transformation
# skip = 1 skips the first line of the file, so the column names read here are
# not the sample names; they are restored from `met_file_samples` below.
logmet <- read_csv("../Data/xMWAS_data/FeatureTable_prefiltered.csv", skip = 1)
logmet <- data.frame(logmet)
rownames(logmet) <- logmet[, 1] # move features from first column to row names
logmet <- logmet[, -1]
# NOTE(review): assumes FeatureTable_prefiltered.csv has the same sample
# columns, in the same order, as FeatureTable_justprefiltered.csv - confirm.
stopifnot(ncol(logmet) == length(met_file_samples))
names(logmet) <- met_file_samples
# Log transform
logmet <- log10(logmet)
# Order samples to match transcriptomics data set
logmet <- logmet[names(tran)]
# write.csv(logmet, "../Data/xMWAS_data/logmet.csv")
# proteomics
prot <- data.frame(prot)
prot <- prot[-1, ] # remove row with class labels
rownames(prot) <- prot$mz_mz # move features from first column to row names
prot <- prot[, -1]
# write.csv(prot, "../Data/xMWAS_data/prot.csv")

# create data set with labels for samples with missing data:
# every transcriptomics sample absent from the proteomics table is added as an
# all-NA column so both tables share the same set of sample columns.
prot5 <- prot
missing_samples <- names(tran)[!names(tran) %in% names(prot)]
# Adding the columns by name (instead of by a computed index range) stays
# correct when nothing is missing: the index form (ncol + 1):(ncol + 0) counts
# downward and would blank out the last existing column.
if (length(missing_samples) > 0) {
  prot5[missing_samples] <- NA
}
# Order samples to match transcriptomics data set
prot5 <- prot5[names(tran)]
# write.csv(prot5, "../Data/xMWAS_data/prot5.csv")
# setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# key <- read_csv("../Data/xMWAS_data/SampleKey.csv")
# Class labels: drop the CenterID and CodedID columns, then rename IIDPID to ID
# and Group to Class.
key <- data.frame(key)
key <- dplyr::rename(
  dplyr::select(key, -CenterID, -CodedID),
  ID = IIDPID,
  Class = Group
)
# write.csv(key, "../Data/xMWAS_data/key.csv")
# Output directory, relative to the working directory set just below.
output <- "../Data/xMWAS_data/T2D_xMWAS_outputLOG_mettran/"
# Argument descriptions are in the xMWAS user manual:
# https://github.com/kuppal2/xMWAS/blob/master/example_manual_tutorial/xMWAS-manual.pdf
# NOTE(review): this changes the working directory to the folder of the
# document active in RStudio (requires an RStudio session); the relative
# read_csv() paths earlier in this history were evaluated before this call.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# Integrate transcriptomics (X) with metabolomics (Y) only, because not all
# samples are represented in the proteomics data.
# NOTE(review): X is the untransformed `tran` while Y is the log10 `logmet`;
# `logtran` is computed earlier in this history but not passed - confirm.
xmwas_res <- run_xmwas(
  Xome_data = tran, Yome_data = logmet, Zome_data = NA, Wome_data = NA,
  outloc = output, classlabels = key, class_fname = NA,
  xmwasmethod = "spls", plsmode = "canonical",
  # X and Y caps are 30% of each table's row count (variables are ranked by
  # relative standard deviation). NOTE(review): the product is not rounded;
  # the original comment suggests round(nrow(...) * 0.3) - confirm.
  max_xvar = nrow(tran) * 0.3, max_yvar = nrow(logmet) * 0.3,
  max_zvar = 5000, max_wvar = 5000,
  rsd.filt.thresh = 1,
  corthresh = 0.5, # absolute correlation threshold
  # Upper limit on variables selected per dataset in the sPLS model.
  keepX = 1000, keepY = 1000, keepZ = 1000, keepW = 1000,
  pairedanalysis = FALSE, # TRUE for a repeated-measures design
  optselect = TRUE, # perform optimal PLS component selection
  rawPthresh = 0.05, # correlation p-value threshold (Student's t-test)
  numcomps = 10, # max number of PLS components to use
  net_edge_colors = c("red", "black"),
  net_node_colors = c("orange", "green", "cyan", "pink"),
  Xname = "Transcripts", Yname = "Metabolites", # Zname / Wname are not passed
  net_node_shape = c("square", "circle", "triangle", "star"),
  all.missing.thresh = NA, # NA turns the missing-value filter off
  missing.val = 0,
  # Written as 08162023 in the original; R reads that literal as 8162023.
  seednum = 8162023, label.cex = 0.2, vertex.size = 6,
  interactive = TRUE, max_connections = NA,
  centrality_method = "eigenvector",
  use.X.reference = FALSE, removeRda = TRUE,
  compare.classes = TRUE, graphclustering = TRUE,
  class.comparison.allvar = TRUE,
  modularity.weighted = TRUE, globalcomparison = TRUE,
  plot.pairwise = FALSE,
  # sPLS variable selection during class-wise comparison is switched on.
  apply.sparse.class.comparison = TRUE,
  layout.type = "fr1"
)
# Output directory for the run (absolute, machine-specific path).
output <- "C:/Users/jonan/Documents/Tyseq/Data/xMWAS_data/T2D_xMWAS_outputLOG_mettran/"
# Integrate transcriptomics (X) with metabolomics (Y) only, because not all
# samples are represented in the proteomics data.
# NOTE(review): X is the untransformed `tran` while Y is the log10 `logmet`;
# `logtran` is computed earlier in this history but not passed - confirm.
xmwas_res <- run_xmwas(
  Xome_data = tran, Yome_data = logmet, Zome_data = NA, Wome_data = NA,
  outloc = output, classlabels = key, class_fname = NA,
  xmwasmethod = "spls", plsmode = "canonical",
  # X and Y caps are 30% of each table's row count (variables are ranked by
  # relative standard deviation). NOTE(review): the product is not rounded;
  # the original comment suggests round(nrow(...) * 0.3) - confirm.
  max_xvar = nrow(tran) * 0.3, max_yvar = nrow(logmet) * 0.3,
  max_zvar = 5000, max_wvar = 5000,
  rsd.filt.thresh = 1,
  corthresh = 0.5, # absolute correlation threshold
  # Upper limit on variables selected per dataset in the sPLS model.
  keepX = 1000, keepY = 1000, keepZ = 1000, keepW = 1000,
  pairedanalysis = FALSE, # TRUE for a repeated-measures design
  optselect = TRUE, # perform optimal PLS component selection
  rawPthresh = 0.05, # correlation p-value threshold (Student's t-test)
  numcomps = 10, # max number of PLS components to use
  net_edge_colors = c("red", "black"),
  net_node_colors = c("orange", "green", "cyan", "pink"),
  Xname = "Transcripts", Yname = "Metabolites", # Zname / Wname are not passed
  net_node_shape = c("square", "circle", "triangle", "star"),
  all.missing.thresh = NA, # NA turns the missing-value filter off
  missing.val = 0,
  # Written as 08162023 in the original; R reads that literal as 8162023.
  seednum = 8162023, label.cex = 0.2, vertex.size = 6,
  interactive = TRUE, max_connections = NA,
  centrality_method = "eigenvector",
  use.X.reference = FALSE, removeRda = TRUE,
  compare.classes = TRUE, graphclustering = TRUE,
  class.comparison.allvar = TRUE,
  modularity.weighted = TRUE, globalcomparison = TRUE,
  plot.pairwise = FALSE,
  # sPLS variable selection during class-wise comparison is switched on.
  apply.sparse.class.comparison = TRUE,
  layout.type = "fr1"
)
# Close every open output diversion first so the traceback of the last error
# is printed on the console rather than into a diverted file.
while (sink.number() > 0L) {
  sink()
}
traceback()

# Reinstall the forked xMWAS from GitHub and attach the fresh build.
# The package is detached before it is removed (the history did it the other
# way round), and both steps are guarded so they do not error when the package
# is not attached / not installed. The stray `k` entry from the history is
# dropped: it is a bare symbol, not a command.
if ("package:xMWAS" %in% search()) {
  detach("package:xMWAS", unload = TRUE)
}
if (nzchar(system.file(package = "xMWAS"))) {
  remove.packages("xMWAS")
}
# upgrade = "never" replaces the interactive "select option 3 to not update"
# prompt; the devtools:: prefix avoids relying on devtools being attached.
devtools::install_github("J-Anzules/xMWAS", upgrade = "never")
library(xMWAS)
# Integrate transcriptomics (X) with metabolomics (Y) only, because not all
# samples are represented in the proteomics data; rerun after reinstalling
# xMWAS.
# NOTE(review): X is the untransformed `tran` while Y is the log10 `logmet`;
# `logtran` is computed earlier in this history but not passed - confirm.
xmwas_res <- run_xmwas(
  Xome_data = tran, Yome_data = logmet, Zome_data = NA, Wome_data = NA,
  outloc = output, classlabels = key, class_fname = NA,
  xmwasmethod = "spls", plsmode = "canonical",
  # X and Y caps are 30% of each table's row count (variables are ranked by
  # relative standard deviation). NOTE(review): the product is not rounded;
  # the original comment suggests round(nrow(...) * 0.3) - confirm.
  max_xvar = nrow(tran) * 0.3, max_yvar = nrow(logmet) * 0.3,
  max_zvar = 5000, max_wvar = 5000,
  rsd.filt.thresh = 1,
  corthresh = 0.5, # absolute correlation threshold
  # Upper limit on variables selected per dataset in the sPLS model.
  keepX = 1000, keepY = 1000, keepZ = 1000, keepW = 1000,
  pairedanalysis = FALSE, # TRUE for a repeated-measures design
  optselect = TRUE, # perform optimal PLS component selection
  rawPthresh = 0.05, # correlation p-value threshold (Student's t-test)
  numcomps = 10, # max number of PLS components to use
  net_edge_colors = c("red", "black"),
  net_node_colors = c("orange", "green", "cyan", "pink"),
  Xname = "Transcripts", Yname = "Metabolites", # Zname / Wname are not passed
  net_node_shape = c("square", "circle", "triangle", "star"),
  all.missing.thresh = NA, # NA turns the missing-value filter off
  missing.val = 0,
  # Written as 08162023 in the original; R reads that literal as 8162023.
  seednum = 8162023, label.cex = 0.2, vertex.size = 6,
  interactive = TRUE, max_connections = NA,
  centrality_method = "eigenvector",
  use.X.reference = FALSE, removeRda = TRUE,
  compare.classes = TRUE, graphclustering = TRUE,
  class.comparison.allvar = TRUE,
  modularity.weighted = TRUE, globalcomparison = TRUE,
  plot.pairwise = FALSE,
  # sPLS variable selection during class-wise comparison is switched on.
  apply.sparse.class.comparison = TRUE,
  layout.type = "fr1"
)
# Load the per-class centrality tables written by xMWAS and reduce each to
# node name, centrality and a network label. Each table is read before it is
# used (the history referenced res.diab before reading it), and the repeated
# assignments and debugging prints are dropped.

## Read in results for controls
res.control <- read.delim("C:/Users/jonan/Documents/Tyseq/Data/xMWAS_data/T2D_xMWAS_outputLOG_mettran/Control/cluster_membership_centrality_Control_table.txt")
cent.control <- res.control %>%
  dplyr::select(Name, centrality_vec) %>%
  dplyr::rename(Centrality = centrality_vec) %>%
  mutate(Network = "Non-Diabetic")

## Read in results for diabetic samples
res.diab <- read.delim("C:/Users/jonan/Documents/Tyseq/Data/xMWAS_data/T2D_xMWAS_outputLOG_mettran/Diabetic/cluster_membership_centrality_Diabetic_table.txt")
cent.diab <- res.diab %>%
  dplyr::select(Name, centrality_vec) %>%
  dplyr::rename(Centrality = centrality_vec) %>%
  mutate(Network = "Diabetic")
#load package
library(tidyverse)
library(xMWAS)
library(ggpubr)
library(fgsea)
library(data.table)
library(readxl)
# Diabetic-network centralities: node name, centrality, network label.
cent.diab <- mutate(
  dplyr::rename(
    dplyr::select(res.diab, Name, centrality_vec),
    Centrality = centrality_vec
  ),
  Network = "Diabetic"
)
# Stack both networks into one long table.
cent <- rbind(cent.control, cent.diab)
# Fix the display order of the two networks (Non-Diabetic first).
cent$Network <- factor(cent$Network, levels = c("Non-Diabetic", "Diabetic"))
# Violin plot of centrality per network with the median line drawn and a
# Wilcoxon test annotation.
ggplot(cent, aes(x = Network, y = Centrality)) +
  geom_violin(
    fill = "lightgray",
    draw_quantiles = 0.5,
    scale = "count",
    adjust = 2.5
  ) +
  theme_bw() +
  stat_compare_means(method = "wilcox.test", hjust = -0.02) +
  ylab("Eigenvector Centrality")
# Saves the most recently displayed plot, since no plot object is passed.
ggsave(
  "C:/Users/jonan/Documents/Tyseq/Data/xMWAS_data/T2D_xMWAS_outputLOG_mettran/Figures/CentralityComparison_allfeatures.png",
  dpi = 300
)
# NOTE(review): memory.limit() is Windows-specific and reportedly only a stub
# in recent R releases - confirm it is still useful on the target R version.
memory.limit()
# Open the control-network results in the data viewer.
View(res.control)
ls()
# Drop the proteomics tables from the workspace. rm()'s `list` argument takes
# a character vector of object NAMES; the earlier attempts in this history
# (`(prot, prot5)`, `["1", "2"]`, list(prot, prot5), c(prot, prot5)) are either
# not valid R syntax or pass the objects themselves, so they are removed.
rm(list = c("prot", "prot5"))