Immdonor is a GraphQL API of Immport Organ Donor metadata hosted on Hasura Cloud. This API lets you request exactly the data you need from the Immport data model and returns JSON output that can be converted to an R data frame!
This is a fast way to link Flow Cytometry Standard (.FCS) files to their metadata and enables more robust analysis with Cytoverse. You will need to have the FCS files saved locally (see quick_fetch.R).
You can connect using your preferred GraphQL R client. Here we use rOpenSci’s ghql package:
# Connect to the Immdonor GraphQL endpoint through the ghql client.
# By connecting to this API you agree to the Immport.org Terms of Service
library(ghql)

con <- GraphqlClient$new(
  url = "https://resolved-lab-57.hasura.app/v1/graphql"
)

# Register a query named "studies" that requests four fields of
# shared_data_study. The query text is kept verbatim.
qry <- Query$new()
studies <- qry$query(
  "studies",
  '{
shared_data_study {
study_accession
actual_enrollment
minimum_age
maximum_age
}
}'
)

# Execute the registered query and turn the JSON response into a data frame.
x <- con$exec(qry$queries$studies)
studies <- as.data.frame(jsonlite::fromJSON(x))
study_accession |
actual_enrollment |
minimum_age |
maximum_age |
---|---|---|---|
SDY702 |
56 |
3.00 |
73.00 |
SDY1041 |
20 |
21.00 |
70.00 |
SDY1097 |
43 |
.20 |
63.00 |
SDY1389 |
51 |
9.00 |
76.00 |
The studies listed above feature cytometry data generously donated by organ donors. We can build queries to return exactly what metadata we would like to know about these organ donors.
# Register a query named "files" that requests the file path plus the donor,
# biosample and panel fields of every entry in resultFiles. The query text is
# kept verbatim. Uses the `con` client created earlier in the script.
qry <- Query$new()
files <- qry$query(
  "files",
  '{
resultFiles {
filePath
studyAccession
subjectAccession
maxSubjectAge
gender
biosampleType
parameters
panel_id
markers
}
}'
)

# Execute the registered query and turn the JSON response into a data frame.
x <- con$exec(qry$queries$files)
files <- as.data.frame(jsonlite::fromJSON(x))
|
|
|
Here are the top 5 staining panels with the most biosamples
panel_id |
parameters |
markers |
n |
---|---|---|---|
FCM-19 |
14 |
FITC-A-CD25|Qdot 605-A-CD45RA|Qdot 655-A-CD3|QDot 705-A-CD127|APC-A-CD19|PE-Cy7-A-CD4|PE-A-FOXP3 |
81 |
FCM-56 |
20 |
APC-A-CMV|BV570-A-CD3|Qdot 655-A-CD4|Qdot 605 UV-A-CD8|Qdot 605-A-CD45RA|Alexa Fluor 488-A-CCR7|QDot 705-A-NA|BV421-A-CD95|Alexa Fluor 700-A-INFg|PerCP-Cy5-5-A-TNFa|PE-Cy7-A-IL2|PE-A-CD107a|DAPI-A-DEAD |
78 |
FCM-21 |
18 |
FITC-A-CD25|PE-Cy5-A-CTLA-4|BV421-A-CD69|Qdot 605-A-CD45RA|Qdot 655-A-CD3|QDot 705-A-CD127|APC-A-CD103|Alexa Fluor 700-A-KI67|APC-Cy7-A-CD8|PE-Cy7-A-CD4|PE-A-FOX-p3 |
57 |
FCM-1 |
14 |
Alexa Fluor 488-A-CCR7|PerCP-Cy5-5-A-CD45RO|BV421-A-CD69|Qdot 605-A-CD45RA|Qdot 655-A-CD3|QDot 705-A-CD127|APC-A-CD31|APC-Cy7-A-CD8|PE-Texas Red-A-CD19|PE-Cy7-A-CD4|PE-A-CD28 |
54 |
FCM-2 |
18 |
Alexa Fluor 488-A-CCR7|PerCP-Cy5-5-A-CD45RO|BV421-A-CD69|Qdot 605-A-CD45RA|Qdot 655-A-CD3|QDot 705-A-CD127|APC-A-CD31|APC-Cy7-A-CD8|PE Alexa Fluor 610-A-CD19|PE-Cy7-A-CD4|PE-A-CD28 |
52 |
panel_id |
parameters |
markers |
n |
---|---|---|---|
FCM-2 |
18 |
Alexa Fluor 488-A-CCR7|PerCP-Cy5-5-A-CD45RO|BV421-A-CD69|Qdot 605-A-CD45RA|Qdot 655-A-CD3|QDot 705-A-CD127|APC-A-CD31|APC-Cy7-A-CD8|PE Alexa Fluor 610-A-CD19|PE-Cy7-A-CD4|PE-A-CD28 |
52 |
FCM-3 |
18 |
Alexa Fluor 488-A-NA|PerCP-Cy5-5-A-NA|BV421-A-NA|Qdot 605-A-NA|Qdot 655-A-NA|QDot 705-A-NA|APC-A-NA|APC-Cy7-A-NA|PE Alexa Fluor 610-A-NA|PE-Cy7-A-NA|PE-A-NA |
27 |
To use OpenCyto features such as `collapseDataForGating`, we need to add the donor metadata to the cytoset’s pData.
# Build a compensated and transformed cytoset for panels FCM-2 and FCM-3,
# attach the Immdonor metadata to its pData, and save it to "cytosets/tcell".
# NOTE(review): this chunk uses %>% and several cytometry functions without a
# visible library() call; presumably dplyr and the cytoverse packages are
# attached in a setup step that is not shown here. Confirm.

# Load cytoset
# %in% replaces the chained `==` tests; dplyr::filter is namespaced so that
# stats::filter can never be picked up by accident.
fcs <- files %>%
  dplyr::filter(data.resultFiles.panel_id %in% c("FCM-2", "FCM-3"))
fn <- fcs$data.resultFiles.filePath
cs <- load_cytoset_from_fcs(files = fn)

# Annotate immdonor metadata to pData
p <- pData(cs)
# cbind() pairs rows by position, so there must be exactly one metadata row
# per loaded sample.
# NOTE(review): assumes the samples are loaded in the order of `fn`; confirm.
stopifnot(nrow(p) == nrow(fcs))
m <- cbind(p, fcs)
pData(cs) <- m

# Harmonize marker names
# The marker descriptions of the first sample are applied to every channel.
channels <- colnames(cs)
markers <- as.vector(pData(parameters(cs[[1]]))$desc)
names(markers) <- channels
markernames(cs) <- markers

# Apply file internal compensation
comps <- lapply(cs, function(cf) spillover(cf)$SPILL)
cs_comp <- compensate(cs, comps)

# Transform fluorescent channels with FCSTrans
channels_to_exclude <- c(
  grep(colnames(cs), pattern = "FSC"),
  grep(colnames(cs), pattern = "SSC"),
  grep(colnames(cs), pattern = "Time")
)
# x[-integer(0)] selects nothing, so a plain negative index would drop every
# channel when no scatter/time channel is found; setdiff() keeps them all.
chnls <- colnames(cs)[setdiff(seq_along(colnames(cs)), channels_to_exclude)]
fcstrans <- FCSTransTransform(
  transformationId = "defaultFCSTransTransform",
  channelrange = 2^18,
  channeldecade = 4.5,
  range = 4096,
  cutoff = -111,
  w = 0.5,
  rescale = TRUE
)
transList <- transformList(chnls, fcstrans)
cs_trans <- transform(cs_comp, transList)

# Make sure the parent folder exists before saving (no-op when it does).
dir.create("cytosets", showWarnings = FALSE, recursive = TRUE)
# The original line was missing the closing quote of the path, which is a
# syntax error.
cs <- save_cytoset(cs_trans, "cytosets/tcell")
Since we annotated the cytoset with each file’s age, gender and biosample type, we can use the `collapseDataForGating` feature and `groupBy` biosampleType to improve our autogating.
# Reload the saved cytoset and wrap it in a GatingSet.
cs <- load_cytoset(path = "cytosets/tcell")
gs <- GatingSet(cs)

# "nonDebris": positive mindensity gate on FSC-A under root, with the data
# collapsed and grouped by biosample type.
gs_add_gating_method(
  gs,
  alias = "nonDebris",
  parent = "root",
  pop = "+",
  dims = "FSC-A",
  gating_method = "mindensity",
  gating_args = "min = 20000, max=50000",
  groupBy = "data.resultFiles.biosampleType",
  collapseDataForGating = "TRUE"
)
## ...
## Warning in .gating_gtMethod(x, y, ...): NAs introduced by coercion
## done
# "singlets": singletGate on FSC-A vs FSC-H, applied to the nonDebris
# population.
gs_add_gating_method(
  gs,
  alias = "singlets",
  parent = "nonDebris",
  pop = "+",
  dims = "FSC-A,FSC-H",
  gating_method = "singletGate"
)
## ...
## done
# "bcells": mindensity gate on CD19 under singlets, keeping both the positive
# and negative populations (pop = "+/-"), grouped by biosample type.
gs_add_gating_method(
  gs,
  alias = "bcells",
  parent = "singlets",
  pop = "+/-",
  dims = "CD19",
  gating_method = "mindensity",
  groupBy = "data.resultFiles.biosampleType",
  collapseDataForGating = "TRUE"
)
## ...
## Warning in .gating_gtMethod(x, y, ...): NAs introduced by coercion
## done
# "CD4": two-dimensional mindensity gate on CD3 and CD4 under the CD19-
# population, grouped by biosample type.
gs_add_gating_method(
  gs,
  alias = "CD4",
  parent = "CD19-",
  pop = "+/-+/-",
  dims = "CD3,CD4",
  gating_method = "mindensity",
  gating_args = "min = 1500, max=2500",
  groupBy = "data.resultFiles.biosampleType",
  collapseDataForGating = "TRUE"
)
## ...
## Warning in .gating_gtMethod(x, y, ...): NAs introduced by coercion
## Warning in .gating_gtMethod(x, y, ...): NAs introduced by coercion
## done
# "Tmem": two-dimensional mindensity gate on CCR7 and CD45RA under the CD4+
# population, grouped by biosample type.
gs_add_gating_method(
  gs,
  alias = "Tmem",
  parent = "CD4+",
  pop = "+/-+/-",
  dims = "CCR7,CD45RA",
  gating_method = "mindensity",
  gating_args = "min = 1750, max=2500",
  groupBy = "data.resultFiles.biosampleType",
  collapseDataForGating = "TRUE"
)
## ...
## Warning in .gating_gtMethod(x, y, ...): NAs introduced by coercion
## Warning in .gating_gtMethod(x, y, ...): NAs introduced by coercion
## done
# Split the GatingSet into one subset per tissue, using the biosample type
# that was annotated into pData.
lln <- subset(
  gs,
  data.resultFiles.biosampleType == "Lung lymph node"
)
lung <- subset(
  gs,
  data.resultFiles.biosampleType == "Lung"
)
spleen <- subset(
  gs,
  data.resultFiles.biosampleType == "Spleen"
)
blood <- subset(
  gs,
  data.resultFiles.biosampleType == "Whole blood"
)
Lung Lymph Node
# Hex-bin plot of CD4 (x) against CD3 (y) for the lung lymph node samples,
# with the "CD4+" gate drawn on top and axes set to the instrument range.
ggcyto(lln, aes(x = "CD4", y = "CD3")) +
  geom_gate("CD4+") +
  geom_hex(bins = 128) +
  ggcyto_par_set(limits = "instrument")
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Lung
# Hex-bin plot of CD4 (x) against CD3 (y) for the lung samples, with the
# "CD4+" gate drawn on top and axes set to the instrument range.
ggcyto(lung, aes(x = "CD4", y = "CD3")) +
  geom_gate("CD4+") +
  geom_hex(bins = 128) +
  ggcyto_par_set(limits = "instrument")
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Spleen
# Hex-bin plot of CD4 (x) against CD3 (y) for the spleen samples, with the
# "CD4+" gate drawn on top and axes set to the instrument range.
ggcyto(spleen, aes(x = "CD4", y = "CD3")) +
  geom_gate("CD4+") +
  geom_hex(bins = 128) +
  ggcyto_par_set(limits = "instrument")
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Lung Lymph Node
# Hex-bin plot of CCR7 (x) against CD45RA (y) for the events in the "CD4+"
# population of the lung lymph node samples.
ggcyto(
  gs_pop_get_data(lln, "CD4+"),
  aes(x = "CCR7", y = "CD45RA")
) +
  geom_hex(bins = 128)
Lung
# Hex-bin plot of CCR7 (x) against CD45RA (y) for the events in the "CD4+"
# population of the lung samples.
ggcyto(
  gs_pop_get_data(lung, "CD4+"),
  aes(x = "CCR7", y = "CD45RA")
) +
  geom_hex(bins = 128)
Spleen
# Hex-bin plot of CCR7 (x) against CD45RA (y) for the events in the "CD4+"
# population of the spleen samples.
ggcyto(
  gs_pop_get_data(spleen, "CD4+"),
  aes(x = "CCR7", y = "CD45RA")
) +
  geom_hex(bins = 128)
Work in progress…
# Select the FCM-2 / FCM-3 files that come from the tissues of interest and
# run the batch preprocessing on them.
# `files` must still be the metadata data frame returned by the GraphQL query.
tissues <- c(
  "Lung lymph node", "Lung", "Spleen", "PBMC", "Whole blood",
  "Inguinal lymph node", "Colon", "Ileum", "Jejunum"
)
# %in% replaces the long chains of `==` comparisons; both conditions must hold.
fcs <- files %>%
  dplyr::filter(
    data.resultFiles.panel_id %in% c("FCM-2", "FCM-3"),
    data.resultFiles.biosampleType %in% tissues
  )

# Columns expected by the batch functions below. The biosample type is used
# as the study id, so each tissue is processed as its own group.
fcs$fcs_files <- fcs$data.resultFiles.filePath
fcs$study_id <- fcs$data.resultFiles.biosampleType

fcs_info <- fcs %>%
  dplyr::select(fcs_files, study_id)

# Per-file covariates, joined back to the cluster statistics later on.
sample_info <- fcs %>%
  dplyr::select(
    fcs_files,
    data.resultFiles.maxSubjectAge,
    data.resultFiles.biosampleType,
    data.resultFiles.gender
  )

# Scatter and time channels are excluded from the transformation.
preprocessing.batch(
  inputMeta = fcs_info,
  assay = "FCM",
  outpath = "metacyto/panel/preprocess_output",
  b = 1 / 150,
  excludeTransformParameters = c(
    "FSC-A", "FSC-W", "FSC-H", "SSC-A", "SSC-W", "SSC-H", "Time"
  )
)
## Study ID = Colon Preprocessing
## Study ID = Ileum Preprocessing
## Study ID = Jejunum Preprocessing
## Study ID = Whole blood Preprocessing
## Study ID = Lung lymph node Preprocessing
## Study ID = Inguinal lymph node Preprocessing
## Study ID = Lung Preprocessing
## Study ID = Spleen Preprocessing
## Study ID = PBMC Preprocessing
## Preprocess result stored in the folder: metacyto/panel/preprocess_output
# Rename the channel names in the preprocessing output to the marker names
# stored in the saved cytoset.
# The file list gets its own name so that it does not overwrite `files`, the
# metadata data frame that the filtering steps of this script read.
processed_files <- list.files(
  "metacyto/panel",
  pattern = "processed_sample",
  recursive = TRUE,
  full.names = TRUE
)

cs <- load_cytoset(path = "cytosets/tcell")
new <- markernames(cs)

channels_to_exclude <- c(
  grep(colnames(cs), pattern = "FSC"),
  grep(colnames(cs), pattern = "SSC"),
  grep(colnames(cs), pattern = "Time")
)
# x[-integer(0)] selects nothing, so a plain negative index would drop every
# channel when no scatter/time channel is found; setdiff() keeps them all.
old <- colnames(cs)[setdiff(seq_along(colnames(cs)), channels_to_exclude)]
old <- toupper(old)
oldplus <- paste0(old, "+")
oldminus <- paste0(old, "-")

# NOTE(review): assumes `new` and `old` have the same length and order, and
# that mapping the "+" / "-" variants to the unsigned marker names is
# intended. Confirm against nameUpdator()'s behaviour.
nameUpdator(oldNames = old, newNames = new, files = processed_files)
nameUpdator(oldNames = oldplus, newNames = new, files = processed_files)
nameUpdator(oldNames = oldminus, newNames = new, files = processed_files)
# Parameters that we don't want to cluster on (scatter and time channels).
excludeClusterParameters <- c(
  "FSC-A", "FSC-W", "FSC-H",
  "SSC-A", "SSC-W", "SSC-H",
  "Time"
)

# Cluster every group in the preprocessing output with flowSOM.MC and keep
# the returned cluster labels for the search step.
cluster_label <- autoCluster.batch(
  preprocessOutputFolder = "metacyto/panel/preprocess_output",
  excludeClusterParameters = excludeClusterParameters,
  labelQuantile = 0.95,
  minPercent = 0.05,
  clusterFunction = flowSOM.MC
)
## Clustering , study ID = Colon
## Building SOM
## Mapping data to SOM
## Building MST
## Clustering , study ID = Ileum
## Building SOM
## Mapping data to SOM
## Building MST
## Clustering , study ID = Jejunum
## Building SOM
## Mapping data to SOM
## Building MST
## Clustering , study ID = Whole blood
## Building SOM
## Mapping data to SOM
## Building MST
## Clustering , study ID = Lung lymph node
## Building SOM
## Mapping data to SOM
## Building MST
## Clustering , study ID = Inguinal lymph node
## Building SOM
## Mapping data to SOM
## Building MST
## Clustering , study ID = Lung
## Building SOM
## Mapping data to SOM
## Building MST
## Clustering , study ID = Spleen
## Building SOM
## Mapping data to SOM
## Building MST
## Clustering , study ID = PBMC
## Building SOM
## Mapping data to SOM
## Building MST
# Search the preprocessed data for the clusters named in `cluster_label` and
# write the results to the search_output folder.
searchCluster.batch(
  preprocessOutputFolder = "metacyto/panel/preprocess_output",
  outpath = "metacyto/panel/search_output",
  clusterLabel = cluster_label
)
## Searching , study ID = Colon
## Searching , study ID = Ileum
## Searching , study ID = Jejunum
## Searching , study ID = Whole blood
## Searching , study ID = Lung lymph node
## Searching , study ID = Inguinal lymph node
## Searching , study ID = Lung
## Searching , study ID = Spleen
## Searching , study ID = PBMC
# Collect the per-sample cluster statistics written by the search step.
# The file list gets its own name so that it does not overwrite `files`, the
# metadata data frame that the filtering steps of this script read.
stats_files <- list.files(
  "metacyto/panel/search_output",
  pattern = "cluster_stats_in_each_sample",
  recursive = TRUE,
  full.names = TRUE
)
fcs_stats <- collectData(stats_files, longform = TRUE)

# join the cluster summary statistics with sample information
all_data <- dplyr::inner_join(fcs_stats, sample_info, by = "fcs_files")

# See the fraction of what clusters are affected by Gender (while controlling
# for age). The argument name `variableOfInterst` is kept exactly as written.
GA <- glmAnalysis(
  value = "value",
  variableOfInterst = "data.resultFiles.gender",
  parameter = "fraction",
  otherVariables = c("data.resultFiles.maxSubjectAge"),
  studyID = "study_id",
  label = "label",
  data = all_data,
  CILevel = 0.95,
  ifScale = c(TRUE, FALSE)
)

# Sort by effect size (ascending).
GA <- GA[order(GA$Effect_size), ]
GA$label <- as.character(GA$label)

# NOTE(review): `label` is a character vector here, so `> 15` is a string
# comparison against "15", not a numeric one. A length filter such as
# nchar(GA$label) may have been intended. Left unchanged until confirmed.
w <- which((GA$label) > 15)
## Warning: Use of `GA$label` is discouraged. Use `label` instead.
## Warning: Use of `GA$Effect_size` is discouraged. Use `Effect_size` instead.
The 20 largest effect sizes
|
label |
Effect_size |
SE |
t_value |
p_value |
lower |
upper |
N |
---|---|---|---|---|---|---|---|---|
47 |
CD28+|CD3+|CD4+|CD45RO+ |
0.8031651 |
0.2278789 |
3.524526 |
0.0008629 |
0.3464856 |
1.2598446 |
66 |
184 |
CD28+|CD4+|CD45RO+ |
0.7426466 |
0.2428331 |
3.058260 |
0.0034352 |
0.2559982 |
1.2292949 |
66 |
85 |
CCR7+|CD19+|CD31+|CD4+|CD45RO+|CD8+ |
0.7358015 |
0.2545410 |
2.890700 |
0.0056343 |
0.2247892 |
1.2468138 |
61 |
128 |
CCR7+|CD19+|CD3+|CD31+|CD4+|CD45RO+|CD8+ |
0.7305022 |
0.2551306 |
2.863248 |
0.0060710 |
0.2183061 |
1.2426982 |
61 |
205 |
CD4+|CD45RO+ |
0.7237409 |
0.2444161 |
2.961102 |
0.0045181 |
0.2339201 |
1.2135616 |
66 |
203 |
CD28+|CD3+|CD4+|CD45RO+|CD8+ |
0.7039717 |
0.2343360 |
3.004113 |
0.0041215 |
0.2335226 |
1.1744207 |
61 |
30 |
CCR7+|CD19+ |
0.6178449 |
0.2168472 |
2.849217 |
0.0061552 |
0.1832733 |
1.0524165 |
66 |
78 |
CCR7+|CD19+|CD45RO+ |
0.6075617 |
0.2304315 |
2.636626 |
0.0108627 |
0.1457666 |
1.0693569 |
66 |
103 |
CCR7+|CD19+|CD31+|CD4+|CD45RO+ |
0.5967158 |
0.2418360 |
2.467439 |
0.0167492 |
0.1120655 |
1.0813660 |
66 |
11 |
CD127-|CD19+|CD4+|CD45RO+|CD8+ |
0.5930820 |
0.2192813 |
2.704663 |
0.0095541 |
0.1516916 |
1.0344725 |
55 |
104 |
CCR7+|CD19+|CD31+|CD4+ |
0.5903663 |
0.2410285 |
2.449363 |
0.0175242 |
0.1073343 |
1.0733983 |
66 |
132 |
CCR7+|CD3+|CD45RO+ |
0.5903451 |
0.2647291 |
2.229997 |
0.0298485 |
0.0598160 |
1.1208741 |
66 |
74 |
CCR7+|CD19+|CD28+|CD31+|CD4+|CD45RA+|CD45RO+|CD8+ |
0.5826280 |
0.2760517 |
2.110576 |
0.0397328 |
0.0284311 |
1.1368248 |
61 |
213 |
CD19+|CD3+|CD45RA+|CD45RO+ |
0.5573624 |
0.2245925 |
2.481660 |
0.0161614 |
0.1072689 |
1.0074559 |
66 |
151 |
CD4+ |
0.5528965 |
0.2464158 |
2.243754 |
0.0288942 |
0.0590682 |
1.0467248 |
66 |
17 |
CCR7+|CD19+|CD28+|CD45RO+|CD8+ |
0.5446699 |
0.2295150 |
2.373134 |
0.0214444 |
0.0838993 |
1.0054405 |
61 |
95 |
CCR7+|CD19+|CD28+|CD31+|CD4+|CD45RA+|CD8+ |
0.5394233 |
0.2465378 |
2.187994 |
0.0329366 |
0.0453505 |
1.0334961 |
66 |
120 |
CCR7+|CD19+|CD28+|CD3+|CD4+|CD45RA+ |
0.5315369 |
0.2320631 |
2.290484 |
0.0258510 |
0.0664721 |
0.9966016 |
66 |
136 |
CD19+|CD45RA+ |
0.5297685 |
0.2182681 |
2.427146 |
0.0185209 |
0.0923495 |
0.9671875 |
66 |
226 |
CCR7+|CD19+|CD28+|CD3+|CD31+|CD4+|CD45RA+|CD45RO+|CD69+|CD8+ |
0.5258602 |
0.2900366 |
1.813083 |
0.0763471 |
-0.0579531 |
1.1096735 |
55 |
- Coming soon:
- CyTOF data support
- Please contact me with ANY questions, comments or suggestions!!!
- Slack: Join the immunespace slack!
- iMessage: mrjaffery at icloud dot com
- email: mrjaffery at gmail dot com