-
Notifications
You must be signed in to change notification settings - Fork 4
/
seurat-run-umap.R
executable file
·319 lines (301 loc) · 14 KB
/
seurat-run-umap.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
#!/usr/bin/env Rscript
# This script has been automatically generated through
#
# YAML2RScript.py -i ../r-seurat-scripts/cli-generated/manually_crafted_YAML/seurat-umap.yaml -o ../r-seurat-scripts/seurat-run-umap.R
#
# to change this file edit the input YAML and re-run the above command
suppressPackageStartupMessages(require(workflowscriptscommon))
suppressPackageStartupMessages(require(scater))
suppressPackageStartupMessages(require(optparse))
suppressPackageStartupMessages(require(Seurat))
suppressPackageStartupMessages(require(SeuratDisk))
# Command line interface definition.
#
# Each make_option() entry maps onto one argument of the RunUMAP() call made
# further down in this script, plus the input/output file and format options.
# Real-valued tuning parameters (learning.rate, min.dist, spread,
# repulsion.strength, a, b) are declared as "double" so that fractional
# values given on the command line are parsed as numbers instead of being
# forced through integer or left as character strings.
option_list <- list(
  # ---- input ----
  make_option(
    c("-i", "--input-object-file"),
    action = "store",
    metavar = "Input file",
    type = "character",
    help = "Input file with Seurat object in either RDS-Seurat, Loom or SCE"
  ),
  make_option(
    c("--input-format"),
    action = "store",
    default = "seurat",
    metavar = "Input format",
    type = "character",
    help = "Either loom, seurat, anndata or singlecellexperiment for the input format to read."
  ),
  # ---- what to run UMAP on ----
  # --dims is an R expression in text form; it is evaluated after parsing.
  make_option(
    c("--dims"),
    action = "store",
    default = NULL,
    type = "character",
    help = "Which dimensions to use as input features, used only if list('features') is NULL"
  ),
  make_option(
    c("--reduction"),
    action = "store",
    default = "pca",
    type = "character",
    help = "Which dimensional reduction (PCA or ICA) to use for the UMAP input. Default is PCA"
  ),
  # --features is a comma-separated string; it is split after parsing.
  make_option(
    c("--features"),
    action = "store",
    default = NULL,
    type = "character",
    help = "Comma-separated list of features to use. If set, run UMAP on this subset of features (instead of running on a set of reduced dimensions). Not set (NULL) by default; dims must be NULL to run on features"
  ),
  make_option(
    c("--graph"),
    action = "store",
    default = NULL,
    type = "character",
    help = "Name of graph on which to run UMAP"
  ),
  make_option(
    c("--assay"),
    action = "store",
    default = NULL,
    type = "character",
    help = "Assay to pull data for when using list('features') , or assay used to construct Graph if running UMAP on a Graph"
  ),
  make_option(
    c("--nn.name"),
    action = "store",
    default = NULL,
    type = "character",
    help = "Name of knn output on which to run UMAP"
  ),
  make_option(
    c("--slot"),
    action = "store",
    default = "data",
    type = "character",
    help = "The slot used to pull data for when using list('features') . data slot is by default."
  ),
  # ---- UMAP implementation and model handling ----
  make_option(
    c("--umap.method"),
    action = "store",
    default = "uwot",
    type = "character",
    help = "UMAP implementation to run. Can be list uwot, uwot-learn, umap-learn (rquires python umap-learn package)."
  ),
  make_option(
    c("--reduction.model"),
    action = "store",
    default = NULL,
    type = "character",
    help = "list('DimReduc') object that contains the umap model"
  ),
  make_option(
    c("--return.model"),
    action = "store_true",
    default = FALSE,
    type = "logical",
    help = "whether UMAP will return the uwot model"
  ),
  # ---- UMAP tuning parameters ----
  make_option(
    c("--n.neighbors"),
    action = "store",
    default = 30,
    type = "integer",
    help = "This determines the number of neighboring points used in local approximations of manifold structure. Larger values will result in more global structure being preserved at the loss of detailed local structure. In general this parameter should often be in the range 5 to 50."
  ),
  make_option(
    c("--n.components"),
    action = "store",
    default = 2,
    type = "integer",
    help = "The dimension of the space to embed into."
  ),
  make_option(
    c("--metric"),
    action = "store",
    default = "cosine",
    type = "character",
    help = "metric: This determines the choice of metric used to measure distance in the input space. A wide variety of metrics are already coded, and a user defined function can be passed as long as it has been JITd by numba."
  ),
  make_option(
    c("--n.epochs"),
    action = "store",
    default = NULL,
    type = "integer",
    help = "The number of training epochs to be used in optimizing the low dimensional embedding. Larger values result in more accurate embeddings. If NULL is specified, a value will be selected based on the size of the input dataset (200 for large datasets, 500 for small)."
  ),
  # Real-valued: declared as double so e.g. 0.5 is accepted.
  make_option(
    c("--learning.rate"),
    action = "store",
    default = 1,
    type = "double",
    help = "The initial learning rate for the embedding optimization."
  ),
  # Real-valued: the help text itself recommends values between 0.001 and 0.5.
  # NOTE(review): the default of 0 is kept for backward compatibility; Seurat's
  # RunUMAP documentation lists 0.3 as its own default - confirm which is
  # intended in the generating YAML.
  make_option(
    c("--min.dist"),
    action = "store",
    default = 0,
    type = "double",
    help = "This controls how tightly the embedding is allowed compress points together. Larger values ensure embedded points are moreevenly distributed, while smaller values allow the algorithm to optimise more accurately with regard to local structure. Sensible values are in the range 0.001 to 0.5."
  ),
  # Real-valued scale parameter.
  make_option(
    c("--spread"),
    action = "store",
    default = 1,
    type = "double",
    help = "The effective scale of embedded points. In combination with min.dist this determines how clustered/clumped the embedded points are."
  ),
  make_option(
    c("--set.op.mix.ratio"),
    action = "store",
    default = 1.0,
    type = "double",
    help = "Interpolate between (fuzzy) union and intersection as the set operation used to combine local fuzzy simplicial sets to obtain a global fuzzy simplicial sets. Both fuzzy set operations use the product t-norm. The value of this parameter should be between 0.0 and 1.0; a value of 1.0 will use a pure fuzzy union, while 0.0 will use a pure fuzzy intersection."
  ),
  make_option(
    c("--local.connectivity"),
    action = "store",
    default = 1,
    type = "integer",
    help = "The local connectivity required - i.e. the number of nearest neighbors that should be assumed to be connected at a local level. The higher this value the more connected the manifold becomes locally. In practice this should be not more than the local intrinsic dimension of the manifold."
  ),
  # Real-valued weighting factor.
  make_option(
    c("--repulsion.strength"),
    action = "store",
    default = 1,
    type = "double",
    help = "Weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples."
  ),
  make_option(
    c("--negative.sample.rate"),
    action = "store",
    default = 5,
    type = "integer",
    help = "The number of negative samples to select per positive sample in the optimization process. Increasing this value will result in greater repulsive force being applied, greater optimization cost, but slightly more accuracy."
  ),
  # a and b are numeric curve parameters; parse them as numbers, not strings.
  make_option(
    c("--a"),
    action = "store",
    default = NULL,
    type = "double",
    help = "More specific parameters controlling the embedding. If NULL, these values are set automatically as determined by min. dist and spread. Parameter of differentiable approximation of right adjoint functor."
  ),
  make_option(
    c("--b"),
    action = "store",
    default = NULL,
    type = "double",
    help = "More specific parameters controlling the embedding. If NULL, these values are set automatically as determined by min. dist and spread. Parameter of differentiable approximation of right adjoint functor."
  ),
  make_option(
    c("--uwot.sgd"),
    action = "store_true",
    default = FALSE,
    type = "logical",
    help = "Set list('uwot::umap(fast_sgd = TRUE)') ; see list('umap') for more details"
  ),
  make_option(
    c("--seed.use"),
    action = "store",
    default = 42,
    type = "integer",
    help = "Set a random seed. By default, sets the seed to 42. Setting NULL will not set a seed"
  ),
  # --metric.kwds is an R expression in text form; it is evaluated after parsing.
  make_option(
    c("--metric.kwds"),
    action = "store",
    default = NULL,
    type = "character",
    help = "A dictionary of arguments to pass on to the metric, such as the p value for Minkowski distance. If NULL then no arguments are passed on."
  ),
  make_option(
    c("--angular.rp.forest"),
    action = "store_true",
    default = FALSE,
    type = "logical",
    help = "Whether to use an angular random projection forest to initialise the approximate nearest neighbor search. This can be faster, but is mostly on useful for metric that use an angular style distance such as cosine, correlation etc. In the case of those metrics angular forests will be chosen automatically."
  ),
  # store_false with default TRUE: the stored value is TRUE unless the flag is
  # given, i.e. the parsed value already means "be verbose".
  make_option(
    c("--do-not-verbose"),
    action = "store_false",
    default = TRUE,
    type = "logical",
    help = "Controls verbosity"
  ),
  # ---- naming of the stored reduction ----
  make_option(
    c("--reduction.name"),
    action = "store",
    default = "umap",
    type = "character",
    help = "Name to store dimensional reduction under in the Seurat object"
  ),
  make_option(
    c("--reduction.key"),
    action = "store",
    default = "UMAP",
    type = "character",
    help = "dimensional reduction key, specifies the string before the number for the dimension names. UMAP by default"
  ),
  # ---- output ----
  make_option(
    c("-o", "--output-object-file"),
    action = "store",
    type = "character",
    help = "FILE IN"
  ),
  make_option(
    c("--output-format"),
    action = "store",
    default = "seurat",
    type = "character",
    help = "FILE IN"
  )
)
# Parse the command line. The input and output object paths are mandatory;
# option names are accessed below with underscores in place of hyphens
# (e.g. --input-object-file -> opt$input_object_file).
opt <- wsc_parse_args(
  option_list,
  mandatory = c("input_object_file", "output_object_file")
)

# Fail early with a clear message when the input file is missing.
if (!file.exists(opt$input_object_file)) {
  stop((paste("File", opt$input_object_file, "does not exist")))
}

# --dims arrives as text holding an R expression and is turned into its value.
# NOTE(review): eval(parse()) executes whatever expression is supplied on the
# command line; acceptable only while the caller is trusted. Consider a
# restricted parser for index ranges.
if (!is.null(opt$dims)) {
  opt$dims <- eval(parse(text = opt$dims))
}

# --features is a comma-separated string; split it into a character vector.
# strsplit() takes its delimiter via `split` (it has no `sep` argument), and
# fixed = TRUE treats the comma literally rather than as a regular expression.
features <- opt$features
if (!is.null(features)) {
  features <- unlist(strsplit(features, split = ",", fixed = TRUE))
}

# --metric.kwds is likewise an R expression in text form.
# NOTE(review): same eval(parse()) caveat as for --dims above.
if (!is.null(opt$metric.kwds)) {
  opt$metric.kwds <- eval(parse(text = opt$metric.kwds))
}
# Load the object from the input path in the requested input format.
seurat_object <- read_seurat4_object(
  input_path = opt$input_object_file,
  format = opt$input_format
)

# Run UMAP, forwarding every command line option to the RunUMAP() argument of
# the same name. `features` is the already-split character vector (or NULL).
seurat_object_umap <- RunUMAP(
  object = seurat_object,
  dims = opt$dims,
  reduction = opt$reduction,
  features = features,
  graph = opt$graph,
  assay = opt$assay,
  nn.name = opt$nn.name,
  slot = opt$slot,
  umap.method = opt$umap.method,
  reduction.model = opt$reduction.model,
  return.model = opt$return.model,
  n.neighbors = opt$n.neighbors,
  n.components = opt$n.components,
  metric = opt$metric,
  n.epochs = opt$n.epochs,
  learning.rate = opt$learning.rate,
  min.dist = opt$min.dist,
  spread = opt$spread,
  set.op.mix.ratio = opt$set.op.mix.ratio,
  local.connectivity = opt$local.connectivity,
  repulsion.strength = opt$repulsion.strength,
  negative.sample.rate = opt$negative.sample.rate,
  a = opt$a,
  b = opt$b,
  uwot.sgd = opt$uwot.sgd,
  seed.use = opt$seed.use,
  metric.kwds = opt$metric.kwds,
  angular.rp.forest = opt$angular.rp.forest,
  # --do-not-verbose is a store_false flag defaulting to TRUE, so the parsed
  # value is TRUE (verbose) unless the flag is passed. It is used directly:
  # negating it would silence the default run and make the flag enable output.
  verbose = opt$do_not_verbose,
  reduction.name = opt$reduction.name,
  reduction.key = opt$reduction.key
)

# Write the object carrying the new reduction to the output path in the
# requested output format.
write_seurat4_object(
  seurat_object = seurat_object_umap,
  output_path = opt$output_object_file,
  format = opt$output_format
)