-
Notifications
You must be signed in to change notification settings - Fork 15
/
nextflow_schema.json
1346 lines (1346 loc) · 65.2 KB
/
nextflow_schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/nf-core/scflow/master/nextflow_schema.json",
"title": "nf-core/scflow pipeline parameters",
"description": "Complete analysis workflow for single-cell/nuclei RNA-sequencing data.",
"type": "object",
"definitions": {
"input_output_options": {
"title": "Input/output options",
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"properties": {
"manifest": {
"type": "string",
"default": "./refs/Manifest.txt",
"fa_icon": "fas fa-table",
"description": "The .tsv file specifying sample matrix filepaths."
},
"input": {
"type": "string",
"default": "./refs/SampleSheet.tsv",
"fa_icon": "fas fa-table",
"description": "The .tsv file specifying sample metadata."
},
"ensembl_mappings": {
"type": "string",
"default": "https://raw.githubusercontent.com/nf-core/test-datasets/scflow/assets/ensembl_mappings.tsv",
"fa_icon": "fas fa-table",
"description": "Optional tsv file containing mappings between ensembl_gene_ids and gene_names."
},
"ctd_path": {
"type": "string",
"default": "https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/28033407/ctd_v1.zip",
"description": "Cell-type annotations reference file path",
"help_text": "This is a zip file containing cell-type annotation reference files for the EWCE package.",
"fa_icon": "fas fa-file-archive"
},
"celltype_mappings": {
"type": "string",
"default": "./conf/celltype_mappings.tsv",
"fa_icon": "fas fa-table",
"description": "Optional tsv file specifying manual revisions of cell-type annotations."
},
"reddim_genes_yml": {
"type": "string",
"default": "./conf/reddim_genes.yml",
"fa_icon": "fas fa-list",
"description": "Optional list of genes of interest in YML format for plotting of gene expression."
},
"species": {
"type": "string",
"default": "human",
"fa_icon": "fas fa-male",
"description": "Input sample species.",
"help_text": "Currently, \"human\" and \"mouse\" are supported."
},
"max_cores": {
"type": "string",
"default": "'null'",
"description": "Maximum CPU cores.",
"help_text": "The default value of 'null' utilizes all available CPU cores. Manually overriding this parameter can reduce the memory demands of parallelization across multiple cores."
},
"outdir": {
"type": "string",
"default": "./results",
"description": "Outputs directory."
}
},
"required": [
"manifest",
"input",
"ensembl_mappings",
"ctd_path",
"species",
"max_cores"
],
"help_text": ""
},
"quality_control": {
"title": "Quality-Control",
"type": "object",
"description": "Parameters for quality-control and thresholding.",
"default": "",
"properties": {
"qc_key_colname": {
"type": "string",
"default": "manifest",
"description": "The sample sheet column name with unique sample identifiers.",
"fa_icon": "fas fa-user-tag"
},
"qc_factor_vars": {
"type": "string",
"default": "seqdate",
"description": "The sample sheet variables to treat as factors.",
"help_text": "All sample sheet columns with numbers which should be treated as factors should be specified here separated by commas. Examples include columns with dates, numeric sample identifiers, etc.",
"fa_icon": "fas fa-quote-left"
},
"qc_min_library_size": {
"type": "integer",
"default": 250,
"description": "Minimum library size (counts) per cell.",
"fa_icon": "fas fa-greater-than-equal"
},
"qc_max_library_size": {
"type": "string",
"default": "adaptive",
"description": "Maximum library size (counts) per cell.",
"fa_icon": "fas fa-less-than-equal"
},
"qc_min_features": {
"type": "integer",
"default": 100,
"description": "Minimum features (expressive genes) per cell.",
"fa_icon": "fas fa-greater-than-equal"
},
"qc_max_features": {
"type": "string",
"default": "adaptive",
"description": "Maximum features (expressive genes) per cell.",
"fa_icon": "fas fa-less-than-equal"
},
"qc_min_ribo": {
"type": "number",
"description": "Minimum proportion of counts mapping to ribosomal genes.",
"fa_icon": "fas fa-greater-than-equal",
"minimum": 0,
"maximum": 1
},
"qc_max_ribo": {
"type": "number",
"default": 1,
"description": "Maximum proportion of counts mapping to ribosomal genes.",
"fa_icon": "fas fa-less-than-equal",
"minimum": 0,
"maximum": 1
},
"qc_max_mito": {
"type": "string",
"default": "adaptive",
"description": "Maximum proportion of counts mapping to mitochondrial genes.",
"fa_icon": "fas fa-less-than-equal"
},
"qc_min_counts": {
"type": "integer",
"default": 2,
"description": "Minimum counts for gene expressivity.",
"help_text": "Expressive genes must have >=min_counts in >=min_cells",
"fa_icon": "fas fa-greater-than-equal"
},
"qc_min_cells": {
"type": "integer",
"default": 2,
"description": "Minimum cells for gene expressivity.",
"help_text": "Expressive genes must have >=min_counts in >=min_cells",
"fa_icon": "fas fa-greater-than-equal"
},
"qc_drop_unmapped": {
"type": "string",
"default": "True",
"description": "Option to drop unmapped genes.",
"fa_icon": "fas fa-cut"
},
"qc_drop_mito": {
"type": "string",
"default": "True",
"description": "Option to drop mitochondrial genes.",
"fa_icon": "fas fa-cut"
},
"qc_drop_ribo": {
"type": "string",
"description": "Option to drop ribosomal genes.",
"fa_icon": "fas fa-cut",
"default": "false"
},
"qc_nmads": {
"type": "number",
"default": 4,
"description": "The number of MADs for outlier detection.",
"help_text": "The number of median absolute deviations (MADs) used to define outliers for adaptive thresholding.",
"fa_icon": "fas fa-mountain"
}
},
"fa_icon": "far fa-check-circle",
"required": [
"qc_key_colname",
"qc_factor_vars",
"qc_min_library_size",
"qc_max_library_size",
"qc_min_features",
"qc_max_features",
"qc_max_ribo",
"qc_max_mito",
"qc_min_counts",
"qc_min_cells",
"qc_drop_unmapped",
"qc_drop_mito",
"qc_drop_ribo",
"qc_nmads"
]
},
"ambient_rna_profiling": {
"title": "Ambient RNA Profiling",
"type": "object",
"description": "Options for profiling ambient RNA/empty droplets.",
"default": "",
"properties": {
"amb_find_cells": {
"type": "string",
"default": "True",
"description": "Enable ambient RNA / empty droplet profiling.",
"fa_icon": "fas fa-cut"
},
"amb_retain": {
"type": "string",
"default": "auto",
"help_text": "A numeric scalar specifying the threshold for the total UMI count above which all barcodes are assumed to contain cells, or \"auto\" for automated estimation based on the data.",
"description": "Upper UMI counts threshold for true cell annotation.",
"pattern": "^(\\d+|auto)$",
"fa_icon": "fas fa-less-than-equal"
},
"amb_lower": {
"type": "integer",
"default": 100,
"help_text": "A numeric scalar specifying the lower bound on the total UMI count, at or below which all barcodes are assumed to correspond to empty droplets.",
"description": "Lower UMI counts threshold for empty droplet annotation.",
"fa_icon": "fas fa-greater-than-equal"
},
"amb_alpha_cutoff": {
"type": "number",
"default": 0.001,
"description": "The maximum FDR for the emptyDrops algorithm.",
"fa_icon": "fas fa-less-than-equal"
},
"amb_niters": {
"type": "integer",
"default": 10000,
"help_text": "An integer scalar specifying the number of iterations to use for the Monte Carlo p-value calculations for the emptyDrops algorithm.",
"description": "Number of Monte Carlo p-value iterations.",
"fa_icon": "fas fa-recycle"
},
"amb_expect_cells": {
"type": "integer",
"default": 3000,
"description": "Expected number of cells per sample.",
"help_text": "If the \"retain\" parameter is set to \"auto\" (recommended), then this parameter is used to identify the optimal value for \"retain\" for the emptyDrops algorithm.",
"fa_icon": "fas fa-greater-than-equal"
}
},
"fa_icon": "far fa-chart-bar",
"required": [
"amb_find_cells",
"amb_retain",
"amb_lower",
"amb_alpha_cutoff",
"amb_niters",
"amb_expect_cells"
]
},
"multiplet_identification": {
"title": "Multiplet Identification",
"type": "object",
"description": "Parameters for identifying singlets/doublets/multiplets.",
"default": "",
"properties": {
"mult_find_singlets": {
"type": "string",
"default": "True",
"description": "Enable doublet/multiplet identification.",
"fa_icon": "fas fa-cut"
},
"mult_singlets_method": {
"type": "string",
"default": "doubletfinder",
"description": "Algorithm to use for doublet/multiplet identification."
},
"mult_vars_to_regress_out": {
"type": "string",
"default": "nCount_RNA,pc_mito",
"description": "Variables to regress out for dimensionality reduction."
},
"mult_pca_dims": {
"type": "integer",
"default": 10,
"description": "Number of PCA dimensions to use."
},
"mult_var_features": {
"type": "integer",
"default": 2000,
"description": "The top n most variable features to use."
},
"mult_doublet_rate": {
"type": "number",
"description": "A fixed doublet rate.",
"help_text": "Use a fixed default rate (e.g. 0.075 to specify that 7.5% of all cells should be marked as doublets), or set to 0 to use the \"dpk\" method (recommended)."
},
"mult_dpk": {
"type": "integer",
"default": 8,
"description": "Doublets per thousand cells increment.",
"help_text": "The doublets per thousand cell increment specifies the expected doublet rate based on the number of cells, i.e. with a dpk of 8 (recommended by 10X), a dataset with 1000 cells is expected to contain 8 doublets per thousand cells, a dataset with 2000 cells is expected to contain 16 doublets per thousand cells, and a dataset with 10000 cells is expected to contain 80 doublets per thousand cells (or 800 doublets in total). If the \"doublet_rate\" parameter is manually specified this recommended incremental behaviour is overridden.",
"minimum": 0,
"maximum": 1000
},
"mult_pK": {
"type": "number",
"default": 0.02,
"description": "Specify a pK value instead of parameter sweep.",
"help_text": "The optimal pK value used by the doubletFinder algorithm is determined following a compute-intensive parameter sweep. The parameter sweep can be overridden by manually specifying a pK value."
}
},
"fa_icon": "fas fa-adjust",
"required": [
"mult_find_singlets",
"mult_singlets_method",
"mult_vars_to_regress_out",
"mult_pca_dims",
"mult_var_features",
"mult_dpk",
"mult_pK"
]
},
"merge": {
"title": "Merge",
"type": "object",
"description": "Parameters used in the merged quality-control report.",
"default": "",
"properties": {
"merge_plot_vars": {
"type": "string",
"default": "total_features_by_counts,total_counts,pc_mito,pc_ribo",
"description": "Numeric variables for inter-sample metrics.",
"help_text": "A comma-separated list of numeric variables which differ between individual cells of each sample. The merged sample report will include plots facilitating between-sample comparisons for each of these numeric variables."
},
"merge_facet_vars": {
"type": "string",
"default": "NULL",
"description": "Categorical variables for further sub-setting of plots.",
"help_text": "A comma-separated list of categorical variables. The merged sample report will include additional plots of sample metrics subset by each of these variables (e.g. sex, diagnosis)."
},
"merge_outlier_vars": {
"type": "string",
"default": "total_features_by_counts,total_counts",
"description": "Numeric variables for outlier identification.",
"help_text": "The merged report will include tables highlighting samples that are putative outliers for each of these numeric variables."
}
},
"fa_icon": "fas fa-object-ungroup",
"required": [
"merge_plot_vars",
"merge_facet_vars",
"merge_outlier_vars"
]
},
"integration": {
"title": "Integration",
"type": "object",
"description": "Parameters for integrating datasets and batch correction.",
"default": "",
"properties": {
"integ_method": {
"type": "string",
"default": "Liger",
"description": "Choice of integration method."
},
"integ_k": {
"type": "integer",
"default": 30,
"description": "Inner dimension of factorization (n factors).",
"help_text": "See rliger::optimizeALS(). Inner dimension of factorization (number of factors). Run suggestK to determine appropriate value; a general rule of thumb is that a higher k will be needed for datasets with more sub-structure."
},
"integ_unique_id_var": {
"type": "string",
"default": "manifest",
"description": "Unique sample identifier variable."
},
"integ_take_gene_union": {
"type": "string",
"description": "Fill out matrices with union of genes.",
"help_text": "See rliger::createLiger(). Whether to fill out raw.data matrices with union of genes across all datasets (filling in 0 for missing data) (requires make.sparse = TRUE) (default FALSE).",
"fa_icon": "fas fa-cut",
"default": "false"
},
"integ_remove_missing": {
"type": "string",
"default": "True",
"description": "Remove non-expressing cells/genes.",
"help_text": "See rliger::createLiger(). Whether to remove cells not expressing any measured genes, and genes not expressed in any cells (if take.gene.union = TRUE, removes only genes not expressed in any dataset) (default TRUE)."
},
"integ_num_genes": {
"type": "integer",
"default": 3000,
"description": "Number of genes to find for each dataset.",
"help_text": "See rliger::selectGenes(). Number of genes to find for each dataset. Optimises the value of var.thresh for each dataset to get this number of genes."
},
"integ_combine": {
"type": "string",
"default": "union",
"description": "How to combine variable genes across experiments.",
"help_text": "See rliger::selectGenes(). Either \"union\" or \"intersection\".",
"fa_icon": "fas fa-calculator"
},
"integ_capitalize": {
"type": "string",
"description": "Capitalize gene names to match homologous genes.",
"help_text": "See rliger::selectGenes().",
"fa_icon": "fab fa-adn",
"default": "false"
},
"integ_use_cols": {
"type": "string",
"default": "True",
"description": "Treat each column as a cell.",
"help_text": "See rliger::removeMissingObs()."
},
"integ_lambda": {
"type": "number",
"default": 5,
"description": "Regularization parameter.",
"help_text": "See rliger::optimizeALS(). Regularization parameter. Larger values penalize dataset-specific effects more strongly (i.e. alignment should increase as lambda increases). Run suggestLambda to determine most appropriate value for balancing dataset alignment and agreement (default 5.0)."
},
"integ_thresh": {
"type": "number",
"default": 0.0001,
"description": "Convergence threshold.",
"help_text": "See rliger::optimizeALS()."
},
"integ_max_iters": {
"type": "integer",
"default": 100,
"description": "Maximum number of block coordinate descent iterations.",
"help_text": "See rliger::optimizeALS()."
},
"integ_nrep": {
"type": "integer",
"default": 1,
"description": "Number of restarts to perform.",
"help_text": "See rliger::optimizeALS()."
},
"integ_rand_seed": {
"type": "integer",
"default": 1,
"description": "Random seed for reproducible results."
},
"integ_quantiles": {
"type": "integer",
"default": 50,
"description": "Number of quantiles to use for normalization.",
"help_text": "See rliger::quantile_norm()."
},
"integ_ref_dataset": {
"type": "string",
"default": "NULL",
"description": "Name of dataset to use as a reference.",
"help_text": "See rliger::quantile_norm(). Name of dataset to use as a \"reference\" for normalization. By default, the dataset with the largest number of cells is used."
},
"integ_min_cells": {
"type": "integer",
"default": 2,
"description": "Minimum number of cells to consider a cluster shared across datasets.",
"help_text": "See rliger::quantile_norm()."
},
"integ_knn_k": {
"type": "integer",
"default": 20,
"description": "Number of nearest neighbours for within-dataset knn graph.",
"help_text": "See rliger::quantile_norm()."
},
"integ_center": {
"type": "string",
"default": "false",
"description": "Center the data when scaling factors.",
"help_text": "See rliger::quantile_norm()."
},
"integ_resolution": {
"type": "integer",
"default": 1,
"description": "Controls the number of communities detected.",
"help_text": "See rliger::quantileAlignSNF().",
"fa_icon": "fas fa-calculator"
},
"integ_categorical_covariates": {
"type": "string",
"default": "individual,diagnosis,region,sex",
"description": "Categorical variables for integration report metrics.",
"help_text": "The integration report will provide plots and integration metrics for these categorical variables."
},
"integ_input_reduced_dim": {
"type": "string",
"default": "UMAP",
"description": "Reduced dimension embedding for the integration report.",
"help_text": "The integration report will provide with and without integration plots using this embedding."
}
},
"fa_icon": "far fa-object-group",
"required": [
"integ_method",
"integ_k",
"integ_unique_id_var",
"integ_take_gene_union",
"integ_remove_missing",
"integ_num_genes",
"integ_combine",
"integ_capitalize",
"integ_use_cols",
"integ_lambda",
"integ_thresh",
"integ_max_iters",
"integ_nrep",
"integ_rand_seed",
"integ_quantiles",
"integ_ref_dataset",
"integ_min_cells",
"integ_knn_k",
"integ_center",
"integ_resolution",
"integ_categorical_covariates",
"integ_input_reduced_dim"
]
},
"dimensionality_reduction": {
"title": "Dimensionality Reduction",
"type": "object",
"description": "Settings for dimensionality reduction algorithms.",
"default": "",
"properties": {
"reddim_input_reduced_dim": {
"type": "string",
"default": "PCA,Liger",
"description": "Input matrix for dimension reduction."
},
"reddim_reduction_methods": {
"type": "string",
"default": "tSNE,UMAP,UMAP3D",
"description": "Dimension reduction outputs to generate.",
"help_text": "Typically 'UMAP,UMAP3D' or 'tSNE'."
},
"reddim_vars_to_regress_out": {
"type": "string",
"default": "nCount_RNA,pc_mito",
"description": "Variables to regress out before dimension reduction."
},
"reddim_umap_pca_dims": {
"type": "integer",
"default": 30,
"description": "Number of PCA dimensions.",
"help_text": "See uwot::umap()."
},
"reddim_umap_n_neighbors": {
"type": "integer",
"default": 35,
"description": "Number of nearest neighbours to use.",
"help_text": "See uwot::umap()."
},
"reddim_umap_n_components": {
"type": "integer",
"default": 2,
"description": "The dimension of the space to embed into.",
"help_text": "See uwot::umap(). The dimension of the space to embed into. This defaults to 2 to provide easy visualization, but can reasonably be set to any integer value in the range 2 to 100."
},
"reddim_umap_init": {
"type": "string",
"default": "spectral",
"description": "Type of initialization for the coordinates.",
"help_text": "See uwot::umap().",
"enum": [
"spectral",
"normlaplacian",
"random",
"lvrandom",
"laplacian",
"pca",
"spca",
"agspectral"
]
},
"reddim_umap_metric": {
"type": "string",
"default": "euclidean",
"description": "Distance metric for finding nearest neighbours.",
"help_text": "See uwot::umap().",
"enum": [
"euclidean",
"cosine",
"manhattan",
"hamming",
"correlation",
"categorical"
]
},
"reddim_umap_n_epochs": {
"type": "integer",
"default": 200,
"description": "Number of epochs to use during optimization of embedded coordinates.",
"help_text": "See uwot::umap()."
},
"reddim_umap_learning_rate": {
"type": "integer",
"default": 1,
"description": "Initial learning rate used in optimization of coordinates.",
"help_text": "See uwot::umap()."
},
"reddim_umap_min_dist": {
"type": "number",
"default": 0.4,
"description": "Effective minimum distance between embedded points.",
"help_text": "See uwot::umap(). Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result in a more even dispersal of points. The value should be set relative to the spread value, which determines the scale at which embedded points will be spread out."
},
"reddim_umap_spread": {
"type": "number",
"default": 0.85,
"description": "Effective scale of embedded points.",
"help_text": "See uwot::umap(). In combination with min_dist, this determines how clustered/clumped the embedded points are."
},
"reddim_umap_set_op_mix_ratio": {
"type": "number",
"default": 1,
"description": "Interpolation to combine local fuzzy sets.",
"help_text": "See uwot::umap(). The value of this parameter should be between 0.0 and 1.0; a value of 1.0 will use a pure fuzzy union, while 0.0 will use a pure fuzzy intersection.",
"minimum": 0,
"maximum": 1
},
"reddim_umap_local_connectivity": {
"type": "integer",
"default": 1,
"description": "Local connectivity required.",
"help_text": "See uwot::umap(). The local connectivity required \u2013 i.e. the number of nearest neighbors that should be assumed to be connected at a local level. The higher this value the more connected the manifold becomes locally."
},
"reddim_umap_repulsion_strength": {
"type": "integer",
"default": 1,
"description": "Weighting applied to negative samples in embedding optimization.",
"help_text": "See uwot::umap(). Weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples."
},
"reddim_umap_negative_sample_rate": {
"type": "integer",
"default": 5,
"description": "Number of negative edge samples to use per positive edge sample.",
"help_text": "See uwot::umap(). The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding."
},
"reddim_umap_fast_sgd": {
"type": "string",
"description": "Use fast SGD.",
"help_text": "See uwot::umap(). Setting this to TRUE will speed up the stochastic optimization phase, but give a potentially less accurate embedding, and which will not be exactly reproducible even with a fixed seed. For visualization, fast_sgd = TRUE will give perfectly good results. For more generic dimensionality reduction, it's safer to leave fast_sgd = FALSE.",
"fa_icon": "fas fa-skiing",
"default": "false"
},
"reddim_tsne_dims": {
"type": "integer",
"default": 2,
"description": "Output dimensionality.",
"help_text": "See Rtsne::Rtsne()."
},
"reddim_tsne_initial_dims": {
"type": "integer",
"default": 50,
"description": "Number of dimensions retained in the initial PCA step.",
"help_text": "See Rtsne::Rtsne()."
},
"reddim_tsne_perplexity": {
"type": "integer",
"default": 150,
"description": "Perplexity parameter.",
"help_text": "See Rtsne::Rtsne()."
},
"reddim_tsne_theta": {
"type": "number",
"default": 0.5,
"description": "Speed/accuracy trade-off.",
"help_text": "See Rtsne::Rtsne(). Speed/accuracy trade-off (increase for less accuracy), set to 0.0 for exact TSNE (default: 0.5)."
},
"reddim_tsne_stop_lying_iter": {
"type": "integer",
"default": 250,
"description": "Iteration after which perplexities are no longer exaggerated.",
"help_text": "See Rtsne::Rtsne(). Iteration after which the perplexities are no longer exaggerated (default: 250, except when Y_init is used, then 0)."
},
"reddim_tsne_mom_switch_iter": {
"type": "integer",
"default": 250,
"description": "Iteration after which the final momentum is used.",
"help_text": "See Rtsne::Rtsne(). Iteration after which the final momentum is used (default: 250, except when Y_init is used, then 0)."
},
"reddim_tsne_max_iter": {
"type": "integer",
"default": 1000,
"description": "Number of iterations.",
"help_text": "See Rtsne::Rtsne(). "
},
"reddim_tsne_pca_center": {
"type": "string",
"default": "True",
"description": "Center data before PCA.",
"help_text": "See Rtsne::Rtsne(). Should data be centered before pca is applied? (default: TRUE)"
},
"reddim_tsne_pca_scale": {
"type": "string",
"description": "Scale data before PCA.",
"help_text": "See Rtsne::Rtsne(). Should data be scaled before pca is applied? (default: FALSE).",
"fa_icon": "fas fa-balance-scale",
"default": "false"
},
"reddim_tsne_normalize": {
"type": "string",
"default": "True",
"description": "Normalize data before distance calculations.",
"help_text": "See Rtsne::Rtsne(). Should data be normalized internally prior to distance calculations with normalize_input? (default: TRUE)"
},
"reddim_tsne_momentum": {
"type": "number",
"default": 0.5,
"description": "Momentum used in the first part of optimization.",
"help_text": "See Rtsne::Rtsne(). "
},
"reddim_tsne_final_momentum": {
"type": "number",
"default": 0.8,
"description": "Momentum used in the final part of optimization.",
"help_text": "See Rtsne::Rtsne(). "
},
"reddim_tsne_eta": {
"type": "integer",
"default": 1000,
"description": "Learning rate.",
"help_text": "See Rtsne::Rtsne(). "
},
"reddim_tsne_exaggeration_factor": {
"type": "integer",
"default": 12,
"description": "Exaggeration factor used in the first part of the optimization.",
"help_text": "See Rtsne::Rtsne(). Exaggeration factor used to multiply the P matrix in the first part of the optimization (default: 12.0)."
}
},
"fa_icon": "fas fa-cubes",
"required": [
"reddim_input_reduced_dim",
"reddim_reduction_methods",
"reddim_vars_to_regress_out",
"reddim_umap_pca_dims",
"reddim_umap_n_neighbors",
"reddim_umap_n_components",
"reddim_umap_init",
"reddim_umap_metric",
"reddim_umap_n_epochs",
"reddim_umap_learning_rate",
"reddim_umap_min_dist",
"reddim_umap_spread",
"reddim_umap_set_op_mix_ratio",
"reddim_umap_local_connectivity",
"reddim_umap_repulsion_strength",
"reddim_umap_negative_sample_rate",
"reddim_umap_fast_sgd",
"reddim_tsne_dims",
"reddim_tsne_initial_dims",
"reddim_tsne_perplexity",
"reddim_tsne_theta",
"reddim_tsne_stop_lying_iter",
"reddim_tsne_mom_switch_iter",
"reddim_tsne_max_iter",
"reddim_tsne_pca_center",
"reddim_tsne_pca_scale",
"reddim_tsne_normalize",
"reddim_tsne_momentum",
"reddim_tsne_final_momentum",
"reddim_tsne_eta",
"reddim_tsne_exaggeration_factor"
]
},
"clustering": {
"title": "Clustering",
"type": "object",
"description": "Parameters used to tune louvain/leiden clustering.",
"default": "",
"properties": {
"clust_cluster_method": {
"type": "string",
"default": "leiden",
"description": "Clustering method.",
"help_text": "Specify \"leiden\" or \"louvain\"."
},
"clust_reduction_method": {
"type": "string",
"default": "UMAP_Liger",
"description": "Reduced dimension input(s) for clustering.",
"help_text": "One or more of \"UMAP\", \"tSNE\", \"PCA\", \"LSI\"."
},
"clust_res": {
"type": "number",
"default": 0.001,
"description": "The resolution of clustering."
},
"clust_k": {
"type": "integer",
"default": 50,
"description": "Integer number of nearest neighbours for clustering.",
"help_text": "Integer number of nearest neighbors to use when creating the k nearest neighbor graph for Louvain/Leiden clustering. k is related to the resolution of the clustering result, a bigger k will result in lower resolution and vice versa."
},
"clust_louvain_iter": {
"type": "integer",
"default": 1,
"description": "The number of iterations for clustering."
}
},
"fa_icon": "fas fa-braille",
"required": [
"clust_cluster_method",
"clust_reduction_method",
"clust_res",
"clust_k",
"clust_louvain_iter"
]
},
"cell_type_annotation": {
"title": "Cell-type Annotation",
"type": "object",
"description": "Parameters used for cell-type annotation and the associated report.",
"default": "",
"properties": {
"cta_clusters_colname": {
"type": "string",
"default": "clusters",
"description": "SingleCellExperiment clusters colData variable name."
},
"cta_cells_to_sample": {
"type": "integer",
"default": 10000,
"description": "Max cells to sample."
},
"cta_unique_id_var": {
"type": "string",
"default": "individual",
"description": "A sample metadata unique sample ID."
},
"cta_celltype_var": {
"type": "string",
"default": "cluster_celltype",
"description": "SingleCellExperiment cell-type colData variable name."
},
"cta_facet_vars": {
"type": "string",
"default": "manifest,diagnosis,sex,capdate,prepdate,seqdate",
"description": "Cell-type metrics for categorical variables."
},
"cta_metric_vars": {
"type": "string",
"default": "pc_mito,pc_ribo,total_counts,total_features_by_counts",
"description": "Cell-type metrics for numeric variables."
},
"cta_top_n": {
"type": "integer",
"default": 5,
"description": "Number of top marker genes for plot/table generation."
}
},
"fa_icon": "fas fa-brain",
"required": [
"cta_clusters_colname",
"cta_cells_to_sample",
"cta_unique_id_var",
"cta_celltype_var",
"cta_facet_vars",
"cta_metric_vars",
"cta_top_n"
]
},
"differential_gene_expression": {
"title": "Differential Gene Expression",
"type": "object",
"description": "Parameters for differential gene expression.",
"default": "",
"properties": {
"dge_de_method": {
"type": "string",
"default": "MASTZLM",
"description": "Differential gene expression method."
},
"dge_mast_method": {
"type": "string",
"default": "bayesglm",
"help_text": "See MAST::zlm(). Either 'glm', 'glmer' or 'bayesglm'.",
"description": "MAST method.",
"enum": [
"glm",
"glmer",
"bayesglm"
]
},
"dge_min_counts": {
"type": "integer",
"default": 1,
"description": "Expressive gene minimum counts.",
"help_text": "Only genes with at least min_counts in min_cells_pc will be tested for differential gene expression."
},
"dge_min_cells_pc": {
"type": "number",
"default": 0.1,
"minimum": 0,
"maximum": 1,
"description": "Expressive gene minimum cells fraction.",
"help_text": "Only genes with at least min_counts in min_cells_pc will be tested for differential gene expression. Default 0.1 (i.e. 10% of cells)."
},
"dge_rescale_numerics": {
"type": "string",
"default": "True",
"description": "Re-scale numeric covariates.",
"help_text": "Re-scaling and centring numeric covariates in a model can improve model performance."
},
"dge_pseudobulk": {
"type": "string",
"description": "Pseudobulked differential gene expression.",
"help_text": "Perform differential gene expression on a smaller matrix where counts are first summed across all cells within a sample (defined by dge_sample_var level).",
"fa_icon": "far fa-object-group",
"default": "false"
},
"dge_celltype_var": {
"type": "string",
"default": "cluster_celltype",
"description": "Cell-type annotation variable name.",
"help_text": "Differential gene expression is performed separately for each cell-type of this colData variable."
},
"dge_sample_var": {
"type": "string",
"default": "manifest",
"description": "Unique sample identifier variable."
},
"dge_dependent_var": {
"type": "string",
"default": "group",
"description": "Dependent variable of DGE model.",
"help_text": "The dependent variable may be a categorical (e.g. diagnosis) or a numeric (e.g. histopathology score) variable."
},
"dge_ref_class": {
"type": "string",
"default": "Control",
"help_text": "If a categorical dependent variable is specified, then the reference class of the dependent variable is specified here (e.g. 'Control').",
"description": "Reference class of categorical dependent variable."
},
"dge_confounding_vars": {
"type": "string",
"default": "cngeneson,seqdate,pc_mito",
"description": "Confounding variables.",
"help_text": "A comma-separated list of confounding variables to account for in the DGE model."
},
"dge_random_effects_var": {
"type": "string",
"default": "NULL",
"description": "Random effect confounding variable.",
"help_text": "If specified, the term `+ (1 | x ) +`is added to the model, where x is the specified random effects variable."
},
"dge_fc_threshold": {
"type": "number",
"default": 1.1,
"description": "Fold-change threshold for plotting.",
"help_text": "This absolute fold-change cut-off value is used in plots (e.g. volcano) and the DGE report."
},
"dge_pval_cutoff": {
"type": "number",
"default": 0.05,
"description": "Adjusted p-value cutoff.",
"help_text": "The adjusted p-value cutoff value is used in plots (e.g. volcano) and the DGE report."
},
"dge_n_label": {
"type": "number",
"default": 5,
"help_text": "The number of genes to label in plots (e.g. volcano) and the DGE report."
},
"dge_force_run": {
"type": "string",
"description": "Force model fit for non-full rank.",
"help_text": "A non-full rank model specification will return an error; to override this to return a warning only, set to TRUE.",
"fa_icon": "fas fa-exclamation",
"default": "false"
},
"dge_max_cores": {
"type": "string",
"description": "Maximum CPU cores.",
"help_text": "The default value of 'null' utilizes all available CPU cores. As each additional CPU core increases the number of genes simultaneously fit, the RAM/memory demand increases concomitantly. Manually overriding this parameter can reduce the memory demands of parallelization across multiple cores.",
"fa_icon": "fas fa-microchip",
"default": "null"
}
},
"fa_icon": "fas fa-chart-bar",
"required": [
"dge_de_method",
"dge_mast_method",
"dge_min_counts",
"dge_min_cells_pc",
"dge_rescale_numerics",
"dge_pseudobulk",
"dge_celltype_var",
"dge_sample_var",
"dge_dependent_var",
"dge_ref_class",
"dge_confounding_vars",
"dge_random_effects_var",
"dge_fc_threshold",
"dge_pval_cutoff",
"dge_n_label",
"dge_force_run",
"dge_max_cores"