-
Notifications
You must be signed in to change notification settings - Fork 2
/
fairtracks_track.schema.json
350 lines (350 loc) · 14.6 KB
/
fairtracks_track.schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://raw.githubusercontent.com/fairtracks/fairtracks_standard/v1/current/json/schema/fairtracks_track.schema.json",
"$version_url": "https://raw.githubusercontent.com/fairtracks/fairtracks_standard/v1/1.0.2/json/schema/fairtracks_track.schema.json",
"$comment": "JSON signature: ed944ef67a39c4b0b7a46d10f7c2977b084602ea99792edc79aef766d2346daf",
"title": "Track",
"type": "object",
"properties": {
"@schema": {
"title": "Schema URL",
"description": "The absolute URL of the 'current' version of the relevant FAIRtracks JSON schema, within the same major version that the JSON document follows (which should ensure compatibility). Must match the value of '$id' in the linked schema",
"type": "string",
"format": "uri",
"pattern": "^(https?|ftp)://",
"const": "https://raw.githubusercontent.com/fairtracks/fairtracks_standard/v1/current/json/schema/fairtracks_track.schema.json"
},
"global_id": {
"title": "Global ID",
"description": "Global track identifier, resolvable by identifiers.org [to be created by us]",
"type": "string",
"namespace": "fairtracks"
},
"local_id": {
"title": "Local ID",
"description": "Submitter-local identifier (within investigation/hub) for track (in CURIE-format, if applicable)",
"type": "string",
"examples": [
"encode:ENCFF625ZYB",
"encode:ENCFF490NHU",
"encode:ENCFF718EPO",
"encode:ENCFF447OHZ",
"encode:ENCFF717PIO",
"encode:ENCFF615CNJ",
"encode:ENCFF955LOC",
"encode:ENCFF492CEQ"
],
"unique": true
},
"assembly_id": {
"title": "Genome Assembly ID",
"description": "Genome assembly identifier, resolvable by identifiers.org. Tracks should be annotated with the lowest version of the reference genome that contains all the sequences referenced by the track. Also, GCF (RefSeq) IDs should be preferred to GCA (GenBank) IDs",
"type": "string",
"format": "curie",
"namespace": "insdc.gca",
"matchType": "canonical",
"examples": [
"insdc.gca:GCF_000001405.26",
"insdc.gca:GCF_000001405.26",
"insdc.gca:GCF_000001405.26",
"insdc.gca:GCF_000001405.26",
"insdc.gca:GCF_000001405.26",
"insdc.gca:GCF_000001405.26",
"insdc.gca:GCF_000001405.26",
"insdc.gca:GCF_000001405.26"
]
},
"assembly_name": {
"title": "Genome Assembly Name",
"description": "Genome assembly name or synonym, according to the NCBI Assembly database. For tracks following UCSC-style chromosome names (e.g., \"chr1\"), the UCSC synonym should be used instead of the official name",
"type": "string",
"examples": [
"hg38",
"hg38",
"hg38",
"hg38",
"hg38",
"hg38",
"hg38",
"hg38"
]
},
"experiment_ref": {
"title": "Experiment Reference",
"description": "Reference to the experiment of the track (using the submitter-local identifier of the experiment)",
"type": "string",
"format": "foreign_ref",
"examples": [
"encode:ENCSR000DQP",
"encode:ENCSR000DQP",
"encode:ENCSR000DQP",
"encode:ENCSR000DQP",
"encode:ENCSR000DQP",
"encode:ENCSR000DQP",
"encode:ENCSR000DQP",
"encode:ENCSR000DQP"
],
"foreignProperty": "fairtracks_experiment.schema.json#local_id"
},
"raw_file_ids": {
"title": "Raw File IDs",
"type": "array",
"minItems": 1,
"items": {
"description": "List of identifiers to raw data files used to create track (typically BAM), resolvable by identifiers.org",
"type": "string",
"format": "curie",
"matchType": "canonical",
"examples": [
"encode:ENCFF207FGO;encode:ENCFF842FEQ",
"encode:ENCFF207FGO;encode:ENCFF642FEV",
"encode:ENCFF207FGO;encode:ENCFF842FEQ",
"encode:ENCFF207FGO;encode:ENCFF642FEV",
"encode:ENCFF207FGO;encode:ENCFF842FEQ",
"encode:ENCFF207FGO;encode:ENCFF642FEV",
"encode:ENCFF207FGO;encode:ENCFF842FEQ",
"encode:ENCFF207FGO;encode:ENCFF642FEV",
""
]
}
},
"file_url": {
"title": "File URL",
"description": "A URL to the track data file",
"type": "string",
"format": "uri",
"pattern": "^(https?|ftp|rsync)://",
"examples": [
"https://www.encodeproject.org/files/ENCFF625ZYB/@@download/ENCFF625ZYB.bed.gz",
"https://www.encodeproject.org/files/ENCFF490NHU/@@download/ENCFF490NHU.bed.gz",
"https://www.encodeproject.org/files/ENCFF718EPO/@@download/ENCFF718EPO.bigBed",
"https://www.encodeproject.org/files/ENCFF447OHZ/@@download/ENCFF447OHZ.bigBed",
"https://www.encodeproject.org/files/ENCFF717PIO/@@download/ENCFF717PIO.bigWig",
"https://www.encodeproject.org/files/ENCFF615CNJ/@@download/ENCFF615CNJ.bigWig",
"https://www.encodeproject.org/files/ENCFF955LOC/@@download/ENCFF955LOC.bigWig",
"https://www.encodeproject.org/files/ENCFF492CEQ/@@download/ENCFF492CEQ.bigWig"
]
},
"file_name": {
"title": "File Name",
"description": "Name of the track file",
"type": "string",
"examples": [
"ENCFF625ZYB.bed.gz",
"ENCFF490NHU.bed.gz",
"ENCFF718EPO.bigBed",
"ENCFF447OHZ.bigBed",
"ENCFF717PIO.bigWig",
"ENCFF615CNJ.bigWig",
"ENCFF955LOC.bigWig",
"ENCFF492CEQ.bigWig"
],
"augmented": true
},
"label_short": {
"title": "Label (Short)",
"description": "A short label of the track file. Suggested maximum length is 25 characters",
"type": "string",
"examples": [
"H3K4me3 B cell peaks r1",
"H3K4me3 B cell peaks r2",
"H3K4me3 B cell peaks r1",
"H3K4me3 B cell peaks r2",
"H3K4me3 B cell fold change r1",
"H3K4me3 B cell fold change r2",
"H3K4me3 B cell signal p-value r1",
"H3K4me3 B cell p-value r2"
]
},
"label_long": {
"title": "Label (Long)",
"description": "A long label of the track file. Suggested maximum length is 80 characters",
"type": "string",
"examples": [
"H3K4me3 ChIP-seq of B cell peaks rep1 ENCSR000DQP - ENCFF625ZYB",
"H3K4me3 ChIP-seq of B cell peaks rep2 ENCSR000DQP - ENCFF490NHU",
"H3K4me3 ChIP-seq of B cell peaks rep1 ENCSR000DQP - ENCFF718EPO",
"H3K4me3 ChIP-seq of B cell peaks rep2 ENCSR000DQP - ENCFF447OHZ",
"H3K4me3 ChIP-seq of B cell fold change over control rep1 ENCSR000DQP - ENCFF717PIO",
"H3K4me3 ChIP-seq of B cell fold change over control rep2 ENCSR000DQP - ENCFF615CNJ",
"H3K4me3 ChIP-seq of B cell signal p-value rep1 ENCSR000DQP - ENCFF955LOC",
"H3K4me3 ChIP-seq of B cell signal p-value rep2 ENCSR000DQP - ENCFF492CEQ"
]
},
"file_format": {
"title": "File Format",
"description": "File format of the track data file",
"type": "object",
"ontologyTermPair": {
"id": "0/term_id",
"label": "0/term_label"
},
"properties": {
"term_id": {
"title": "Term ID",
"description": "URL linking to an ontology term",
"type": "string",
"format": "term",
"pattern": "^(https?|ftp)://",
"ontology": "http://edamontology.org/EDAM.owl",
"matchType": "exact",
"examples": [
"http://edamontology.org/format_3613",
"http://edamontology.org/format_3613",
"http://edamontology.org/format_3004",
"http://edamontology.org/format_3004",
"http://edamontology.org/format_3006",
"http://edamontology.org/format_3006",
"http://edamontology.org/format_3006",
"http://edamontology.org/format_3006"
]
},
"term_label": {
"title": "Term Label",
"description": "Exact value according to the ontology used",
"type": "string",
"examples": [
"ENCODE narrow peak format",
"ENCODE narrow peak format",
"bigBed",
"bigBed",
"bigWig",
"bigWig",
"bigWig",
"bigWig"
],
"augmented": true
}
},
"required": [
"term_id"
]
},
"type_of_condensed_data": {
"title": "Type of Condensed Track Data",
"description": "Type of condensed track data: Track data, by definition, is formed downstream of some data condensation process. However, the condensed data vary in form and content, technically speaking, and thus in their interpretation. Still, there is a limited set of common types of condensed track data which are able to describe the vast majority of track files",
"type": "string",
"enum": [
"Sequence-derived regions",
"Experimentally-derived regions",
"Predicted regions",
"Predicted segmentation",
"Population-derived variants",
"Individual variants",
"Peaks",
"Broad peaks",
"Narrow peaks",
"Gapped peaks",
"Signal values (fold change)",
"Signal values (p-value)",
"Signal values (log likelihood)",
"Signal values (other)",
"Read coverage",
"Read counts",
"Mapped single-end reads",
"Mapped paired-end reads",
"Other"
],
"examples": [
"Narrow peaks",
"Narrow peaks",
"Narrow peaks",
"Narrow peaks",
"Signal values (fold change)",
"Signal values (fold change)",
"Signal values (p-value)",
"Signal values (p-value)"
]
},
"geometric_track_type": {
"title": "Geometric Track Type",
"description": "Geometric type of track, according to the delineation of tracks into one of fifteen logical track types based upon their core informational properties (see doi:10.1186/1471-2105-12-494)",
"type": "string",
"enum": [
"Points",
"Valued points",
"Segments",
"Valued segments",
"Genome partition",
"Step function",
"Function",
"Linked points",
"Linked valued points",
"Linked segments",
"Linked valued segments",
"Linked genome partition",
"Linked step function",
"Linked function",
"Linked base pairs"
],
"examples": [
"Segments",
"Segments",
"Segments",
"Segments",
"Step function",
"Step function",
"Step function",
"Step function"
]
},
"checksum": {
"title": "File Checksum",
"type": "object",
"properties": {
"cs_method": {
"title": "Method",
"description": "Method of checksum generation",
"type": "string",
"enum": [
"MD5",
"SHA-256"
],
"examples": [
"MD5",
"MD5",
"MD5",
"MD5",
"MD5",
"MD5",
"MD5",
"MD5"
]
},
"cs_hash": {
"title": "Hash",
"description": "Checksum of track file, using the method described in cs_method",
"type": "string",
"examples": [
"0ab98dbd2a2193ab3e09df030ea4f934",
"3303226e0604d22900771d965fb7a3d2",
"6d2c6c99a7407f1c49ab163e41d9b575",
"aaa951221e1fbb62282bc73060de0339",
"550ae974bf94f1deb1676613ab24a5da",
"a1b4ae6b4eddd8fbd6edd6bf535d2eb5",
"942ea96e86313d4338cf3fd020882c4d",
"51a548338912eef85b72aa21a9a2a555"
]
}
},
"required": [
"cs_method",
"cs_hash"
]
}
},
"required": [
"local_id",
"assembly_id",
"assembly_name",
"experiment_ref",
"file_url",
"label_short",
"label_long",
"file_format",
"type_of_condensed_data",
"geometric_track_type",
"checksum"
],
"additionalProperties": true
}