-
Notifications
You must be signed in to change notification settings - Fork 3
/
dvc.lock
42 lines (42 loc) · 1.32 KB
/
dvc.lock
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
schema: '2.0'
stages:
prepare-s2orc:
cmd: python scripts/s2orc/prepare_parquet_s2orc.py
deps:
- path: mars/s2orc/metadata/ai/
md5: 1bff989bf79e7b8cedccb963caf2c284.dir
size: 193936339
nfiles: 100
- path: scripts/s2orc/prepare_parquet_s2orc.py
md5: 1ec9038eda1aea162af1541aa944adf6
size: 645
outs:
- path: data/s2orc/s2orc_ai_prefiltered.parquet
md5: b7e11eba387eb511989bf74e37f9f950
size: 73253586
process-spacy-s2orc:
cmd: python scripts/s2orc/process_spacy_docs.py
deps:
- path: data/s2orc/s2orc_ai_prefiltered.csv
md5: 372b0a56816c5ecd1086344bea24498b
size: 90660899
- path: scripts/s2orc/process_spacy_docs.py
md5: 58d88a3a785fd99d45feb05258bfd82f
size: 509
outs:
- path: data/s2orc/s2orc_ai_prefiltered_processed_with_doi.pkl
md5: ebfa849796b26e87f519e3372d811bad
size: 5558460171
extract-keywords-s2orc:
cmd: python scripts/s2orc/extract_keywords.py
deps:
- path: data/s2orc/s2orc_ai_prefiltered_processed_with_doi.pkl
md5: ebfa849796b26e87f519e3372d811bad
size: 5558460171
- path: scripts/s2orc/extract_keywords.py
md5: 525a6a84dd496d9107cca48c2f7ee473
size: 791
outs:
- path: data/s2orc/extracted.csv
md5: bbbd22b86b56f7ca385f8326b52821e4
size: 124316244