-
Notifications
You must be signed in to change notification settings - Fork 9
/
cook-train.rb
executable file
·157 lines (145 loc) · 4.17 KB
/
cook-train.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env ruby
require 'fileutils'
require 'optparse'
require 'ostruct'
# input data information
# Return the /cache path of the RG-C summer22 pass1 `dst/recon` directory
# for one beam energy and target.
#
# @param energy [#to_s] value placed in front of "gev" in the path (e.g. 10.5)
# @param target [#to_s] target name used as a path component (e.g. 'NH3')
# @return [String] the assembled directory path
def make_rgc_path(energy, target)
  format('/cache/clas12/rg-c/production/summer22/pass1/%sgev/%s/dst/recon', energy, target)
end
# Dataset name => directory of reconstructed DST files under /cache.
# The insertion order of the keys is the order in which the datasets are
# listed by --help and --listDatasets. Frozen so that the table cannot be
# modified by accident at runtime.
DATA_HASH = {
  # RGA
  'rga_sp19' => '/cache/clas12/rg-a/production/recon/spring2019/torus-1/pass2/dst/recon',
  # RGC
  'rgc_su22_10.5GeV_Align' => make_rgc_path(10.5, 'Align'),
  'rgc_su22_10.5GeV_C'     => make_rgc_path(10.5, 'C'),
  'rgc_su22_10.5GeV_CH2'   => make_rgc_path(10.5, 'CH2'),
  'rgc_su22_10.5GeV_ET'    => make_rgc_path(10.5, 'ET'),
  'rgc_su22_10.5GeV_ND3'   => make_rgc_path(10.5, 'ND3'),
  'rgc_su22_10.5GeV_NH3'   => make_rgc_path(10.5, 'NH3'),
  # 'rgc_su22_2.2GeV_Align' => make_rgc_path(2.2, 'Align'), # no DSTs here
  'rgc_su22_2.2GeV_C'      => make_rgc_path(2.2, 'C'),
  'rgc_su22_2.2GeV_ET'     => make_rgc_path(2.2, 'ET'),
  'rgc_su22_2.2GeV_NH3'    => make_rgc_path(2.2, 'NH3'),
}.freeze
# file name passed to clas12-workflow as its --trainYaml value
YAML_FILE = 'train.qa.yaml'.freeze
# helper functions
# Extract the run group of a dataset name, i.e. its first
# underscore-separated token ('rga_sp19' => 'rga').
#
# @param dataset_name [String] dataset name such as a key of DATA_HASH
# @return [String, nil] the first token; nil when splitting yields no tokens
def get_run_group(dataset_name)
  run_group, = dataset_name.split('_')
  run_group
end
# Build a compact tag from a dataset name: the run-group token is dropped,
# the first "GeV" of every remaining token is removed, and the tokens are
# concatenated without separators ('rgc_su22_10.5GeV_C' => 'su2210.5C').
#
# Uses `drop(1)` rather than `[1..-1]` so that a name without any tokens
# (e.g. the empty string) yields '' instead of raising NoMethodError on nil.
#
# @param dataset_name [String] dataset name such as a key of DATA_HASH
# @return [String] the shortened name
def shorten_dataset_name(dataset_name)
  dataset_name
    .split('_')
    .drop(1)                          # remove run group
    .map { |tok| tok.sub('GeV', '') } # remove units
    .join                             # remove underscores
end
# Run the optional block (expected to print something), then print a
# horizontal rule of 82 '=' characters to standard output.
#
# @yield optional block, called once before the rule is printed
# @return [nil] the return value of `puts`
def print_info(&block)
  block.call if block
  puts '=' * 82
end
# parse options
# Defaults; the fields are created in this order, which is also the order
# in which they appear when the struct is printed.
options = OpenStruct.new(
  dataset:      '',
  outDir:       "/volatile/clas12/users/#{ENV['LOGNAME']}",
  coatjava:     '',
  printDataDir: false
)
parser = OptionParser.new do |opts|
  opts.banner = "USAGE: #{$0} [OPTIONS]..."
  opts.separator ''
  opts.separator 'REQUIRED OPTIONS:'
  opts.separator ''
  opts.on(
    '--dataset [DATASET_NAME]',
    String,
    'the name of the dataset to process',
    'Choose one of the following:'
  ) do |name|
    # only names that are keys of DATA_HASH are accepted
    unless DATA_HASH.key?(name)
      $stderr.puts "ERROR: dataset name '#{name}' is not defined"
      exit 1
    end
    options.dataset = name
  end
  # List every dataset name in the help text, right-justified, with a blank
  # line whenever the run group differs from that of the previous name.
  DATA_HASH.keys.inject('') do |previous_group, key|
    group = get_run_group(key)
    opts.separator '' if group != previous_group
    opts.separator key.rjust(50)
    group
  end
  opts.separator ''
  opts.on(
    '--coatjava [COATJAVA_VERSION]',
    String,
    'coatjava version'
  ) { |version| options.coatjava = version }
  opts.separator ''
  opts.separator 'OPTIONAL OPTIONS:'
  opts.separator ''
  opts.on(
    '--outDir [OUT_DIR]',
    String,
    'output files will appear in [OUT_DIR]/qa_[DATASET_NAME]',
    "Default: #{options.outDir}"
  ) { |dir| options.outDir = dir }
  opts.separator ''
  opts.on('--listDatasets', 'List the datasets and exit') do
    puts DATA_HASH.keys
    exit
  end
  opts.separator ''
  opts.on('--printDataDir', 'Just print the source data directory, and exit;', 'requires --dataset') do
    options.printDataDir = true
  end
  opts.separator ''
  opts.on_tail('-h', '--help', 'Show this message') do
    puts opts
    exit
  end
end
# with no command-line arguments at all, behave as if --help had been given
parser.parse!(ARGV.empty? ? ['--help'] : ARGV)
# handle --printDataDir
# Prints the source data directory of the chosen dataset and exits; without
# a dataset this is an error (exit status 1).
if options.printDataDir
  if options.dataset == ''
    $stderr.puts "ERROR: need --dataset set when using --printDataDir"
    exit 1
  end
  puts DATA_HASH[options.dataset]
  exit
end
# check required args
print_info { puts "OPTIONS: #{options}" }
{ '--dataset' => options.dataset, '--coatjava' => options.coatjava }.each do |name, value|
  # `to_s` covers nil as well as '': both options are declared with a
  # bracketed (optional) argument, so OptionParser hands nil to the handler
  # when the flag is given without a value.
  next unless value.to_s.empty?

  $stderr.puts "ERROR: missing required argument for '#{name}'"
  exit 1
end
# generate list of runs, using /mss
runListFile = 'tmp/runlist.txt'
FileUtils.mkdir_p File.dirname(runListFile)
# the dataset directory with its leading /cache/ replaced by /mss/
mssDir = DATA_HASH[options.dataset].sub(%r{\A/cache/}, '/mss/')
# every subdirectory of mssDir is taken to be a run; its name is converted
# to an integer (so leading zeros are dropped) and the list is sorted
runList = Dir.glob("#{mssDir}/*/")
  .map { |dirName| File.basename(dirName).to_i }
  .sort
$stderr.puts "WARNING: no run directories found in #{mssDir}" if runList.empty?
# one run number per line
File.open(runListFile, 'w') do |out|
  runList.each { |run| out.puts run }
end
print_info { puts "runs = #{runList}" }
# generate clas12-workflow arguments
workflowArgs = {
  runGroup:  get_run_group(options.dataset),
  model:     'ana',
  tag:       shorten_dataset_name(options.dataset),
  coatjava:  options.coatjava,
  runs:      runListFile,
  inputs:    mssDir,
  trainYaml: YAML_FILE,
  outDir:    "#{options.outDir}/qa_#{options.dataset}",
}
# one [flag, value] pair per workflow option
flagPairs = workflowArgs.map { |opt, val| ["--#{opt}", val.to_s] }
# print clas12-workflow command and exec it
print_info { puts ['clas12-workflow', *flagPairs.map { |pair| pair.join(' ') }].join(" \\\n") }
# Kernel#exec with several arguments runs the program directly, without a
# shell to split words, so each flag and each value is passed as its own
# argv element rather than as one "--flag value" string.
cmd = ['clas12-workflow', *flagPairs.flatten]
exec(*cmd)