#!/bin/bash
# Script to run image-guided chart captioning experiments.
function usage {
    echo "usage: $0 [-p predictions_path] [-f results_filename] [--split_eval] [--prefix_tuning]"
    echo ""
    echo "  -p predictions_path   Path to test set predictions"
    echo "  -f results_filename   Filename to save results under in metrics/"
    echo "  --split_eval          Also separately run evaluations for L1 and L2/L3 captions"
    echo "  --prefix_tuning       Apply semantic prefix-tuning"
    exit 1
}
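
# Example invocations (illustrative; the prediction file paths below are placeholders):
#   ./run_test.sh -p outputs/predictions_test.json
#   ./run_test.sh -p outputs/prefix_predictions.json -f prefix_results.txt --prefix_tuning --split_eval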
# Default parameter values.
predictions_path="" # Path to test set predictions.
results_filename="results.txt" # Filename of results.
split_eval=false # True for separately evaluating L1 and L2/L3 captions; false otherwise.
prefix_tuning=false # True for applying semantic prefix-tuning; false otherwise.
# Update parameters based on arguments passed to the script.
while [[ $1 != "" ]]; do
    case $1 in
        -p | --predictions_path)
            shift
            predictions_path=$1
            ;;
        -f | --results_filename)
            shift
            results_filename=$1
            ;;
        --split_eval)
            # Flag takes no value; the shift at the bottom of the loop advances past it.
            split_eval=true
            ;;
        --prefix_tuning)
            prefix_tuning=true
            ;;
        *)
            usage
            ;;
    esac
    shift
done
if [[ $predictions_path = "" ]]; then
    echo "Missing required argument: -p predictions_path"
    usage
fi
# Join results path
results_path="$(pwd)/metrics/$results_filename"
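# With the default filename this resolves to e.g. <repo_root>/metrics/results.txt
# (illustrative; the prefix is whatever directory the script is run from).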
# Run test.
if [[ $prefix_tuning = false ]]; then
    if [[ $split_eval = true ]]; then
        echo "Invalid argument: split evaluation can only be used with prefix tuning."
        usage
    else
        PYTHONPATH=$PYTHONPATH:./src \
        python code/run_metrics.py \
            --test_file data/data_test.json \
            --predictions_path "$predictions_path" \
            --save_results \
            --results_path "$results_path" \
            --no_bleurt
    fi
else
    if [[ $split_eval = true ]]; then
        PYTHONPATH=$PYTHONPATH:./src \
        python code/run_metrics.py \
            --test_file data/data_test.json \
            --predictions_path "$predictions_path" \
            --save_results \
            --results_path "$results_path" \
            --split_eval \
            --prefixtuning \
            --no_bleurt
    else
        PYTHONPATH=$PYTHONPATH:./src \
        python code/run_metrics.py \
            --test_file data/data_test.json \
            --predictions_path "$predictions_path" \
            --save_results \
            --results_path "$results_path" \
            --prefixtuning \
            --no_bleurt
    fi
fi
# If the `--save_results` flag is not specified, metrics will only be printed to stdout.
# `--results_path` can be used to specify where to save the metrics; otherwise they are saved
# to the same folder as the predictions file under `results.txt`, overwriting any existing file.
# To evaluate the L1 and L2/L3 captions separately, use the `--split_eval` flag. This does not
# disable the combined L1/L2/L3 evaluation, and all three sets will be output (and saved, if specified).
# Individual metrics can be disabled with a `--no_X` flag, where `X` is the metric to disable.
# This is particularly helpful with `--no_bleurt`, which disables BLEURT evaluation in the event
# that you do not have GPU resources available.
# Specify the `--prefixtuning` flag for the prefix-tuning case.
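
# For reference, a minimal sketch of invoking the evaluator directly, bypassing this script
# (the predictions path is a placeholder; the flags are the ones used above):
#   PYTHONPATH=$PYTHONPATH:./src python code/run_metrics.py \
#       --test_file data/data_test.json \
#       --predictions_path outputs/predictions_test.json \
#       --save_results \
#       --results_path metrics/results.txt \
#       --prefixtuning \
#       --split_eval \
#       --no_bleurt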