forked from protagolabs/odyssey-math
-
Notifications
You must be signed in to change notification settings - Fork 0
/
evaluate_response.py
51 lines (41 loc) · 1.65 KB
/
evaluate_response.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import json
from agents.evaluate import Evalutor
def load_jsonl(filename):
    """Load a JSONL file and return its records as a list.

    Each non-blank line is parsed as one JSON document. Blank or
    whitespace-only lines (for example a stray empty line at the end of the
    file) are skipped instead of aborting the whole load.

    Args:
        filename: Path of the JSONL file to read.

    Returns:
        A list holding one parsed JSON value per non-blank line, in file
        order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8; do not depend on the platform's locale default.
    with open(filename, 'r', encoding='utf-8') as file:
        return [json.loads(line) for line in file if line.strip()]
def save_jsonl(data, filename):
    """Write each entry of ``data`` to ``filename`` as one JSON document per line.

    The file is opened in write mode, so any existing content is replaced.
    Every entry is serialised with ``json.dumps`` and followed by a newline.

    Args:
        data: Iterable of JSON-serialisable entries.
        filename: Path of the JSONL file to write.
    """
    with open(filename, 'w') as out:
        out.writelines(f"{json.dumps(record)}\n" for record in data)
def process_files(file_true, file_pred):
    """Compare predicted answers with reference answers, problem by problem.

    Both files are JSONL. Each reference record is read as a mapping whose
    first key is the problem id and whose value provides "question",
    "answer", "label" and "level". Each prediction record maps a problem id
    to an object providing "answer".

    Predictions are matched to reference problems by problem id, so the two
    files do not need to list the problems in the same order. Reference
    problems that have no prediction are left out of the results and
    reported through a single ``UserWarning``.

    Args:
        file_true: Path of the JSONL file with the reference problems.
        file_pred: Path of the JSONL file with the predicted answers.

    Returns:
        A list with one ``{problem_id: {...}}`` dict per evaluated problem,
        in reference-file order. The inner dict holds "true", "prediction",
        "is_correct" (whatever the ``Evalutor`` call returns), "label" and
        "level".

    Raises:
        KeyError: If a record lacks one of the fields listed above.
    """
    import warnings

    true_answers = load_jsonl(file_true)
    predicted_answers = load_jsonl(file_pred)

    # Index predictions by problem id; pairing the two files by position
    # breaks as soon as one of them is reordered or has a missing line.
    predictions_by_id = {}
    for pred_record in predicted_answers:
        predictions_by_id.update(pred_record)

    evaluator = Evalutor()
    results = []
    missing_ids = []
    for true_record in true_answers:
        # The problem id is the first key of the reference record.
        problem_id = next(iter(true_record))
        if problem_id not in predictions_by_id:
            missing_ids.append(problem_id)
            continue
        true_info = true_record[problem_id]
        pred_answer = predictions_by_id[problem_id]["answer"]
        comparison_result = evaluator(
            question=true_info["question"],
            true=true_info["answer"],
            prediction=pred_answer,
        )
        results.append({
            problem_id: {
                "true": true_info["answer"],
                "prediction": pred_answer,
                "is_correct": comparison_result,
                "label": true_info["label"],
                "level": true_info["level"],
            }
        })

    if missing_ids:
        # Make skipped problems visible instead of dropping them silently.
        warnings.warn(
            f"{len(missing_ids)} problem(s) in {file_true!r} have no "
            f"prediction in {file_pred!r} and were skipped: {missing_ids}",
            stacklevel=2,
        )
    return results
def main(file_true='final-odyssey-math-with-levels.jsonl',
         file_pred='jsonl/clean/dbrx-instruct-solution-clean.jsonl'):
    """Evaluate a predictions file and save the per-problem results.

    The results are written to ``jsonl/eval/result-<name>``, where ``<name>``
    is the file name of ``file_pred``.

    Args:
        file_true: Path of the JSONL file with the reference problems.
        file_pred: Path of the JSONL file with the predicted answers.
    """
    import os

    output_dir = 'jsonl/eval'
    output_path = output_dir + '/result-' + os.path.basename(file_pred)

    # Create the output directory before evaluating, so a missing directory
    # cannot make the final save fail after all the evaluation work is done.
    os.makedirs(output_dir, exist_ok=True)

    results = process_files(file_true, file_pred)
    save_jsonl(results, output_path)
    print("Results have been saved.")
# Run the evaluation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()