# DISTRIBUTION STATEMENT A. Approved for public release. Distribution is unlimited.
#
# This material is based upon work supported by the Under Secretary of Defense for Research and
# Engineering under Air Force Contract No. FA8702-15-D-0001. Any opinions, findings, conclusions or
# recommendations expressed in this material are those of the author(s) and do not necessarily
# reflect the views of the Under Secretary of Defense for Research and Engineering.
#
# © 2019 Massachusetts Institute of Technology.
#
# MIT Proprietary, Subject to FAR52.227-11 Patent Rights - Ownership by the contractor (May 2014)
#
# The software/firmware is provided to you on an As-Is basis
#
# Delivered to the U.S. Government with Unlimited Rights, as defined in DFARS Part 252.227-7013 or
# 7014 (Feb 2014). Notwithstanding any copyright notice, U.S. Government rights in this work are
# defined by DFARS 252.227-7013 or DFARS 252.227-7014 as detailed above. Use of this work other
# than as specifically authorized by the U.S. Government may violate any copyrights that exist in
# this work.

import numpy as np
from numpy import ndarray

from .boxes import box_overlaps
from detection_utils.boxes import DEFAULT_NEG_THRESHOLD

from typing import NamedTuple

__all__ = ["confusion_matrix", "precision_and_recall"]


def confusion_matrix(
    prediction_detections: ndarray,
    truth_detections: ndarray,
    threshold: float = DEFAULT_NEG_THRESHOLD,
    num_foreground_classes: int = 3,
) -> np.ndarray:
""" Compute confusion matrix to evaluate the accuracy of a classification.
By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`
is equal to the number of observations known to be in group :math:`i` and
predicted to be in group :math:`j`.
Thus in binary classification, the count of true negatives is
:math:`C_{0,0}`, false negatives is :math:`C_{1,0}`, true positives is
:math:`C_{1,1}` and false positives is :math:`C_{0,1}`.
Parameters
----------
prediction_detections : numpy.ndarray, shape=(N, 5)
The predicted objects, in (left, top, right, bottom, class) format.
truth_detections : numpy.ndarray, shape=(K, 5)
The ground-truth objects in (left, top, right, bottom, class) format.
threshold : Real, optional
The IoU threshold above which a predicted box will be associated
with an overlapping truth box.
num_foreground_classes: int, optional (default=3)
The number of foreground class in the problem
Returns
-------
conf_matrix : numpy.ndarray, shape-(N_class, N_class)
Confusion matrix whose i-th row and j-th column entry
indicates the number of samples with true label being i-th
class and predicted label being j-th class.
Notes
-----
The class IDs must be consecutive integers, starting with 0, which
must be associated with the background
"""
    predictions = prediction_detections[:, -1].astype(int)  # shape-(N,) labels
    truths = truth_detections[:, -1].astype(int)  # shape-(K,) labels

    ious = box_overlaps(prediction_detections[:, :4], truth_detections[:, :4])

    # best IoU over all truth boxes, for each prediction; shape-(N,)
    max_ious = ious.max(axis=1)

    # index of highest-overlap truth box associated with each prediction
    max_idxs = ious.argmax(axis=1)  # shape-(N,)

    # target label associated with each prediction
    target_labels = truths[max_idxs]  # shape-(N,)

    # prediction boxes that don't sufficiently overlap with true
    # boxes are ascribed "background" as their target label
    target_labels[max_ious < threshold] = 0

    # rows index the target (truth) label, columns index the predicted label
    conf_mat = np.zeros(
        (num_foreground_classes + 1, num_foreground_classes + 1), dtype=np.int32
    )
    np.add.at(conf_mat, (target_labels, predictions), 1)

    # true boxes with no sufficiently overlapping prediction are effectively
    # predicted as "background"
    unmatched_targets = ious.max(axis=0) < threshold  # shape-(K,)
    np.add.at(conf_mat[:, 0], truths[unmatched_targets], 1)
    return conf_mat
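

# A minimal usage sketch for ``confusion_matrix`` (illustrative boxes; this assumes
# ``box_overlaps`` returns the pairwise IoU matrix, as the thresholding above relies on,
# and passes ``threshold`` explicitly since the default comes from detection_utils.boxes).
# With one foreground class, a single well-overlapping, correctly labeled prediction
# produces a count on the diagonal:
#
#     >>> preds = np.array([[0.0, 0.0, 10.0, 10.0, 1]])  # (left, top, right, bottom, class)
#     >>> truth = np.array([[1.0, 1.0, 9.0, 9.0, 1]])    # IoU with the prediction is 0.64
#     >>> confusion_matrix(preds, truth, threshold=0.5, num_foreground_classes=1)
#     array([[0, 0],
#            [0, 1]], dtype=int32)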


class DetectionStats(NamedTuple):
    precision: float
    recall: float


def div_nan_is_1(numerator: int, denominator: int) -> float:
    """ Returns numerator/denominator, treating 0/0 as 1."""
    if numerator == 0 and denominator == 0:
        return 1
    else:
        return numerator / denominator
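

# Behavior sketch for ``div_nan_is_1`` (values are illustrative):
#
#     >>> div_nan_is_1(0, 0)
#     1
#     >>> div_nan_is_1(3, 4)
#     0.75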


def precision_and_recall(conf_matrix: np.ndarray) -> DetectionStats:
    """
    Given the confusion matrix, C, computes the precision and recall:

        Precision = TP / (TP + FP)
        Recall    = TP / (TP + FN)

    Parameters
    ----------
    conf_matrix : ndarray, shape=(N_class, N_class)
        :math:`C_{0,0}` is assumed to be the count of true
        negatives.

    Returns
    -------
    stats : DetectionStats
        A named tuple storing (precision, recall).

    Notes
    -----
    The statistics reported here reflect only detection performance; i.e.
    all foreground classes are treated collectively as "positive" and background
    is treated as "negative".
    """
    # TN = conf_matrix[0, 0]
    FP = conf_matrix[0, 1:].sum()
    FN = conf_matrix[1:, 0].sum()
    TP = conf_matrix[1:, 1:].sum()
    return DetectionStats(
        precision=div_nan_is_1(TP, TP + FP), recall=div_nan_is_1(TP, TP + FN)
    )
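

# A quick illustrative check of ``precision_and_recall`` on a hand-built matrix
# (hypothetical counts, single foreground class): with 1 TN, 1 FP, 1 FN, and 2 TP,
# both precision and recall come out to 2/3:
#
#     >>> C = np.array([[1, 1],
#     ...               [1, 2]])
#     >>> stats = precision_and_recall(C)
#     >>> float(stats.precision), float(stats.recall)
#     (0.6666666666666666, 0.6666666666666666)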


def compute_precision(
    prediction_detections: ndarray, truth_detections: ndarray, threshold: float = 0.5,
) -> float:
""" Compute the average precision of predictions given targets.
Precision is defined as the number of true positive predictions divided by the number
of total positive predictions.
Parameters
----------
prediction_detections : numpy.ndarray, shape=(N, 5)
The predicted objects, in (left, top, right, bottom, class) format.
truth_detections : numpy.ndarray, shape=(K, 5)
The ground-truth objects in (left, top, right, bottom, class) format.
threshold : Real, optional (default=0.5)
The IoU threshold at which to compute precision.
Returns
-------
float
The average precision (AP) for the given detections and truth.
Notes
-----
This function operates such that when there are zero predictions, precision is 1.
Examples
--------
>>> from detection_utils.metrics import compute_precision
>>> import numpy as np
>>> predictions = np.array([[0, 0, 10, 10, 1], [3, 3, 7, 7, 1]]) # left, top, right, bottom, class
>>> actual = np.array([[2, 3, 6, 7, 1]])
>>> compute_precision(predictions, actual)
0.5
Our IoUs are 0.16, 0.6
>>> compute_precision(predictions, actual, threshold=0.15)
1.0
>>> compute_precision(predictions, actual, threshold=0.75)
0.0
"""
    # we can short-circuit with a couple of special cases to improve our efficiency
    predictions = prediction_detections[:, -1]
    truths = truth_detections[:, -1]

    if predictions.sum() == 0:
        return 1  # (0 TP) / (0 TP + 0 FP) is counted as 100% correct

    if truths.sum() == 0:
        return 0  # we've already handled the case where we found 0/0 relevant objects above

    ious = box_overlaps(prediction_detections[:, :4], truth_detections[:, :4])
    max_ious = ious.max(axis=1)  # best IoU over all truth boxes, for each prediction
    max_idxs = ious.argmax(axis=1)  # index of that best-overlapping truth box
    target_labels = truths[max_idxs]  # truth label associated with each prediction

    # a true positive sufficiently overlaps a foreground truth box and reproduces its label
    true_positive_idxs = np.logical_and(max_ious >= threshold, target_labels > 0)
    num_true_positives = (predictions == target_labels)[true_positive_idxs].sum()
    num_predicted_positives = (predictions > 0).sum()
    return num_true_positives / num_predicted_positives


def compute_recall(
    prediction_detections: ndarray, truth_detections: ndarray, threshold: float = 0.5,
) -> float:
""" Compute the average recall of predictions given targets.
Recall is defined as the number true positive predictions divided by the number of ground-truth targets.
Parameters
----------
prediction_detections : numpy.ndarray, shape=(N, 5)
The predicted objects, in (left, top, right, bottom, class) format.
truth_detections : numpy.ndarray, shape=(K, 5)
The ground-truth objects in (left, top, right, bottom, class) format.
threshold : Real, optional (default=0.5)
The IoU threshold at which to compute recall.
Returns
-------
float
The average recall (AR) for the given detections and truth.
Notes
-----
This function operates such that when there are zero targets, recall is 1 regardless of predictions.
Examples
--------
>>> from detection_utils.metrics import compute_recall
>>> import numpy as np
>>> predictions = np.array([[0, 0, 10, 10, 1], [3, 3, 7, 7, 1]]) # left, top, right, bottom, class
>>> actual = np.array([[2, 3, 6, 7, 1]])
>>> compute_recall(predictions, actual)
1.0
Our highest IoU is 0.6 so let's set our threshold above that
>>> compute_recall(predictions, actual, threshold=0.75)
0.0
"""
    predictions = prediction_detections[:, -1]
    truths = truth_detections[:, -1]

    if truths.sum() == 0:
        return 1  # if there are no targets, then by definition we've found all the targets

    if predictions.sum() == 0:
        return 0  # if there are targets and we predict there are none, we can short-circuit

    ious = box_overlaps(prediction_detections[:, :4], truth_detections[:, :4])
    max_ious = ious.max(axis=1)  # best IoU over all truth boxes, for each prediction
    max_idxs = ious.argmax(axis=1)  # index of that best-overlapping truth box
    target_labels = truths[max_idxs]  # truth label associated with each prediction

    true_positive_idxs = np.logical_and(max_ious >= threshold, target_labels > 0)
    num_true_positives = (predictions == target_labels)[true_positive_idxs].sum()

    # false negatives: matched predictions that carry the wrong label, plus
    # truth boxes that no prediction sufficiently overlaps
    num_false_negatives = (predictions != target_labels)[max_ious >= threshold].sum()
    num_false_negatives += (ious.max(axis=0) < threshold).sum()
    return num_true_positives / (num_true_positives + num_false_negatives)