Ensembling over layers #259

Open: wants to merge 79 commits into main from ensembling_layer
Changes from 54 commits
Commits (79)
934cd54  Log ensembled metrics (norabelrose, Apr 26, 2023)
dff69bf  Fixing pyright version (norabelrose, Apr 26, 2023)
b181d3e  Merge remote-tracking branch 'origin/main' into ensembling (norabelrose, Apr 26, 2023)
a493b85  experiment with layer ensembling (lauritowal, Apr 29, 2023)
af5def6  add draft example for ensembling datasets (lauritowal, Apr 30, 2023)
04a2a82  add comment (lauritowal, Apr 30, 2023)
f433885  Merge branch 'main' into ensembling_layer (lauritofzi, Apr 30, 2023)
cda7de7  add eval in comments (lauritowal, Apr 30, 2023)
c9f2558  Merge branch 'ensembling_layer' of https://github.com/EleutherAI/elk … (lauritowal, Apr 30, 2023)
0ceaa3a  add different root (lauritofzi, May 1, 2023)
86fb1c8  Merge branch 'ensembling_layer' of https://github.com/EleutherAI/elk … (lauritofzi, May 1, 2023)
47a3f60  Merge branch 'main' into ensembling_layer (lauritowal, May 24, 2023)
0bd274f  add empty list of vals (lauritowal, May 27, 2023)
04f0b4c  Merge branch 'main' into ensembling_layer (lauritowal, Jun 16, 2023)
994af9b  add first version of layer ensembling to eval (lauritowal, Jun 17, 2023)
6ca1916  add vals to train (lauritowal, Jun 19, 2023)
b0d0f83  refactoring & cleanup of eval and layer ensembling (lauritowal, Jun 19, 2023)
241a03a  add annotations (lauritowal, Jun 19, 2023)
e8d042a  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 19, 2023)
a4ace25  rename vals to layer_outputs (lauritowal, Jun 19, 2023)
b025c71  Merge branch 'ensembling_layer' of https://github.com/EleutherAI/elk … (lauritowal, Jun 19, 2023)
2156ad8  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 19, 2023)
e391da6  fir formatting (lauritowal, Jun 19, 2023)
449971f  Merge branch 'ensembling_layer' of https://github.com/EleutherAI/elk … (lauritowal, Jun 19, 2023)
528367d  make layer ensembling work on multiple gpus (lauritowal, Jun 21, 2023)
d4df517  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 21, 2023)
2661ea1  make sure we use the same device (lauritowal, Jun 21, 2023)
043aa7a  Merge branch 'ensembling_layer' of https://github.com/EleutherAI/elk … (lauritowal, Jun 21, 2023)
21cccb7  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 21, 2023)
2495c3a  calc layer ensembling for all prompt ensembling modes (lauritowal, Jun 21, 2023)
69af43c  Merge branch 'ensembling_layer' of https://github.com/EleutherAI/elk … (lauritowal, Jun 21, 2023)
908308b  implement ensembling enum (derpyplops, Jun 23, 2023)
fc980d7  Fix ensembling value writing error (derpyplops, Jun 23, 2023)
5aa30a9  accidentally a print (derpyplops, Jun 23, 2023)
d5b8584  slightly refactor layer stuff and fix tests (derpyplops, Jun 23, 2023)
6380814  try fixing type hints (derpyplops, Jun 23, 2023)
98d19b7  tidy up output (derpyplops, Jun 23, 2023)
e6914e1  accidentally a char (derpyplops, Jun 23, 2023)
29b1cb8  rename to PromptEnsembling (lauritowal, Jun 24, 2023)
421590c  Merge branch 'main' into ensembling_layer (lauritowal, Jul 9, 2023)
bed615a  add annotations and types (lauritowal, Jul 9, 2023)
bf49e99  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 9, 2023)
1f5d8be  clearer naming: prompt_ensembling (lauritowal, Jul 9, 2023)
03a37d2  Merge branch 'ensembling_layer' of https://github.com/EleutherAI/elk … (lauritowal, Jul 12, 2023)
ec37716  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 12, 2023)
1936624  better name for ensembling (lauritowal, Jul 12, 2023)
b243932  Merge branch 'ensembling_layer' of https://github.com/EleutherAI/elk … (lauritowal, Jul 12, 2023)
484788e  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 12, 2023)
8c34797  Merge branch 'main' into ensembling_layer (lauritowal, Jul 12, 2023)
ea5e9e8  Merge branch 'ensembling_layer' of https://github.com/EleutherAI/elk … (lauritowal, Jul 12, 2023)
c0545aa  Merge branch 'main' into ensembling_layer (lauritowal, Jul 13, 2023)
b6de957  remove pseudo auroc (lauritowal, Jul 13, 2023)
cf32b0c  rename to prompt_ensembling (lauritowal, Jul 13, 2023)
769676a  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 13, 2023)
e6c9d4c  precomit fixes (lauritowal, Jul 13, 2023)
8093294  Merge branch 'ensembling_layer' of https://github.com/EleutherAI/elk … (lauritowal, Jul 13, 2023)
6028152  fix num_classes (lauritowal, Jul 18, 2023)
6d7d99a  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 18, 2023)
4148857  fix bug where y_true has a dimension of two (lauritowal, Jul 18, 2023)
964f03d  cleanup (lauritowal, Jul 18, 2023)
f7ed262  Merge branch 'ensembling_layer' of https://github.com/EleutherAI/elk … (lauritowal, Jul 18, 2023)
06dad69  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 18, 2023)
0d2545b  add y_true_initial (lauritowal, Jul 18, 2023)
4a717ce  merge (lauritowal, Jul 18, 2023)
5952b4b  fix test error (lauritowal, Jul 18, 2023)
7efe38f  Merge branch 'main' into ensembling_layer (lauritowal, Jul 22, 2023)
049cd63  replace mode with prompt_ensembling.value (lauritowal, Jul 22, 2023)
c8236dd  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 22, 2023)
56d1796  remove .value for lm_eval (lauritowal, Jul 23, 2023)
4d9c781  add LayerApplied (derpyplops, Jul 27, 2023)
8961e95  fix run.py part (derpyplops, Jul 27, 2023)
bd06cd3  multidataset layer ensembling (derpyplops, Jul 27, 2023)
f8882c6  little refactoring (derpyplops, Jul 27, 2023)
23183bc  fix tests (derpyplops, Jul 27, 2023)
d091f9d  add annotation + cleanup (lauritowal, Jul 31, 2023)
776c186  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 31, 2023)
9629ba5  Merge pull request #282 from EleutherAI/fix-ensembling-jon (derpyplops, Jul 31, 2023)
45b527f  Merge branch 'main' into ensembling_layer (derpyplops, Oct 13, 2023)
64e762a  [pre-commit.ci] auto fixes from pre-commit.com hooks (derpyplops, Oct 13, 2023)
24 changes: 17 additions & 7 deletions elk/evaluation/evaluate.py
@@ -10,6 +10,7 @@
from ..metrics import evaluate_preds
from ..run import Run
from ..utils import Color
from ..utils.types import PromptEnsembling


@dataclass(kw_only=True)
@@ -31,7 +32,7 @@ def execute(self, highlight_color: Color = "cyan"):
@torch.inference_mode()
def apply_to_layer(
self, layer: int, devices: list[str], world_size: int
) -> dict[str, pd.DataFrame]:
) -> tuple[dict[str, pd.DataFrame], list[dict]]:
"""Evaluate a single reporter on a single layer."""
device = self.get_device(devices, world_size)
val_output = self.prepare_data(device, layer, "val")
@@ -42,16 +43,23 @@ def apply_to_layer(
reporter = torch.load(reporter_path, map_location=device)

row_bufs = defaultdict(list)

layer_outputs = []
for ds_name, (val_h, val_gt, _) in val_output.items():
meta = {"dataset": ds_name, "layer": layer}

val_credences = reporter(val_h)
for mode in ("none", "partial", "full"):
layer_outputs.append(
{**meta, "val_gt": val_gt, "val_credences": val_credences}
)
for prompt_ensembling in PromptEnsembling.all():
row_bufs["eval"].append(
{
**meta,
"ensembling": mode,
**evaluate_preds(val_gt, val_credences, mode).to_dict(),
"prompt_ensembling": prompt_ensembling.value,
**evaluate_preds(
val_gt, val_credences, prompt_ensembling
).to_dict(),
}
)

@@ -66,11 +74,13 @@ def apply_to_layer(
model.eval()
row_bufs["lr_eval"].append(
{
"ensembling": mode,
"prompt_ensembling": prompt_ensembling.value,
"inlp_iter": i,
**meta,
**evaluate_preds(val_gt, model(val_h), mode).to_dict(),
**evaluate_preds(
val_gt, model(val_h), prompt_ensembling
).to_dict(),
}
)

return {k: pd.DataFrame(v) for k, v in row_bufs.items()}
return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, layer_outputs)
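
The diff imports PromptEnsembling from ..utils.types, whose definition is not shown in this view. From its usage here (PromptEnsembling.all(), .value, and the NONE/PARTIAL/FULL members used in eval.py below), a plausible minimal sketch is:

from enum import Enum


class PromptEnsembling(Enum):
    # Hypothetical sketch; the PR's actual definition lives in elk/utils/types.py.
    NONE = "none"
    PARTIAL = "partial"
    FULL = "full"

    @staticmethod
    def all() -> tuple["PromptEnsembling", ...]:
        # Iterate every mode, as apply_to_layer does above.
        return tuple(PromptEnsembling)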
197 changes: 167 additions & 30 deletions elk/metrics/eval.py
@@ -1,10 +1,10 @@
from dataclasses import asdict, dataclass
from typing import Literal

import torch
from einops import repeat
from torch import Tensor

from ..utils.types import PromptEnsembling
from .accuracy import AccuracyResult, accuracy_ci
from .calibration import CalibrationError, CalibrationEstimate
from .roc_auc import RocAucResult, roc_auc_ci
@@ -41,59 +41,196 @@ def to_dict(self, prefix: str = "") -> dict[str, float]:
return {**auroc_dict, **cal_acc_dict, **acc_dict, **cal_dict}


def calc_auroc(
y_logits: Tensor,
y_true: Tensor,
prompt_ensembling: PromptEnsembling,
num_classes: int,
) -> RocAucResult:
"""
Calculate the AUROC

Args:
y_logits: Predicted class tensor of shape (n, num_variants, num_classes).
y_true: Ground truth tensor of shape (n,).
prompt_ensembling: The prompt ensembling mode.
num_classes: The number of classes.

Returns:
RocAucResult: The AUROC point estimate and its confidence interval.
"""
if prompt_ensembling == PromptEnsembling.NONE:
auroc = roc_auc_ci(
to_one_hot(y_true, num_classes).long().flatten(1), y_logits.flatten(1)
)
elif prompt_ensembling in (PromptEnsembling.PARTIAL, PromptEnsembling.FULL):
# Pool together the negative and positive class logits
if num_classes == 2:
auroc = roc_auc_ci(y_true, y_logits[..., 1] - y_logits[..., 0])
else:
auroc = roc_auc_ci(to_one_hot(y_true, num_classes).long(), y_logits)
else:
raise ValueError(f"Unknown mode: {prompt_ensembling}")

return auroc
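
In the two-class branch, pooling the logits as y_logits[..., 1] - y_logits[..., 0] yields the positive-class log-odds; its sigmoid equals the softmax probability of the positive class, and AUROC is invariant under this monotone transform. A toy check (illustrative values, not from the PR):

import torch

y_logits = torch.tensor([[[0.2, 1.3]]])      # shape (n=1, num_variants=1, num_classes=2)
score = y_logits[..., 1] - y_logits[..., 0]  # log-odds, tensor([[1.1000]])
prob_pos = torch.sigmoid(score)
assert torch.allclose(prob_pos, torch.softmax(y_logits, dim=-1)[..., 1])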


def calc_calibrated_accuracies(y_true, pos_probs) -> AccuracyResult:
"""
Calculate the calibrated accuracies

Args:
y_true: Ground truth tensor of shape (n,).
pos_probs: Positive-class probability tensor of shape (n, num_variants).

Returns:
AccuracyResult: A dictionary containing the accuracy and confidence interval.
"""

cal_thresh = pos_probs.float().quantile(y_true.float().mean())
cal_preds = pos_probs.gt(cal_thresh).to(torch.int)
cal_acc = accuracy_ci(y_true, cal_preds)
return cal_acc
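
The calibrated threshold above is the q-th quantile of the positive-class probabilities, where q is the empirical positive rate of y_true. A minimal standalone run of the same thresholding logic (toy values, not from the PR):

import torch

y_true = torch.tensor([0, 1, 0, 1])                     # positive rate 0.5
pos_probs = torch.tensor([0.2, 0.7, 0.4, 0.9])
cal_thresh = pos_probs.quantile(y_true.float().mean())  # median of probs, 0.55
cal_preds = pos_probs.gt(cal_thresh).int()              # tensor([0, 1, 0, 1])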


def calc_calibrated_errors(y_true, pos_probs) -> CalibrationEstimate:
"""
Calculate the expected calibration error.

Args:
y_true: Ground truth tensor of shape (n,).
pos_probs: Positive-class probability tensor of shape (n, num_variants).

Returns:
CalibrationEstimate: The expected calibration error estimate.
"""

cal = CalibrationError().update(y_true.flatten(), pos_probs.flatten())
cal_err = cal.compute()
return cal_err


def calc_accuracies(y_logits, y_true) -> AccuracyResult:
"""
Calculate the accuracy

Args:
y_true: Ground truth tensor of shape (n,).
y_logits: Predicted class tensor of shape (n, num_variants, num_classes).

Returns:
AccuracyResult: A dictionary containing the accuracy and confidence interval.
"""
y_pred = y_logits.argmax(dim=-1)
return accuracy_ci(y_true, y_pred)


def evaluate_preds(
y_true: Tensor,
y_logits: Tensor,
ensembling: Literal["none", "partial", "full"] = "none",
prompt_ensembling: PromptEnsembling = PromptEnsembling.NONE,
) -> EvalResult:
"""
Evaluate the performance of a classification model.

Args:
y_true: Ground truth tensor of shape (N,).
y_logits: Predicted class tensor of shape (N, variants, n_classes).
y_true: Ground truth tensor of shape (n,).
y_logits: Predicted class tensor of shape (n, num_variants, num_classes).
prompt_ensembling: The prompt ensembling mode.

Returns:
dict: A dictionary containing the accuracy, AUROC, and ECE.
"""
(n, v, c) = y_logits.shape
(n, num_variants, num_classes) = y_logits.shape
assert y_true.shape == (n,)

if ensembling == "full":
if prompt_ensembling == PromptEnsembling.FULL:
y_logits = y_logits.mean(dim=1)
else:
y_true = repeat(y_true, "n -> n v", v=v)

y_pred = y_logits.argmax(dim=-1)
if ensembling == "none":
auroc = roc_auc_ci(to_one_hot(y_true, c).long().flatten(1), y_logits.flatten(1))
elif ensembling in ("partial", "full"):
# Pool together the negative and positive class logits
if c == 2:
auroc = roc_auc_ci(y_true, y_logits[..., 1] - y_logits[..., 0])
else:
auroc = roc_auc_ci(to_one_hot(y_true, c).long(), y_logits)
else:
raise ValueError(f"Unknown mode: {ensembling}")
y_true = repeat(y_true, "n -> n v", v=num_variants)
return calc_eval_results(y_true, y_logits, prompt_ensembling, num_classes)

acc = accuracy_ci(y_true, y_pred)
cal_acc = None
cal_err = None

if c == 2:
pos_probs = torch.sigmoid(y_logits[..., 1] - y_logits[..., 0])
def calc_eval_results(
y_true: Tensor,
y_logits: Tensor,
prompt_ensembling: PromptEnsembling,
num_classes: int,
) -> EvalResult:
"""
Calculate the evaluation results

# Calibrated accuracy
cal_thresh = pos_probs.float().quantile(y_true.float().mean())
cal_preds = pos_probs.gt(cal_thresh).to(torch.int)
cal_acc = accuracy_ci(y_true, cal_preds)
Args:
y_true: Ground truth tensor of shape (n,).
y_logits: Predicted class tensor of shape (n, num_variants, num_classes).
prompt_ensembling: The prompt ensembling mode.

cal = CalibrationError().update(y_true.flatten(), pos_probs.flatten())
cal_err = cal.compute()
Returns:
EvalResult: The result of evaluating a classifier containing the accuracy,
calibrated accuracies, calibrated errors, and AUROC.
"""
acc = calc_accuracies(y_logits=y_logits, y_true=y_true)

pos_probs = torch.sigmoid(y_logits[..., 1] - y_logits[..., 0])
cal_acc = (
calc_calibrated_accuracies(y_true=y_true, pos_probs=pos_probs)
if num_classes == 2
else None
)
cal_err = (
calc_calibrated_errors(y_true=y_true, pos_probs=pos_probs)
if num_classes == 2
else None
)

auroc = calc_auroc(
y_logits=y_logits,
y_true=y_true,
prompt_ensembling=prompt_ensembling,
num_classes=num_classes,
)

return EvalResult(acc, cal_acc, cal_err, auroc)
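
A usage sketch of the refactored entry point (toy shapes and random data, so the metric values themselves are meaningless; assumes evaluate_preds and PromptEnsembling are imported as above):

import torch

n, num_variants, num_classes = 64, 3, 2
y_true = torch.randint(0, num_classes, (n,))
y_logits = torch.randn(n, num_variants, num_classes)

result = evaluate_preds(y_true, y_logits, PromptEnsembling.FULL)
print(result.to_dict())  # accuracy, calibrated accuracy, calibration error, AUROC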


def layer_ensembling(
layer_outputs: list, prompt_ensembling: PromptEnsembling
) -> EvalResult:
"""
Return an EvalResult after ensembling the probe outputs from the middle layer through the last layer.

Args:
layer_outputs: Per-layer outputs, each holding the ground truth ("val_gt")
and predicted credences ("val_credences") of shape (n, num_variants, num_classes).
prompt_ensembling: The prompt ensembling mode.

Returns:
EvalResult: The result of evaluating a classifier containing the accuracy,
calibrated accuracies, calibrated errors, and AUROC.
"""
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
y_logits_means = []
y_true = layer_outputs[0][0]["val_gt"].to(device)

for layer_output in layer_outputs:
y_logits = layer_output[0]["val_credences"].to(device)
y_logits_means.append(y_logits.mean(dim=1)) # full prompt_ensembling

num_classes = layer_outputs[0][0]["val_credences"].shape[2]
# pool logits from the middle layer through the last layer
middle_index = len(layer_outputs) // 2
Review comment (Collaborator):
in some ways I think we should allow the layers over which we ensemble to be configurable. E.g. sometimes the last layers perform worse.

Reply (Collaborator, author):
yeah, it makes sense to make it configurable. However, I'm curious, how would you decide which layers to pick?

y_logits_stacked = torch.stack(y_logits_means[middle_index:])
# layer ensembling: unweighted mean over the stacked per-layer logits
y_logits_stacked_mean = torch.mean(y_logits_stacked, dim=0)
Review comment (Collaborator):
It seems like the ensembling is done by taking the mean over layers, rather than concatenating. This isn't super clear from comments/docstrings, and hard to tell from reading the code because the shapes aren't commented.
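
# (Shape sketch responding to the comment above; annotations only, not part of the PR.)
# each val_credences:                          (n, num_variants, num_classes)
# y_logits.mean(dim=1), per layer:             (n, num_classes)
# torch.stack(y_logits_means[middle_index:]):  (num_layers_kept, n, num_classes)
# torch.mean(y_logits_stacked, dim=0):         (n, num_classes)
# i.e. the ensemble is an unweighted mean over layers, not a concatenation.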


return calc_eval_results(
y_true=y_true,
y_logits=y_logits_stacked_mean,
prompt_ensembling=prompt_ensembling,
num_classes=num_classes,
)
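
Picking up the configurability suggestion from the review thread above: the ensembled range could be exposed as a parameter. A sketch under that assumption (layer_ensembling_configurable and layer_slice are hypothetical names, not part of this PR):

import torch


def layer_ensembling_configurable(
    layer_outputs: list,
    prompt_ensembling: PromptEnsembling,
    layer_slice: slice = slice(None),  # e.g. slice(-8, None) keeps the last 8 layers
) -> EvalResult:
    """Like the PR's layer_ensembling, but the caller chooses which layers to pool."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    y_true = layer_outputs[0][0]["val_gt"].to(device)
    # Mean over prompt variants within each layer: (n, num_variants, c) -> (n, c)
    y_logits_means = [
        layer_output[0]["val_credences"].to(device).mean(dim=1)
        for layer_output in layer_outputs
    ]
    num_classes = layer_outputs[0][0]["val_credences"].shape[2]
    # Unweighted mean over the selected layers: (k, n, c) -> (n, c)
    y_logits = torch.stack(y_logits_means[layer_slice]).mean(dim=0)
    return calc_eval_results(y_true, y_logits, prompt_ensembling, num_classes)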


def to_one_hot(labels: Tensor, n_classes: int) -> Tensor:
"""
Convert a tensor of class labels to a one-hot representation.
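
The body of to_one_hot is collapsed in this diff view. From its call sites above (to_one_hot(y_true, num_classes).long()), a standard implementation would look roughly like this (a sketch, not necessarily the PR's exact code):

import torch
from torch import Tensor


def to_one_hot(labels: Tensor, n_classes: int) -> Tensor:
    # Map integer class labels of shape (...,) to one-hot of shape (..., n_classes).
    return torch.nn.functional.one_hot(labels.long(), num_classes=n_classes)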