misc fixes, code for making latex table

plutonium-239 · May 20, 2024 · ec4564a · ec4564a
1 parent 3f7891a
commit ec4564a
Show file tree

Hide file tree

Showing 4 changed files with 68 additions and 16 deletions.
diff --git a/experiments/best_results_to_latex.py b/experiments/best_results_to_latex.py
@@ -0,0 +1,55 @@
+"""Simple script to make a latex table from best results"""
+
+import pandas as pd
+
+# Load the best-results usage statistics for the transformer / CUDA runs.
+df = pd.read_csv("results/llm/best_results-transformer-cuda-usage_stats.csv")
+
+# Index rows by model name and drop every row whose case is "Conv".
+df = df.set_index("model")
+df = df[df["case"] != "Conv"]
+
+# Flag the "memsave_"-prefixed models, then build an index with that prefix
+# stripped so a model and its memsave variant share the same key.
+df["memsave"] = df.index.str.startswith("memsave_")
+# NOTE(review): the lambda tests "memsave" (no underscore) while the flag above
+# tests "memsave_"; a name that starts with "memsave" but does not contain
+# "memsave_" would raise IndexError on the [1] lookup.
+badi = df.index.map(
+    lambda x: x.split("memsave_", 1)[1] if x.startswith("memsave") else x
+)
+badi.name = "model_clean"
+# Keep the original name as a "model" column; index by the cleaned name.
+df2 = df.reset_index().set_index(badi).sort_index()
+# Baseline rows: case "All" and not a memsave variant.
+divs = df2[(df2["case"] == "All") & (~df2["memsave"])]
+# Scale memory and time by the baseline; the division aligns on the
+# "model_clean" index.
+# NOTE(review): presumably there is exactly one baseline row per cleaned model
+# name - confirm, otherwise the alignment may not behave as intended.
+df2["Scaled M"] = df2["Memory Usage (GB)"] / divs["Memory Usage (GB)"]
+df2["Scaled T"] = df2["Time Taken (s)"] / divs["Time Taken (s)"]
+
+# Table cells: "<absolute value> (<value relative to baseline>)", two decimals.
+# NOTE(review): the output column is labelled [GiB] while the source column is
+# labelled (GB) - confirm which unit the measurements actually use.
+df2["Memory [GiB]"] = df2.apply(
+    lambda x: f"{x['Memory Usage (GB)']:.2f} ({x['Scaled M']:.2f})", axis=1
+)
+df2["Time [s]"] = df2.apply(
+    lambda x: f"{x['Time Taken (s)']:.2f} ({x['Scaled T']:.2f})", axis=1
+)
+
+
+def _format_name(n):
+    """Turn a raw model name into the row label used in the table.
+
+    Args:
+        n (str): Model name, optionally prefixed with "memsave_".
+
+    Returns:
+        str: "<Name> + MemSave" for "memsave_"-prefixed names (prefix removed),
+            otherwise the name itself. In both cases the name goes through
+            str.capitalize, which upper-cases the first character and
+            lower-cases the rest.
+    """
+    if n.startswith("memsave_"):
+        # Everything after the first "memsave_" is the underlying model name.
+        mname = n.split("memsave_", 1)[1]
+        return f"{mname.capitalize()} + MemSave"
+    return n.capitalize()
+
+
+# Display labels for the rows; the resulting Series keeps the name "model", so
+# "model" becomes the index name after set_index below.
+ni = df2["model"].apply(_format_name)
+# Re-index by display label and drop the columns that only fed the formatted
+# "Memory [GiB]" / "Time [s]" strings.
+df2 = (
+    df2.set_index(ni)
+    .sort_index()
+    .drop(
+        columns=[
+            "model",
+            "memsave",
+            "Memory Usage (GB)",
+            "Time Taken (s)",
+            "Scaled M",
+            "Scaled T",
+        ]
+    )
+)
+
+# Pivot so that rows are models and the columns are grouped per case.
+# NOTE(review): df2.columns[1:] presumably skips "case" as the first remaining
+# column - confirm against the CSV's column order.
+# NOTE(review): the identity aggfunc presumably relies on each (model, case)
+# pair occurring only once - confirm.
+df2_p = df2.pivot_table(
+    index="model", columns="case", values=df2.columns[1:], aggfunc=lambda x: x
+)
+
+# Write the LaTeX table to stdout, rendering missing cells as "-".
+print(df2_p.to_latex(na_rep="-"))
diff --git a/experiments/get_best_results.py b/experiments/get_best_results.py
@@ -16,14 +16,22 @@ def main(base_dir: str):
     Args:
         base_dir (str): The base results dir
     """
+    # Keep the literal string "None" from being parsed as NaN by read_csv
+    custom_na_values = pd._libs.parsers.STR_NA_VALUES - {"None"}
     for device, arch in product(["cuda", "cpu"], ["linear", "conv", "transformer"]):
         # usage stats
         df = None
         idx_col = ["model", "case"]
         for fname in glob(os.path.join(base_dir, f"usage_stats-{arch}-{device}-*.csv")):
             with open(fname) as f:
-                f.readline()
-                temp_df = pd.read_csv(f, index_col=idx_col)
+                # header=1 below skips the first line (replaces f.readline())
+                temp_df = pd.read_csv(
+                    f,
+                    index_col=idx_col,
+                    header=1,
+                    na_values=custom_na_values,
+                    keep_default_na=False,
+                )
             df = temp_df if df is None else pd.concat([df, temp_df])
         if df is not None:
             df = df.rename(index=case_mapping, level=1)

diff --git a/experiments/paper_demo.py b/experiments/paper_demo.py
@@ -98,25 +98,12 @@
         "no_grad_norm_weights",
         "no_grad_norm_bias",
     ],
-    [  # CONV
-        "no_grad_linear_weights",
-        "no_grad_linear_bias",
-        "no_grad_norm_weights",
-        "no_grad_norm_bias",
-    ],
     [  # NORM
         "no_grad_conv_weights",
         "no_grad_conv_bias",
         "no_grad_linear_weights",
         "no_grad_linear_bias",
     ],
-    [  # LLM
-        "grad_input",
-        "no_grad_linear_weights",
-        "no_grad_linear_bias",
-        "no_grad_norm_weights",
-        "no_grad_norm_bias",
-    ],
 ]
 
 
@@ -137,6 +124,8 @@
         )
 
         for model in models:
+            if model == "flan-t5":
+                batch_size = 56
             for estimate in estimators:
                 outputs = []
 

diff --git a/experiments/util/measurements.py b/experiments/util/measurements.py
@@ -387,7 +387,7 @@ def check_lm_head(n) -> bool:
         Returns:
             bool: Whether n is a LM head
         """
-        lm_head_name = getattr(model, 'lm_head_name', None)
+        lm_head_name = getattr(model, "lm_head_name", None)
         return lm_head_name is not None and lm_head_name in n
 
     layers = [