plutonium-239 · plutonium-239 · Aug 22, 2024 · Aug 22, 2024 · Jul 4, 2024 · Jul 4, 2024
diff --git a/experiments/poster_barplot.py b/experiments/poster_barplot.py
@@ -0,0 +1,103 @@
+"""Plot the per-model peak-memory bar charts shown on the poster.
+
+Reads ``results/paper/poster.csv`` and writes one
+``results/paper/poster_plot_<model>.pdf`` per selected model. Bars are grouped by
+``case`` with one bar per ``memsave`` setting; each bar is annotated with its
+formatted ``Scaled M`` value and the name of its series.
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from tueplots import bundles
+
+df = pd.read_csv("results/paper/poster.csv")
+df["Scaled M str"] = df["Scaled M"].apply(lambda x: f"{x:.2f}")
+# Not referenced by the plotting code below; kept so the frame's columns are unchanged.
+df["M str"] = df["Memory Usage (GB)"].apply(lambda x: f"{x:.2f}")
+
+# Series name and bar colour for each value of the ``memsave`` column.
+memsave_map = {False: "PyTorch", True: "+ MemSave"}
+df["colors"] = df["memsave"].apply(lambda x: memsave_map[x])
+color_map = {memsave_map[False]: "#F05F42", memsave_map[True]: "#00E1D2"}
+df["color_val"] = df["colors"].apply(lambda x: color_map[x])
+
+# Bar width in x-axis units: the j-th bar of the i-th case is drawn at i + j * width.
+width = 0.4
+
+# Display names for the values of the ``model_clean`` column.
+names = {
+    "bert": "BERT",
+    "bart": "BART",
+    "roberta": "RoBERTa",
+    "gpt2": "GPT-2",
+    "t5": "T5",
+    "flan-t5": "FLAN-T5",
+    "mistral-7b": "Mistral-7B",
+    "transformer": "Transformer",
+    "llama3-8b": "LLaMa3-8B",
+    "phi3-4b": "Phi3-4B",
+    # Conv
+    "resnet101": "ResNet-101",
+    "deeplabv3_resnet101": "DeepLabv3 (RN101)",
+    "efficientnet_v2_l": "EfficientNetv2-L",
+    "fcn_resnet101": "FCN (RN101)",
+    "mobilenet_v3_large": "MobileNetv3-L",
+    "resnext101_64x4d": "ResNeXt101-64x4d",
+    # The key names a ResNet-50 backbone; keep the label in sync with it.
+    "fasterrcnn_resnet50_fpn_v2": "Faster-RCNN (RN50)",
+    "ssdlite320_mobilenet_v3_large": "SSDLite (MobileNetv3-L)",
+    "vgg16": "VGG-16",
+}
+
+# Bars lower than this (in y-axis units) get their series name drawn above the
+# bar instead of inside it -- presumably because the rotated text would not fit.
+LABEL_INSIDE_MIN_HEIGHT = 5
+
+for chosen_model in ["resnet101", "efficientnet_v2_l", "mistral-7b", "t5"]:
+    df_model = df[df["model_clean"] == chosen_model]
+    with plt.rc_context(bundles.icml2024(column="full")):
+        fig, ax = plt.subplots()
+        # NOTE(review): the CSV column is called "Memory Usage (GB)" while the axis
+        # label says GiB -- confirm which unit the data is actually in.
+        ax.set_ylabel("Peak memory [GiB]", size="large")
+        cases = []
+        for i, (case, group) in enumerate(df_model.groupby("case")):
+            cases.append(case)
+            for j, (_memsave, mg) in enumerate(group.groupby("memsave")):
+                # .item() raises unless there is exactly one row per (case, memsave).
+                series_name = mg["colors"].item()
+                x = i + j * width
+                r = ax.bar(
+                    x,
+                    mg["Memory Usage (GB)"],
+                    width,
+                    label=series_name,
+                    color=mg["color_val"],
+                )
+                # Negative padding pulls the value label inside the end of the bar.
+                ax.bar_label(r, mg["Scaled M str"], padding=-20, size="x-large")
+                height = r[0].get_height()
+                yoff = mg["Memory Usage (GB)"].item() * 0.05
+                # Low bars: name sits just above the bar; otherwise near its base.
+                text_y = height + yoff if height < LABEL_INSIDE_MIN_HEIGHT else yoff
+                ax.text(
+                    x,
+                    text_y,
+                    series_name,
+                    ha="center",
+                    va="bottom",
+                    rotation="vertical",
+                    size="x-large",
+                )
+
+        # Put each tick midway between the bars drawn at i and i + width.
+        ax.set_xticks(np.arange(len(cases)) + width / 2, cases)
+        ax.tick_params(labelsize="x-large")
+        ax.set_title(names[chosen_model], fontsize="xx-large", fontweight=1000)
+        fig.savefig(
+            f"results/paper/poster_plot_{chosen_model}.pdf",
+            bbox_inches="tight",
+        )
+    # pyplot keeps every figure alive until it is closed; release it per model.
+    plt.close(fig)
diff --git a/experiments/visual_abstract/.gitignore b/experiments/visual_abstract/.gitignore
@@ -0,0 +1,2 @@
+!raw/**.txt
+!gathered/**.csv
diff --git a/experiments/visual_abstract/README.md b/experiments/visual_abstract/README.md
@@ -0,0 +1,17 @@
+To reproduce this experiment,
+
+1. Run
+   ```bash
+   python generate_data.py
+   ```
+   You may have to set `skip_existing=False`; otherwise, runs for which data already exists will be skipped.
+
+2. Gather the results
+   ```bash
+   python gather_data.py
+   ```
+
+3. Plot the results
+   ```bash
+   python plot.py
+   ```
diff --git a/experiments/visual_abstract/gather_data.py b/experiments/visual_abstract/gather_data.py
@@ -5,37 +5,62 @@
 
 from pandas import DataFrame
 
-HERE = path.abspath(__file__)
-HEREDIR = path.dirname(HERE)
+HEREDIR = path.dirname(path.abspath(__file__))
 RAWDATADIR = path.join(HEREDIR, "raw")
 DATADIR = path.join(HEREDIR, "gathered")
 makedirs(RAWDATADIR, exist_ok=True)
 makedirs(DATADIR, exist_ok=True)
 
 max_num_layers = 10
-requires_grads = ["all", "none", "4", "4+"]
-implementations = ["torch", "ours"]
+# Option values; the main block below iterates over their Cartesian product.
+requires_grads = {"all", "none", "4", "4+"}
+implementations = {"torch", "ours"}
+architectures = {
+    "linear",
+    "conv1d",
+    "conv2d",
+    "conv3d",
+    "bn2d",
+    "conv_transpose1d",
+    "conv_transpose2d",
+    "conv_transpose3d",
+}
+modes = {"eval", "train"}
+use_compiles = {False, True}
 
 if __name__ == "__main__":
-    for implementation, requires_grad in product(implementations, requires_grads):
+    # The option collections are sets, whose iteration order can differ between
+    # runs; sorting makes the order in which combinations are gathered repeatable.
+    for implementation, requires_grad, architecture, mode, use_compile in product(
+        sorted(implementations),
+        sorted(requires_grads),
+        sorted(architectures),
+        sorted(modes),
+        sorted(use_compiles),
+    ):
+        # "ours" is only gathered for requires_grad == "4".
         if implementation == "ours" and requires_grad != "4":
             continue
 
-        layers = list(range(1, max_num_layers + 1))
+        # Raw and gathered file names share this prefix and suffix; only the raw
+        # files carry the per-layer "_num_layers_<n>" part in between.
+        prefix = f"peakmem_{architecture}_mode_{mode}_implementation_{implementation}"
+        compile_tag = "_use_compile" if use_compile else ""
+        suffix = f"_requires_grad_{requires_grad}{compile_tag}"
+
         peakmems = []
+        layers = list(range(1, max_num_layers + 1))
         for num_layers in layers:
-            with open(
-                path.join(
-                    RAWDATADIR,
-                    f"peakmem_implementation_{implementation}_num_layers_{num_layers}_requires_grad_{requires_grad}.txt",
-                ),
-                "r",
-            ) as f:
+            readpath = path.join(
+                RAWDATADIR,
+                f"{prefix}_num_layers_{num_layers}{suffix}.txt",
+            )
+            with open(readpath, "r") as f:
                 peakmems.append(float(f.read()))
 
         df = DataFrame({"num_layers": layers, "peakmem": peakmems})
         savepath = path.join(
             DATADIR,
-            f"peakmem_implementation_{implementation}_requires_grad_{requires_grad}.csv",
+            f"{prefix}{suffix}.csv",
         )
         df.to_csv(savepath, index=False)
diff --git a/...s/visual_abstract/gathered/peakmem_bn2d_mode_eval_implementation_ours_requires_grad_4.csv b/...s/visual_abstract/gathered/peakmem_bn2d_mode_eval_implementation_ours_requires_grad_4.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1210.33984375
+2,1722.51953125
+3,1724.1484375
+4,1725.0546875
+5,2235.15625
+6,2237.20703125
+7,2237.55859375
+8,2237.51171875
+9,2236.9921875
+10,2237.44921875
diff --git a/...visual_abstract/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_4+.csv b/...visual_abstract/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_4+.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1212.015625
+2,1721.953125
+3,1724.4296875
+4,1723.90234375
+5,2232.765625
+6,2747.5703125
+7,3259.1484375
+8,3771.74609375
+9,4284.9921875
+10,4793.58984375
diff --git a/...act/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_4+_use_compile.csv b/...act/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_4+_use_compile.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1623.7890625
+2,1624.125
+3,1623.55859375
+4,2147.23046875
+5,2658.2109375
+6,3172.609375
+7,3685.63671875
+8,4198.0078125
+9,4709.9375
+10,5223.6484375
diff --git a/.../visual_abstract/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_4.csv b/.../visual_abstract/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_4.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1209.75
+2,1721.83203125
+3,1722.42578125
+4,1724.9296875
+5,2236.58984375
+6,2748.8203125
+7,3259.17578125
+8,3769.546875
+9,4279.85546875
+10,4795.05078125
diff --git a/...ract/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_4_use_compile.csv b/...ract/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_4_use_compile.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1620.76171875
+2,1624.90234375
+3,1623.28515625
+4,2145.8203125
+5,2146.39453125
+6,2148.33984375
+7,2149.27734375
+8,2149.3828125
+9,2150.59375
+10,2151.4921875
diff --git a/...isual_abstract/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_all.csv b/...isual_abstract/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_all.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1212.51171875
+2,1725.0234375
+3,2236.99609375
+4,2749.53515625
+5,3261.1875
+6,3773.234375
+7,4284.86328125
+8,4796.8671875
+9,5309.7734375
+10,5821.6875
diff --git a/...ct/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_all_use_compile.csv b/...ct/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_all_use_compile.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1632.88671875
+2,2146.1015625
+3,2658.85546875
+4,3170.89453125
+5,3685.31640625
+6,4198.48046875
+7,4710.48828125
+8,5223.71484375
+9,5737.0703125
+10,6248.1953125
diff --git a/...sual_abstract/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_none.csv b/...sual_abstract/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_none.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1205.87890625
+2,1720.671875
+3,1722.5625
+4,1724.53125
+5,1723.91796875
+6,1724.0078125
+7,1723.90234375
+8,1724.80859375
+9,1723.9375
+10,1724.43359375
diff --git a/...t/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_none_use_compile.csv b/...t/gathered/peakmem_bn2d_mode_eval_implementation_torch_requires_grad_none_use_compile.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1623.578125
+2,1625.23046875
+3,1623.54296875
+4,1624.38671875
+5,1625.890625
+6,1625.9375
+7,1626.19921875
+8,1626.609375
+9,1627.87109375
+10,1626.35546875
diff --git a/.../visual_abstract/gathered/peakmem_bn2d_mode_train_implementation_ours_requires_grad_4.csv b/.../visual_abstract/gathered/peakmem_bn2d_mode_train_implementation_ours_requires_grad_4.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1211.921875
+2,1724.1640625
+3,1723.31640625
+4,1725.05078125
+5,2232.4296875
+6,2748.28515625
+7,3258.1640625
+8,3771.3359375
+9,4284.6015625
+10,4796.40625
diff --git a/...isual_abstract/gathered/peakmem_bn2d_mode_train_implementation_torch_requires_grad_4+.csv b/...isual_abstract/gathered/peakmem_bn2d_mode_train_implementation_torch_requires_grad_4+.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1210.81640625
+2,1724.125
+3,1724.16015625
+4,1721.98046875
+5,2236.72265625
+6,2745.14453125
+7,3256.703125
+8,3770.43359375
+9,4285.1171875
+10,4792.6328125
diff --git a/...ct/gathered/peakmem_bn2d_mode_train_implementation_torch_requires_grad_4+_use_compile.csv b/...ct/gathered/peakmem_bn2d_mode_train_implementation_torch_requires_grad_4+_use_compile.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1625.4921875
+2,2136.359375
+3,2649.94140625
+4,3683.1015625
+5,4710.7109375
+6,5737.55859375
+7,6761.96484375
+8,7788.95703125
+9,8812.8203125
+10,9836.51953125
diff --git a/...visual_abstract/gathered/peakmem_bn2d_mode_train_implementation_torch_requires_grad_4.csv b/...visual_abstract/gathered/peakmem_bn2d_mode_train_implementation_torch_requires_grad_4.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1212.6796875
+2,1724.00390625
+3,1724.50390625
+4,1725.6796875
+5,2236.87890625
+6,2747.453125
+7,3258.3984375
+8,3770.7265625
+9,4282.140625
+10,4795.65625
diff --git a/...act/gathered/peakmem_bn2d_mode_train_implementation_torch_requires_grad_4_use_compile.csv b/...act/gathered/peakmem_bn2d_mode_train_implementation_torch_requires_grad_4_use_compile.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1625.41015625
+2,2136.953125
+3,2649.83203125
+4,3683.80078125
+5,4709.12109375
+6,5735.45703125
+7,6756.390625
+8,7788.4609375
+9,8814.35546875
+10,9836.36328125
diff --git a/...sual_abstract/gathered/peakmem_bn2d_mode_train_implementation_torch_requires_grad_all.csv b/...sual_abstract/gathered/peakmem_bn2d_mode_train_implementation_torch_requires_grad_all.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1213.3984375
+2,1725.0625
+3,2237.703125
+4,2749.45703125
+5,3261.8671875
+6,3773.328125
+7,4286.08203125
+8,4797.41796875
+9,5309.5390625
+10,5821.81640625
diff --git a/...t/gathered/peakmem_bn2d_mode_train_implementation_torch_requires_grad_all_use_compile.csv b/...t/gathered/peakmem_bn2d_mode_train_implementation_torch_requires_grad_all_use_compile.csv
@@ -0,0 +1,11 @@
+num_layers,peakmem
+1,1633.6875
+2,2658.55078125
+3,3684.03125
+4,4708.41015625
+5,5736.94921875
+6,6759.53125
+7,7786.265625
+8,8811.86328125
+9,9835.44921875
+10,10861.7421875