Merge branch 'main' into bn-eval
plutonium-239 authored Aug 22, 2024
2 parents 1d85817 + ea97d77 commit 1062dd2
Showing 12 changed files with 297 additions and 44 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -2,6 +2,8 @@
 *.txt
 *.csv
 !requirements.txt
+torchviz-output/
+torchview-output/
 
 # generated docs
 docs_src/_build/
18 changes: 13 additions & 5 deletions experiments/get_best_results.py
@@ -7,7 +7,7 @@
 
 import pandas as pd
 
-from experiments.util.collect_results import case_mapping
+from experiments.util.collect_results import case_inv_mapping
 
 
 def main(base_dir: str):
@@ -16,17 +16,25 @@ def main(base_dir: str):
     Args:
         base_dir (str): The base results dir
     """
-    for device, arch in product(["cuda", "cpu"], ["linear", "conv"]):
+    # Don't recognize None as NaN
+    custom_na_values = pd._libs.parsers.STR_NA_VALUES - {"None"}
+    for device, arch in product(["cuda", "cpu"], ["linear", "conv", "transformer"]):
         # usage stats
         df = None
         idx_col = ["model", "case"]
         for fname in glob(os.path.join(base_dir, f"usage_stats-{arch}-{device}-*.csv")):
             with open(fname) as f:
-                f.readline()
-                temp_df = pd.read_csv(f, index_col=idx_col)
+                # f.readline()
+                temp_df = pd.read_csv(
+                    f,
+                    index_col=idx_col,
+                    header=1,
+                    na_values=custom_na_values,
+                    keep_default_na=False,
+                )
                 df = temp_df if df is None else pd.concat([df, temp_df])
         if df is not None:
-            df = df.rename(index=case_mapping, level=1)
+            df = df.rename(index=case_inv_mapping, level=1)
             df["Memory Usage (GB)"] = df["Memory Usage (MB)"] / 1024
             df = df.drop(columns=["Memory Usage (MB)"])
             best_results = df.groupby(idx_col).min()
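Note on the read_csv change above: keep_default_na=False together with the trimmed na_values set keeps the literal case label "None" (later mapped to "All" by case_inv_mapping) from being parsed as NaN, and header=1 replaces the manual f.readline() skip. A minimal, self-contained sketch of that behavior, using a made-up two-row CSV:

import io

import pandas as pd

# Hypothetical stand-in for a usage_stats CSV: one metadata line, then the
# real header, then a row whose case label is the literal string "None".
csv = io.StringIO("run metadata\nmodel,case,Memory Usage (MB)\nresnet18,None,512\n")

custom_na_values = pd._libs.parsers.STR_NA_VALUES - {"None"}
df = pd.read_csv(
    csv,
    index_col=["model", "case"],
    header=1,                    # use the second line as the header, like f.readline() did
    na_values=custom_na_values,  # every default NA token except "None"
    keep_default_na=False,
)
print(df.index.get_level_values("case").tolist())  # ['None'], not [nan]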
2 changes: 1 addition & 1 deletion experiments/paper_demo.py
@@ -52,7 +52,7 @@
 # num_classes = 1000
 # device = "cuda"
 # architecture = "conv"
-# cases = collect_results.select_cases(['All', 'Input', 'Conv', 'Norm', 'SurgicalFirst', 'SurgicalLast'])
+# cases = collect_results.select_cases(['All', 'Input', 'Conv', 'Norm'])
 
 # ============== TRANSFORMER CONFIG ==============
 # Valid choices for models are in models.transformer_model_fns
56 changes: 49 additions & 7 deletions experiments/util/collect_results.py
@@ -27,14 +27,50 @@
     ],
 }
 
-case_mapping = {
-    "None": "All",
-    "grad_input + no_grad_conv_weights + no_grad_conv_bias + no_grad_linear_weights + no_grad_linear_bias + no_grad_norm_weights + no_grad_norm_bias": "Input",
-    "no_grad_linear_weights + no_grad_linear_bias + no_grad_norm_weights + no_grad_norm_bias": "Conv",
-    "no_grad_conv_weights + no_grad_conv_bias + no_grad_linear_weights + no_grad_linear_bias": "Norm",
+cases = {
+    "All": None,  # ALL
+    "Input": [  # INPUT
+        "grad_input",
+        "no_grad_conv_weights",
+        "no_grad_conv_bias",
+        "no_grad_linear_weights",
+        "no_grad_linear_bias",
+        "no_grad_norm_weights",
+        "no_grad_norm_bias",
+    ],
+    "Conv": [  # CONV
+        "no_grad_linear_weights",
+        "no_grad_linear_bias",
+        "no_grad_norm_weights",
+        "no_grad_norm_bias",
+    ],
+    "Linear": [  # LINEAR
+        "no_grad_conv_weights",
+        "no_grad_conv_bias",
+        "no_grad_norm_weights",
+        "no_grad_norm_bias",
+    ],
+    "Norm": [  # NORM
+        "no_grad_conv_weights",
+        "no_grad_conv_bias",
+        "no_grad_linear_weights",
+        "no_grad_linear_bias",
+    ],
 }
 
 
+def select_cases(selected: List[str]) -> List[Union[List[str], None]]:
+    """Helper function to return cases selected by their names
+
+    Args:
+        selected (List[str]): Which cases to select, strings can be keys of the cases table
+
+    Returns:
+        List[Union[List[str], None]]: Selected cases
+    """
+    return [cases[s] for s in selected]
+
+
 def make_case_str(case: Union[None, List[str]]) -> str:
     """Format case into a string
@@ -47,6 +83,9 @@ def make_case_str(case: Union[None, List[str]]) -> str:
     return "None" if case is None else " + ".join(case)
 
 
+case_inv_mapping = {make_case_str(v): k for k, v in cases.items()}
+
+
 def hyperparam_str(args: SimpleNamespace) -> str:
     """Format hyperparams into a string
@@ -172,12 +211,15 @@ def _display_run(
         """
         # print(f"{model} input ({input_channels},{input_HW},{input_HW}) {device}")
         # print('='*78)
-        s = f"{model} input ({self.batch_size},{self.input_channels},{self.input_HW},{self.input_HW}) {self.device}"
+        if self.architecture == "conv":
+            s = f"{model} input ({self.batch_size},{self.input_channels},{self.input_HW},{self.input_HW}) {self.device}"
+        elif self.architecture == "transformer":
+            s = f"{model} input ({self.batch_size},{self.input_HW},{self.input_channels}(or model hidden size)) {self.device}"
         print(s.center(78, "="))
 
         for out, case in zip(outputs, self.cases):
             print(
-                f"{strings[estimate][1]} ({case_mapping[make_case_str(case)]}): {out:.3f}{strings[estimate][0]}"
+                f"{strings[estimate][1]} ({case_inv_mapping[make_case_str(case)]}): {out:.3f}{strings[estimate][0]}"
             )
 
         # CODE ONLY APPLIES WITH OLD RUNDEMO.PY
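Taken together, the new cases table, select_cases, make_case_str, and the derived case_inv_mapping form a round trip between human-readable case names and the flag strings stored in the results CSVs. A short usage sketch (assuming the imports resolve as in this repository):

from experiments.util.collect_results import (
    case_inv_mapping,
    make_case_str,
    select_cases,
)

selected = select_cases(["All", "Conv"])
# -> [None, ["no_grad_linear_weights", "no_grad_linear_bias",
#            "no_grad_norm_weights", "no_grad_norm_bias"]]

for case in selected:
    flat = make_case_str(case)     # "None" or "a + b + ..." as stored in the CSVs
    print(case_inv_mapping[flat])  # back to "All" / "Conv"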
51 changes: 48 additions & 3 deletions experiments/util/estimate.py
@@ -38,6 +38,8 @@
     "no_grad_norm_bias",
     "grad_input",
     "no_grad_input",
+    "grad_embed_weights",
+    "no_grad_embed_weights",
 ]
 
 
@@ -62,7 +64,10 @@ def parse_case(case: Optional[List[str]]) -> Dict[str, bool]:
 
 
 def skip_case_check(args: argparse.Namespace) -> bool:
-    """Decide whether to skip the case (when case has grad_norm_* but model does not have any normalization layers)
+    """Decide whether to skip the case:
+
+    1. when case has grad_norm_* but model does not have any normalization layers
+    2. when case has no_grad_embed_weights but no grad_input: there is a backward error (no input requires_grad)
 
     Args:
         args (argparse.Namespace): args
@@ -73,12 +78,16 @@ def skip_case_check(args: argparse.Namespace) -> bool:
     invalid = False
     if args.case is None:
         return invalid
+    # 1.
     for c in ["grad_norm_bias", "grad_norm_weights"]:
         if c in args.case and args.model in models.models_without_norm:
             invalid = True
     for c in ["no_grad_norm_bias", "no_grad_norm_weights"]:
         if c not in args.case and args.model in models.models_without_norm:
             invalid = True
+    # 2.
+    if "no_grad_embed_weights" in args.case and "grad_input" not in args.case:
+        invalid = True
     if invalid:
         if args.print:
             print("-1")
@@ -226,7 +235,7 @@ def estimate_mem_savings(
         type=str,
         required=True,
         help="Which architecture to run",
-        choices=["conv", "linear"],
+        choices=["conv", "linear", "transformer", "VLM"],
     )
     parser.add_argument(
         "--estimate",
@@ -275,23 +284,59 @@
         input_shape = (args.input_channels, args.input_hw, args.input_hw)
         models.conv_input_shape = input_shape
         model_fn = models.conv_model_fns.get(args.model)
+        y_args = {"size": (batch_size,), "low": 0, "high": num_classes}
         assert (
             model_fn is not None
         ), f"Conv model name {args.model} not found, must be one of {list(models.conv_model_fns.keys())}"
     elif args.architecture == "linear":
         input_shape = [args.input_hw**2]
         models.linear_input_shape = input_shape[0]
         model_fn = models.linear_model_fns.get(args.model)
+        y_args = {"size": (batch_size,), "low": 0, "high": num_classes}
         assert (
             model_fn is not None
         ), f"Linear model name {args.model} not found, must be one of {list(models.linear_model_fns.keys())}"
+    elif args.architecture == "transformer":
+        vocab_dim = args.num_classes
+        embed_dim = args.input_channels
+        seq_len = args.input_hw
+        model_fn = models.transformer_model_fns.get(args.model)
+        if args.model in models.hf_transformers_models:
+            model_fn_orig = model_fn
+            model_fn = lambda: models.TransformersModelWrapper(  # noqa: E731
+                model_fn_orig, args.model
+            )
+            config = models.get_transformers_config(args.model)
+            # as per transformers.PretrainedConfig these 2 should be present in all models:
+            vocab_dim = config.vocab_size
+            embed_dim = config.hidden_size
+        models.transformer_input_shape = (vocab_dim, embed_dim)
+        input_shape = [seq_len, embed_dim]
+        y_args = {"size": (batch_size, seq_len), "low": 0, "high": vocab_dim}
+        assert (
+            model_fn is not None
+        ), f"Transformer model name {args.model} not found, must be one of {list(models.transformer_model_fns.keys())}"
+    elif args.architecture == "VLM":
+        # model format: `vlm!<vis_model>!<vis_model_arch>!<llm>`
+        # eg: `vlm!vit!transformer!memsave_gpt2`
+        is_vlm, vis_model, vis_model_arch, llm = args.model.split("!")
+        assert is_vlm == "vlm"
+        assert vis_model_arch in ["transformer", "conv"]
+        model_fn = lambda: models.VLM(vis_model, vis_model_arch, llm)  # noqa: E731
+        config = models.get_transformers_config(llm)
+        vocab_dim = config.vocab_size
+        embed_dim = config.hidden_size
+        seq_len = (args.input_hw // 16) ** 2
+        y_args = {"size": (batch_size, seq_len), "low": 0, "high": vocab_dim}
+        input_shape = (args.input_channels, args.input_hw, args.input_hw)
+        models.conv_input_shape = input_shape
 
     loss_fn = CrossEntropyLoss
 
     manual_seed(0)  # make deterministic
 
     x = rand(batch_size, *input_shape, device=dev)
-    y = randint(size=(batch_size,), low=0, high=num_classes, device=dev)
+    y = randint(**y_args, device=dev)
     targets = None
     if args.model in models.detection_models:
         # pred is a dictionary of losses
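The per-architecture y_args dict is what lets the single call y = randint(**y_args, device=dev) produce one class id per sample for conv/linear models and one token id per sequence position for transformer and VLM models. A standalone sketch with illustrative, made-up sizes:

from torch import randint

batch_size, seq_len, vocab_dim, num_classes = 2, 4, 50257, 1000  # illustrative values

# conv/linear: one class id per sample
y_cls = randint(**{"size": (batch_size,), "low": 0, "high": num_classes})
# transformer/VLM: one token id per sequence position
y_tok = randint(**{"size": (batch_size, seq_len), "low": 0, "high": vocab_dim})

print(y_cls.shape, y_tok.shape)  # torch.Size([2]) torch.Size([2, 4])

For the "VLM" branch, the components are packed into the --model string itself (e.g. vlm!vit!transformer!memsave_gpt2) and recovered with a single split("!").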
[Diffs for the remaining 7 changed files were not loaded.]
