diff --git a/experiments/util/measurements.py b/experiments/util/measurements.py index 6a360b9..1e0810d 100644 --- a/experiments/util/measurements.py +++ b/experiments/util/measurements.py @@ -26,6 +26,7 @@ Parameter, Embedding ) +from transformers import Conv1D from torchvision.models.convnext import LayerNorm2d from memsave_torch.nn.Conv2d import MemSaveConv2d @@ -322,7 +323,7 @@ def separate_grad_arguments( Raises: NotImplementedError: If an unknown layer with parameters is encountered. """ - linear = (Linear, MemSaveLinear) + linear = (Linear, MemSaveLinear, Conv1D) conv = ( Conv1d, Conv2d, @@ -346,7 +347,7 @@ def separate_layer(layer: Module, grad_weight: bool, grad_bias: bool): grad_bias: Whether to compute the gradient of the layer bias. """ leafs.append(layer.weight) if grad_weight else no_leafs.append(layer.weight) - if layer.bias is not None: + if 'bias' in layer._parameters and layer.bias is not None: leafs.append(layer.bias) if grad_bias else no_leafs.append(layer.bias) layers = [m for m in model.modules() if len(list(m.modules())) == 1]