QDQ Int4 per group quantization #5

Draft: wants to merge 1 commit into main
50 changes: 50 additions & 0 deletions brevitas/int4_pergroup_qdq.py
@@ -0,0 +1,50 @@
import torch
import torch.nn as nn


def quantize(tensor, scale, zero_point, is_asym=False):
    if is_asym:
        # Unsigned 4-bit range [0, 15]
        clamp_min, clamp_max = torch.tensor(0.), torch.tensor(15.)
    else:
        # Signed, narrow-range 4-bit range [-7, 7]
        clamp_min, clamp_max = torch.tensor(-7.), torch.tensor(7.)
    quant_tensor = torch.clamp(torch.round(tensor / scale + zero_point), clamp_min, clamp_max)
    return quant_tensor


def dequantize(tensor, scale, zero_point):
    return (tensor - zero_point) * scale


class QuantLinear(nn.Module):
    def __init__(self, in_ch, out_ch, quant_param):
        super().__init__()
        self.out_ch = out_ch
        self.in_ch = in_ch
        self.linear = nn.Linear(in_ch, out_ch)
        self.group_dim = 1
        self.group_size = 32  # Can be parametrized

        # Field names are temporary
        # weight_scale has shape [out_ch, in_ch // self.group_size, 1]
        po2_weight_scale = torch.tensor(quant_param['weight_scale']).view(quant_param['weight_scale_shape'])
        assert po2_weight_scale.dtype == torch.int8
        assert po2_weight_scale.shape == (out_ch, in_ch // self.group_size, 1)
        weight_scale = torch.pow(2, po2_weight_scale.to(torch.float16))  # Assuming fp16 dtype

        # weight_zp has shape [out_ch, in_ch//self.group_size, 1]
        weight_zp = torch.tensor(quant_param['weight_zp']).view(quant_param['weight_zp_shape'])
        assert quant_param['weight_zp_dtype'] == 'torch.int8', f"Weight zero-point dtype should be 'torch.int8', found: {quant_param['weight_zp_dtype']}"
        assert weight_zp.shape == (out_ch, in_ch // self.group_size, 1)
        assert torch.max(weight_zp) <= 15., "Max value is above uint4"

        self.register_buffer('weight_scale', weight_scale)
        self.register_buffer('weight_zp', weight_zp)

    # I.e., "fake quantization"
    def qdq_forward(self, x):
        # Go from [out_ch, in_ch] to [out_ch, in_ch // group_size, group_size]
        weight = self.linear.weight.view(self.out_ch, self.in_ch // self.group_size, self.group_size)
        quant_weight = quantize(weight, self.weight_scale, self.weight_zp, is_asym=True)
        dequantized_weight = dequantize(quant_weight, self.weight_scale, self.weight_zp)
        # Go from [out_ch, in_ch // group_size, group_size] to [out_ch, in_ch]
        dequantized_weight = dequantized_weight.flatten(1)
        out = torch.nn.functional.linear(x, dequantized_weight, self.linear.bias)
        return out
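
For reference, a minimal usage sketch (not part of the diff). The quant_param contents below are hypothetical and only illustrate the shapes and dtypes the constructor expects:

# Hypothetical quant_param, for illustration only; values are not from the PR
out_ch, in_ch, group_size = 8, 64, 32
n_groups = in_ch // group_size
quant_param = {
    'weight_scale': torch.zeros(out_ch, n_groups, 1, dtype=torch.int8),   # power-of-two exponents
    'weight_scale_shape': (out_ch, n_groups, 1),
    'weight_zp': torch.full((out_ch, n_groups, 1), 8, dtype=torch.int8),  # uint4 zero-points stored as int8
    'weight_zp_shape': (out_ch, n_groups, 1),
    'weight_zp_dtype': 'torch.int8',
}
layer = QuantLinear(in_ch, out_ch, quant_param)
x = torch.randn(2, in_ch)
print(layer.qdq_forward(x).shape)  # torch.Size([2, 8])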