From 1516933322cf049c9a761ab318ee6ce4289c83c6 Mon Sep 17 00:00:00 2001 From: Bharath Ramaswamy Date: Thu, 12 Sep 2024 09:52:55 +0000 Subject: [PATCH] Added documentation for release 1.33.0 Signed-off-by: Bharath Ramaswamy --- .../Examples/onnx/quantization/adaround.html | 58 ++--- .../Examples/onnx/quantization/cle.html | 54 ++--- .../Examples/onnx/quantization/quantsim.html | 54 ++--- .../compression/channel_pruning.html | 46 ++-- .../tensorflow/compression/spatial_svd.html | 50 ++-- .../spatial_svd_channel_pruning.html | 58 ++--- .../tensorflow/quantization/adaround.html | 58 ++--- .../tensorflow/quantization/autoquant.html | 66 +++-- .../quantization/bn_reestimation.html | 74 +++--- .../tensorflow/quantization/cle_bc.html | 62 +++-- .../quantization/keras/adaround.html | 50 ++-- .../quantization/keras/autoquant.html | 62 +++-- .../quantization/keras/bn_reestimation.html | 62 +++-- .../keras/keras_transformer_qat.html | 18 +- .../quantization/keras/model_preparer.html | 38 ++- .../tensorflow/quantization/keras/qat.html | 167 ++++++------- .../tensorflow/quantization/keras/qat.ipynb | 225 ++++++------------ .../keras/qat_range_learning.html | 166 ++++++------- .../keras/qat_range_learning.ipynb | 225 ++++++------------ .../quantization/keras/quant_analyzer.html | 54 ++--- .../keras/quantsim_adaround_pcq.html | 50 ++-- .../quantization/keras/quantsim_cle.html | 62 +++-- .../quantization/keras/quantsim_cle.ipynb | 10 +- .../Examples/tensorflow/quantization/qat.html | 58 ++--- .../quantization/qat_range_learning.html | 58 ++--- .../quantization/quant_analyzer.html | 54 ++--- .../torch/compression/channel_pruning.html | 50 ++-- .../torch/compression/spatial_svd.html | 50 ++-- .../spatial_svd_channel_pruning.html | 58 ++--- .../Examples/torch/quantization/adaround.html | 50 ++-- .../torch/quantization/autoquant.html | 58 ++--- .../torch/quantization/bn_reestimation.html | 66 +++-- .../Examples/torch/quantization/cle_bc.html | 58 ++--- .../Examples/torch/quantization/qat.html | 54 ++--- .../quantization/qat_range_learning.html | 54 ++--- .../torch/quantization/quant_analyzer.html | 54 ++--- .../aimet_common/bias_correction.html | 7 - .../1.33.0/_modules/aimet_common/defs.html | 7 - .../1.33.0/_modules/aimet_common/utils.html | 7 - .../adaround/adaround_weight.html | 7 - .../_modules/aimet_tensorflow/auto_quant.html | 7 - .../aimet_tensorflow/batch_norm_fold.html | 7 - .../aimet_tensorflow/bias_correction.html | 7 - .../aimet_tensorflow/bn_reestimation.html | 7 - .../_modules/aimet_tensorflow/compress.html | 7 - .../cross_layer_equalization.html | 7 - .../_modules/aimet_tensorflow/defs.html | 7 - .../keras/batch_norm_fold.html | 7 - .../keras/bn_reestimation.html | 7 - .../aimet_tensorflow/keras/compress.html | 7 - .../keras/cross_layer_equalization.html | 7 - .../keras/layer_output_utils.html | 7 - .../keras/model_preparer.html | 28 +-- .../keras/quant_analyzer.html | 7 - .../aimet_tensorflow/keras/quantsim.html | 7 - .../aimet_tensorflow/layer_output_utils.html | 7 - .../aimet_tensorflow/plotting_utils.html | 7 - .../aimet_tensorflow/quant_analyzer.html | 7 - .../_modules/aimet_tensorflow/quantsim.html | 7 - .../1.33.0/_modules/aimet_tensorflow/svd.html | 7 - .../utils/convert_tf_sess_to_keras.html | 7 - .../aimet_tensorflow/utils/graph.html | 7 - .../aimet_torch/adaround/adaround_weight.html | 7 - .../_modules/aimet_torch/auto_quant.html | 7 - .../_modules/aimet_torch/batch_norm_fold.html | 7 - .../_modules/aimet_torch/bias_correction.html | 7 - .../_modules/aimet_torch/bn_reestimation.html | 
7 - .../1.33.0/_modules/aimet_torch/compress.html | 7 - .../aimet_torch/cross_layer_equalization.html | 7 - .../1.33.0/_modules/aimet_torch/defs.html | 7 - .../aimet_torch/layer_output_utils.html | 7 - .../_modules/aimet_torch/model_preparer.html | 9 +- .../1.33.0/_modules/aimet_torch/peft.html | 14 +- .../_modules/aimet_torch/quant_analyzer.html | 7 - .../1.33.0/_modules/aimet_torch/quantsim.html | 10 +- .../_modules/aimet_torch/v2/nn/base.html | 7 - .../aimet_torch/v2/nn/fake_quant.html | 9 +- .../aimet_torch/v2/nn/true_quant.html | 7 - .../v2/quantization/affine/backends.html | 7 - .../v2/quantization/affine/quantizer.html | 7 - .../v2/quantization/base/quantizer.html | 7 - .../v2/quantization/encoding_analyzer.html | 7 - .../v2/quantization/float/quantizer.html | 7 - .../aimet_torch/v2/quantization/tensor.html | 108 +++------ .../aimet_torch/v2/quantsim/config_utils.html | 7 - .../_modules/aimet_torch/visualize_model.html | 7 - .../visualize_serialized_data.html | 7 - releases/1.33.0/_modules/index.html | 7 - releases/1.33.0/_static/basic.css | 47 ++-- .../api_docs/convert_tf_sess_to_keras.html | 48 ++-- releases/1.33.0/api_docs/index.html | 18 +- releases/1.33.0/api_docs/keras.html | 14 +- releases/1.33.0/api_docs/keras_adaround.html | 40 ++-- .../keras_batchnorm_re_estimation.html | 46 ++-- .../1.33.0/api_docs/keras_compression.html | 58 ++--- .../keras_cross_layer_equalization.html | 38 ++- .../keras_layer_output_generation.html | 28 +-- .../api_docs/keras_model_guidelines.html | 14 +- .../1.33.0/api_docs/keras_model_preparer.html | 28 +-- .../api_docs/keras_primitive_apis_cle.html | 56 ++--- .../1.33.0/api_docs/keras_quant_analyzer.html | 26 +- .../1.33.0/api_docs/keras_quantization.html | 14 +- releases/1.33.0/api_docs/keras_quantsim.html | 32 +-- releases/1.33.0/api_docs/onnx.html | 7 - releases/1.33.0/api_docs/onnx_adaround.html | 7 - releases/1.33.0/api_docs/onnx_auto_quant.html | 7 - .../onnx_cross_layer_equalization.html | 7 - .../onnx_layer_output_generation.html | 7 - .../1.33.0/api_docs/onnx_quant_analyzer.html | 7 - .../1.33.0/api_docs/onnx_quantization.html | 7 - releases/1.33.0/api_docs/onnx_quantsim.html | 7 - .../quantization_encoding_specification.html | 61 ++--- releases/1.33.0/api_docs/tensorflow.html | 14 +- .../1.33.0/api_docs/tensorflow_adaround.html | 46 ++-- .../api_docs/tensorflow_auto_quant.html | 42 ++-- .../tensorflow_batchnorm_re_estimation.html | 38 ++- .../api_docs/tensorflow_bias_correction.html | 62 +++-- .../1.33.0/api_docs/tensorflow_compress.html | 90 ++++--- .../tensorflow_cross_layer_equalization.html | 44 ++-- .../tensorflow_layer_output_generation.html | 28 +-- .../api_docs/tensorflow_model_guidelines.html | 20 +- .../tensorflow_primitive_apis_cle.html | 84 +++---- .../api_docs/tensorflow_quant_analyzer.html | 26 +- .../api_docs/tensorflow_quantization.html | 14 +- .../1.33.0/api_docs/tensorflow_quantsim.html | 44 ++-- ...tensorflow_visualization_quantization.html | 30 +-- releases/1.33.0/api_docs/torch.html | 14 +- releases/1.33.0/api_docs/torch_adaround.html | 46 ++-- .../api_docs/torch_architecture_checker.html | 28 +-- .../1.33.0/api_docs/torch_auto_quant.html | 32 +-- .../torch_batchnorm_re_estimation.html | 42 ++-- .../api_docs/torch_bias_correction.html | 48 ++-- releases/1.33.0/api_docs/torch_compress.html | 86 +++---- .../torch_cross_layer_equalization.html | 42 ++-- .../torch_layer_output_generation.html | 32 +-- .../api_docs/torch_model_guidelines.html | 14 +- .../1.33.0/api_docs/torch_model_preparer.html | 32 +-- 
.../api_docs/torch_model_validator.html | 14 +- releases/1.33.0/api_docs/torch_multi_gpu.html | 14 +- releases/1.33.0/api_docs/torch_peft_lora.html | 46 ++-- .../api_docs/torch_primitive_apis_cle.html | 72 +++--- .../1.33.0/api_docs/torch_quant_analyzer.html | 70 +++--- .../1.33.0/api_docs/torch_quantization.html | 14 +- releases/1.33.0/api_docs/torch_quantsim.html | 56 ++--- .../torch_visualization_compression.html | 30 +-- .../torch_visualization_quantization.html | 40 ++-- releases/1.33.0/genindex.html | 7 - releases/1.33.0/install/index.html | 64 +++-- releases/1.33.0/install/install_docker.html | 134 +++++------ releases/1.33.0/install/install_host.html | 155 ++++++------ releases/1.33.0/objects.inv | Bin 40291 -> 40076 bytes releases/1.33.0/py-modindex.html | 7 - releases/1.33.0/search.html | 7 - releases/1.33.0/searchindex.js | 2 +- releases/1.33.0/toplevelhidden.html | 10 +- .../api/nn.fake_quantization_mixin.html | 26 +- .../torch_docs/api/nn.quantization_mixin.html | 36 ++- ...torch.v2.quantization.affine.Quantize.html | 24 +- ...uantization.affine.QuantizeDequantize.html | 24 +- ...rch.v2.quantization.affine.dequantize.html | 14 +- ...orch.v2.quantization.affine.quantize_.html | 20 +- ...antization.affine.quantize_dequantize.html | 20 +- .../api/quantization/affine/index.html | 30 +-- .../float/FloatQuantizeDequantize.html | 20 +- .../api/quantization/float/index.html | 18 +- .../torch_docs/api/quantization/tensor.html | 38 +-- .../torch_docs/blockwise_quantization.html | 32 +-- .../1.33.0/torch_docs/encoding_analyzer.html | 22 +- releases/1.33.0/torch_docs/examples/ptq.html | 14 +- ...oding_analyzer.MinMaxEncodingAnalyzer.html | 14 +- ...g_analyzer.PercentileEncodingAnalyzer.html | 16 +- ...ncoding_analyzer.SqnrEncodingAnalyzer.html | 22 +- releases/1.33.0/torch_docs/index.html | 21 +- .../1.33.0/torch_docs/quantized_modules.html | 46 ++-- releases/1.33.0/torch_docs/quantizer.html | 44 ++-- .../torch_docs/tutorials/migration_guide.html | 60 +++-- .../tutorials/quickstart_guide.html | 46 ++-- .../aimet_torch/v2/nn/fake_quant.html | 2 +- .../aimet_torch/v2/quantization/tensor.html | 101 +++----- releases/1.33.0/torch_v2/_static/basic.css | 47 ++-- .../_templates/autosummary/class.html | 3 +- .../_templates/autosummary/function.html | 3 +- releases/1.33.0/torch_v2/install/index.html | 57 ++--- .../torch_v2/install/install_docker.html | 125 +++++----- .../1.33.0/torch_v2/install/install_host.html | 146 ++++++------ releases/1.33.0/torch_v2/objects.inv | Bin 11229 -> 11002 bytes releases/1.33.0/torch_v2/searchindex.js | 2 +- releases/1.33.0/torch_v2/toplevelhidden.html | 3 +- .../api/nn.fake_quantization_mixin.html | 19 +- .../torch_docs/api/nn.quantization_mixin.html | 29 ++- ...torch.v2.quantization.affine.Quantize.html | 17 +- ...uantization.affine.QuantizeDequantize.html | 17 +- ...rch.v2.quantization.affine.dequantize.html | 7 +- ...orch.v2.quantization.affine.quantize_.html | 13 +- ...antization.affine.quantize_dequantize.html | 13 +- .../api/quantization/affine/index.html | 23 +- .../float/FloatQuantizeDequantize.html | 13 +- .../api/quantization/float/index.html | 11 +- .../torch_docs/api/quantization/tensor.html | 31 ++- .../torch_docs/blockwise_quantization.html | 25 +- .../torch_docs/encoding_analyzer.html | 15 +- .../torch_v2/torch_docs/examples/ptq.html | 7 +- ...oding_analyzer.MinMaxEncodingAnalyzer.html | 7 +- ...g_analyzer.PercentileEncodingAnalyzer.html | 9 +- ...ncoding_analyzer.SqnrEncodingAnalyzer.html | 15 +- .../1.33.0/torch_v2/torch_docs/index.html | 14 +- 
.../torch_docs/quantized_modules.html | 39 +-- .../1.33.0/torch_v2/torch_docs/quantizer.html | 37 ++- .../torch_docs/tutorials/migration_guide.html | 53 +++-- .../tutorials/quickstart_guide.html | 39 ++- .../1.33.0/torch_v2/user_guide/adaround.html | 32 +-- .../torch_v2/user_guide/auto_quant.html | 24 +- .../torch_v2/user_guide/bn_reestimation.html | 27 +-- .../torch_v2/user_guide/channel_pruning.html | 23 +- .../compression_feature_guidebook.html | 10 +- .../greedy_compression_ratio_selection.html | 23 +- .../1.33.0/torch_v2/user_guide/index.html | 31 ++- .../torch_v2/user_guide/known_issues.html | 7 +- .../user_guide/model_compression.html | 43 ++-- .../torch_v2/user_guide/model_guidelines.html | 12 +- .../user_guide/model_quantization.html | 39 ++- .../post_training_quant_techniques.html | 33 +-- .../torch_v2/user_guide/quant_analyzer.html | 29 +-- .../quantization_aware_training.html | 23 +- .../quantization_configuration.html | 19 +- .../quantization_feature_guidebook.html | 10 +- .../torch_v2/user_guide/quantization_sim.html | 45 ++-- .../user_guide/quantsim_2.0_overview.html | 19 +- .../torch_v2/user_guide/release_notes.html | 79 +++--- .../torch_v2/user_guide/spatial_svd.html | 10 +- .../user_guide/visualization_compression.html | 27 +-- .../user_guide/visualization_quant.html | 23 +- .../torch_v2/user_guide/weight_svd.html | 10 +- .../1.33.0/torch_v2/user_guide/winnowing.html | 19 +- releases/1.33.0/user_guide/adaround.html | 39 +-- releases/1.33.0/user_guide/auto_quant.html | 31 +-- .../1.33.0/user_guide/bn_reestimation.html | 34 +-- .../1.33.0/user_guide/channel_pruning.html | 30 +-- .../compression_feature_guidebook.html | 17 +- releases/1.33.0/user_guide/examples.html | 54 ++--- .../greedy_compression_ratio_selection.html | 30 +-- releases/1.33.0/user_guide/index.html | 38 ++- releases/1.33.0/user_guide/known_issues.html | 14 +- .../1.33.0/user_guide/model_compression.html | 50 ++-- .../1.33.0/user_guide/model_guidelines.html | 19 +- .../1.33.0/user_guide/model_quantization.html | 46 ++-- .../post_training_quant_techniques.html | 40 +--- .../1.33.0/user_guide/quant_analyzer.html | 36 +-- .../quantization_aware_training.html | 30 +-- .../quantization_configuration.html | 26 +- .../quantization_feature_guidebook.html | 17 +- .../1.33.0/user_guide/quantization_sim.html | 52 ++-- .../user_guide/quantsim_2.0_overview.html | 26 +- releases/1.33.0/user_guide/release_notes.html | 86 +++---- releases/1.33.0/user_guide/spatial_svd.html | 17 +- .../user_guide/visualization_compression.html | 34 +-- .../user_guide/visualization_quant.html | 30 +-- releases/1.33.0/user_guide/weight_svd.html | 17 +- releases/1.33.0/user_guide/winnowing.html | 26 +- 259 files changed, 3256 insertions(+), 5104 deletions(-) diff --git a/releases/1.33.0/Examples/onnx/quantization/adaround.html b/releases/1.33.0/Examples/onnx/quantization/adaround.html index 114f3a7..fbb46e8 100644 --- a/releases/1.33.0/Examples/onnx/quantization/adaround.html +++ b/releases/1.33.0/Examples/onnx/quantization/adaround.html @@ -1,8 +1,7 @@ - - + Adaptive Rounding (AdaRound) — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      @@ -1114,16 +1106,16 @@
      -
      +

      Adaptive Rounding (AdaRound)

      This notebook shows a working code example of how to use AIMET to perform Adaptive Rounding (AdaRound).

      AIMET quantization features typically use the “nearest rounding” technique for achieving quantization. When using the “nearest rounding” technique, the weight value is quantized to the nearest integer value.

      AdaRound optimizes a loss function using unlabeled training data to decide whether to quantize a specific weight to the closer integer value or the farther one. Using AdaRound quantization, a model is able to achieve an accuracy closer to the FP32 model, while using low bit-width integer quantization.

      -
      +

      Overall flow

This notebook covers the following: 1. Instantiate the example evaluation and training pipeline 2. Convert an FP32 PyTorch model to ONNX and evaluate the model’s baseline FP32 accuracy 3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score 4. Apply AdaRound and evaluate the simulation model to get a post-finetuned quantized accuracy score

      -
      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art results

      • @@ -1131,7 +1123,7 @@

        What this notebook is not -
        +

        Dataset

        This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, use that. Otherwise, download the dataset from an appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

        Note1: The dataloader provided in this example notebook relies on the ImageNet dataset having the following characteristics: - Subfolders ‘train’ for the training samples and ‘val’ for the validation samples. Please see the pytorch dataset description for more details. - A subdirectory per class, and a file per each image sample.

        @@ -1145,9 +1137,9 @@

        Dataset -
        +

        1. Example evaluation and training pipeline

        The following is an example training and validation loop for this image classification task.

        -
        +


        -
        +

        3. Create a quantization simulation model and determine quantized accuracy

        -
        -
        +

      +

      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this?

      @@ -1280,8 +1272,8 @@

      Fold Batch Normalization layers +

      +

      Create Quantization Sim Model

      Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here - quant_scheme: We set this to “QuantScheme.post_training_tf_enhanced” - Supported options are ‘tf_enhanced’ or ‘tf’ or using Quant Scheme Enum QuantScheme.post_training_tf or QuantScheme.post_training_tf_enhanced - default_activation_bw: Setting this to 8, essentially means that we are asking AIMET to perform all activation quantizations in the model using integer 8-bit precision - default_param_bw: Setting this to 8, essentially means that we are asking AIMET to perform all parameter quantizations in the model using integer 8-bit precision
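As a rough illustration of the parameters described above (a hedged sketch, not the original notebook cell: the aimet_onnx import paths, the use_cuda flag, and the onnx_model variable are assumptions, so please confirm the exact signature in the AIMET API docs):

```python
# Hedged sketch: create a quantization simulation model for an already BN-folded ONNX model.
from aimet_common.defs import QuantScheme
from aimet_onnx.quantsim import QuantizationSimModel   # import path assumed

sim = QuantizationSimModel(model=onnx_model,            # assumed onnx.ModelProto loaded earlier
                           quant_scheme=QuantScheme.post_training_tf_enhanced,
                           default_activation_bw=8,      # 8-bit activation quantization
                           default_param_bw=8,           # 8-bit parameter quantization
                           use_cuda=True)                # assumed flag; set False on CPU-only hosts
```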

      @@ -1303,8 +1295,8 @@

      Create Quantization Sim Model +

      +

      Compute Encodings

      Even though AIMET has added ‘quantizer’ nodes to the model graph, the model is not ready to be used yet. Before we can use the sim model for inference or training, we need to find appropriate scale/offset quantization parameters for each ‘quantizer’ node.

      For activation quantization nodes, we need to pass unlabeled data samples through the model to collect range statistics which will then let AIMET calculate appropriate scale/offset quantization parameters. This process is sometimes referred to as calibration. AIMET simply refers to it as ‘computing encodings’.
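A minimal sketch of this calibration step is shown below. It assumes sim is the QuantizationSimModel created above, that sim passes its onnxruntime session to the callback, and that data_loader yields preprocessed unlabeled batches; the exact callback signature should be checked against the compute_encodings API docs.

```python
# Hedged sketch: run ~1000 unlabeled samples through the sim model so AIMET can
# observe activation ranges and derive scale/offset encodings. Outputs are discarded.
def pass_calibration_data(session, num_samples):
    input_name = session.get_inputs()[0].name
    seen = 0
    for images, _ in data_loader:                 # `data_loader` is an assumed calibration loader
        session.run(None, {input_name: images})
        seen += images.shape[0]
        if seen >= num_samples:
            break

sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                      forward_pass_callback_args=1000)
```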

      @@ -1357,9 +1349,9 @@

      Compute Encodings -
      +

      4. Apply Adaround

      We can now apply AdaRound to this model.

      Some of the parameters for AdaRound are described below
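The detailed parameter walkthrough is not reproduced in this excerpt. Purely as a hedged sketch (the module path, argument names, and the unlabeled_data_loader / iteration counts below are assumptions patterned on the AIMET AdaRound API, not the original cell), the call could look roughly like this:

```python
# Hedged sketch only - verify every name against the AIMET ONNX AdaRound API docs.
from aimet_common.defs import QuantScheme
from aimet_onnx.adaround.adaround_weight import Adaround, AdaroundParameters   # path assumed

params = AdaroundParameters(data_loader=unlabeled_data_loader,  # assumed unlabeled calibration loader
                            num_batches=16,                     # assumed; more batches = better rounding
                            default_num_iterations=5000)        # assumed optimization iterations
ada_model = Adaround.apply_adaround(model, params,
                                    path='./adaround_artifacts',
                                    filename_prefix='adaround',
                                    default_param_bw=8,
                                    default_quant_scheme=QuantScheme.post_training_tf_enhanced)
# The returned model carries the AdaRounded weights; a new QuantizationSimModel is then built
# from it and the exported parameter encodings are loaded into that sim before calibration.
```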

      @@ -1455,16 +1447,16 @@

      4. Apply Adaround -
      +

      Summary

      This example illustrated how the AIMET AdaRound API is invoked to achieve post training quantization. To use AIMET AdaRound for your specific needs, replace the model with your model and replace the data pipeline with your data pipeline. As indicated above, some parameters in this example have been chosen in such a way to make this example execute faster.

      We hope this notebook was useful for you to understand how to use AIMET for performing AdaRound.

      A few additional resources: - Refer to the AIMET API docs to know more details of the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT techniques

      -
      -

      -

      +
      +
      + diff --git a/releases/1.33.0/Examples/onnx/quantization/cle.html b/releases/1.33.0/Examples/onnx/quantization/cle.html index 52cc530..19c89a0 100644 --- a/releases/1.33.0/Examples/onnx/quantization/cle.html +++ b/releases/1.33.0/Examples/onnx/quantization/cle.html @@ -1,8 +1,7 @@ - - + Cross-Layer Equalization (CLE) — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      @@ -1114,23 +1106,23 @@
      -
      +

      Cross-Layer Equalization (CLE)

This notebook showcases a working code example of how to use AIMET to apply Cross-Layer Equalization (CLE). CLE is a post-training quantization technique that aims to improve the quantized accuracy of a given model. CLE does not need any data samples. This technique helps recover quantized accuracy when the model quantization is sensitive to parameter quantization as opposed to activation quantization.

      To learn more about this technique, please refer to the “Data-Free Quantization Through Weight Equalization and Bias Correction” paper from ICCV 2019 - https://arxiv.org/abs/1906.04721

Cross-Layer Equalization AIMET performs the following steps when running CLE: 1. Batch Norm Folding: Folds BN layers into Conv layers immediately before or after the Conv layers. 2. Cross-Layer Scaling: Given a set of consecutive Conv layers, equalizes the range of tensor values per-channel by scaling up/down per-channel weight tensor values of a layer and correspondingly scaling down/up per-channel weight tensor values of the subsequent layer. 3. High Bias Folding: Cross-layer scaling may result in high bias parameter values for some layers. This technique folds some of the bias of a layer into the subsequent layer’s parameters.

      -
      +

      Overall flow

This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the FP32 model and evaluate the model to find the baseline FP32 accuracy 3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score 4. Apply CLE and evaluate the simulation model to get a post-finetuned quantized accuracy score

      -
      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art results. For example, it uses a relatively quantization-friendly model like Resnet18. Also, some optimization parameters are deliberately chosen to have the notebook execute more quickly.


      -
      +

      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

Note1: The ImageNet dataset typically has the following characteristics, and the dataloader provided in this example notebook relies on these - Subfolders ‘train’ for the training samples and ‘val’ for the validation samples. Please see the pytorch dataset description for more details. - A subdirectory per class, and a file per image sample

      @@ -1144,9 +1136,9 @@

      Dataset -
      +

      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.

      -
      +


      -
      +

      3. Create a quantization simulation model and determine quantized accuracy

      -
      -
      +
      +

      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this? On quantized runtimes (like TFLite, SnapDragon Neural Processing SDK, etc.), it is a common practice to fold the BN layers. Doing so results in an inferences/sec speedup since unnecessary computation is avoided. From a floating point compute perspective, a BN-folded model is mathematically equivalent to a model with BN layers and produces the same accuracy. However, folding the BN layers can increase the range of the tensor values @@ -1278,9 +1270,9 @@

      Fold Batch Normalization layers -
      +

      Create Quantization Sim Model

Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here - quant_scheme: We set this to “QuantScheme.post_training_tf_enhanced” - Supported options are ‘tf_enhanced’ or ‘tf’ or using Quant Scheme Enum QuantScheme.post_training_tf or QuantScheme.post_training_tf_enhanced - default_output_bw: Setting this to 8, essentially means that we are asking AIMET to perform all activation quantizations in the model using integer 8-bit precision - default_param_bw: Setting this to 8, essentially means that we are asking AIMET to perform all parameter quantizations in the model using integer 8-bit precision - num_batches: The number of batches used to evaluate the model while calculating the quantization encodings. Only 5 batches are used here to speed up the process. In addition, the @@ -1350,9 +1342,9 @@

      Create Quantization Sim Model -
      +

      4. 1 Cross Layer Equalization

      The next cell performs cross-layer equalization on the model. As noted before, the function folds batch norms, applies cross-layer scaling, and then folds high biases.

      Note: Interestingly, CLE needs BN statistics for its procedure. If a BN folded model is provided, CLE will run the CLS (cross-layer scaling) optimization step but will skip the HBA (high-bias absorption) step. To avoid this, we simply load the original model again before running CLE.
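As a rough sketch of what that cell does (the import path is an assumption based on the aimet_onnx CLE API, and model stands for the freshly reloaded, un-folded ONNX model):

```python
# Hedged sketch: one call applies batch-norm folding, cross-layer scaling, and high-bias folding.
from aimet_onnx.cross_layer_equalization import equalize_model   # import path assumed

equalize_model(model)   # `model` should be the original (not already BN-folded) ONNX model
```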

      @@ -1396,15 +1388,15 @@

      4. 1 Cross Layer Equalization -
      +

      Summary

      Hope this notebook was useful for you to understand how to use AIMET for performing Cross Layer Equalization (CLE).

      Few additional resources - Refer to the AIMET API docs to know more details of the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT techniques

      -
      -

      -

      +

      +
      +
      diff --git a/releases/1.33.0/Examples/onnx/quantization/quantsim.html b/releases/1.33.0/Examples/onnx/quantization/quantsim.html index 469c493..a054af5 100644 --- a/releases/1.33.0/Examples/onnx/quantization/quantsim.html +++ b/releases/1.33.0/Examples/onnx/quantization/quantsim.html @@ -1,8 +1,7 @@ - - + Quantization Simulation — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      @@ -1114,20 +1106,20 @@
      -
      +

      Quantization Simulation

      This notebook shows a working code example of how to use AIMET to perform quantization simulation (quantsim). Quantsim is an AIMET feature that adds quantization simulation ops (also called fake quantization ops sometimes) to a trained ML model in order to compute quantization encodings and estimate the resulting accuracy of the model when deployed on quantized ML accelerators.

      -
      +

      Overall flow

      This notebook covers the following 1. Instantiate the example evaluation pipeline 2. Convert an FP32 PyTorch model to ONNX and evaluate the model’s baseline FP32 accuracy 3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score

      -
      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art quantized accuracy. For example, it uses a relatively quantization-friendly model like Resnet18. Also, optimization techniques such as Quantization-Aware Training, AdaRound, and Cross-Layer Equalization can be employed to improve the accuracy of quantized models.


      -
      +

      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

Note1: The ImageNet dataset typically has the following characteristics, and the dataloader provided in this example notebook relies on these - Subfolders ‘train’ for the training samples and ‘val’ for the validation samples. Please see the pytorch dataset description for more details. - A subdirectory per class, and a file per image sample

      @@ -1141,9 +1133,9 @@

      Dataset -
      +

      1. Example evaluation pipeline

      The following is an example validation loop for this image classification task.

      -
      +


      -
      +

      3. Create a quantization simulation model and determine quantized accuracy

      -
      -
      +
      +

      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this? On quantized runtimes (like TFLite, SnapDragon Neural Processing SDK, etc.), it is a common practice to fold the BN layers. Doing so results in an inferences/sec speedup since unnecessary computation is avoided. From a floating point compute perspective, a BN-folded model is mathematically equivalent to a model with BN layers and produces the same accuracy. However, folding the BN layers can increase the range of the tensor values @@ -1274,8 +1266,8 @@
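A minimal sketch of the folding step itself (the import path and function name are assumptions based on the aimet_onnx batch-norm folding API):

```python
# Hedged sketch: fold every BN layer that AIMET can pair with an adjacent Conv layer, in place.
from aimet_onnx.batch_norm_fold import fold_all_batch_norms_to_weight   # name assumed

fold_all_batch_norms_to_weight(model)   # `model` is the ONNX model evaluated above
```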

      Fold Batch Normalization layers +

      +

      Create Quantization Sim Model

      Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here - quant_scheme: We set this to “QuantScheme.post_training_tf_enhanced” - Supported options are ‘tf_enhanced’ or ‘tf’ or using Quant Scheme Enum QuantScheme.post_training_tf or QuantScheme.post_training_tf_enhanced - default_activation_bw: Setting this to 8, essentially means that we are asking AIMET to perform all activation quantizations in the model using integer 8-bit precision - default_param_bw: Setting this to 8, essentially means that we are asking AIMET to perform all parameter quantizations in the model using integer 8-bit precision

      @@ -1296,9 +1288,9 @@

      Create Quantization Sim Model -
      +

      Compute Encodings

      Even though AIMET has added ‘quantizer’ nodes to the model graph, the model is not ready to be used yet. Before we can use the sim model for inference, we need to find appropriate scale/offset quantization parameters for each ‘quantizer’ node. For activation quantization node, we need to pass unlabeled data samples through the model to collect range statistics which will then let AIMET calculate appropriate scale/offset quantization parameters. This process is sometimes referred to as calibration. AIMET simply refers to it as ‘computing encodings’.

      @@ -1348,14 +1340,14 @@

      Compute Encodings +

      +

      Summary

Hope this notebook was useful for you to understand how to use AIMET for performing quantization simulation.

      Additional resources - Refer to the AIMET API docs to know more details of the APIs and optional parameters.

      -
      -

      - +
      +
      + diff --git a/releases/1.33.0/Examples/tensorflow/compression/channel_pruning.html b/releases/1.33.0/Examples/tensorflow/compression/channel_pruning.html index 92596b4..d946902 100644 --- a/releases/1.33.0/Examples/tensorflow/compression/channel_pruning.html +++ b/releases/1.33.0/Examples/tensorflow/compression/channel_pruning.html @@ -1,8 +1,7 @@ - - + Model Compression Using Channel Pruning — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      @@ -1114,7 +1106,7 @@
      -
      +

      Model Compression Using Channel Pruning

      This notebook shows a working code example of how to use AIMET to perform model compression. The Channel Pruning technique is used in this notebook to achieve model compression.

      Here is a brief introduction to the techniques. Please refer to the AIMET user guide for more details.

      @@ -1126,21 +1118,21 @@

      Model Compression Using Channel Pruning +

      Overall flow

      This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the model and evaluate it to find the baseline accuracy 3. Compress the model and fine-tune:
      3.1 Compress model using Channel Pruning and evaluate it to find post-compression accuracy
      3.2 Fine-tune the model
      -

      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art compression results. For example, some optimization parameters such as num_comp_ratio_candidates, num_eval_iterations and epochs are deliberately chosen to have the notebook execute more quickly.


      -
      +

      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#) and convert them into tfrecords.

      Note1: The ImageNet tfrecords dataset typically has the following characteristics and the dataloader provided in this example notebook rely on these - A folder containing tfrecords files starting with ‘train*’ for training files and ‘valid*’ for validation files. Each tfrecord file should have features: ‘image/encoded’ for image data and ‘image/class/label’ for its corresponding class.

      @@ -1233,9 +1225,9 @@

      Dataset -
      +

      2. Load the model and evaluate it to find the baseline accuracy

For this example notebook, we are going to load a pretrained ResNet50 model from keras and convert it to a tensorflow session. Similarly, you can load any pretrained tensorflow model instead.

      Calling clear_session() releases the global state: this helps avoid clutter from old models and layers, especially when memory is limited.
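A small sketch of what this loading step typically looks like with TF 1.x-style sessions (the notebook's preprocessing and evaluation code is omitted here):

```python
# Sketch: load a pretrained Keras ResNet50 and grab the underlying TensorFlow session.
import tensorflow as tf

tf.compat.v1.keras.backend.clear_session()                     # drop any stale graphs and layers
model = tf.keras.applications.resnet50.ResNet50(weights='imagenet')
sess = tf.compat.v1.keras.backend.get_session()                # session consumed by AIMET's session-based APIs
```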

      @@ -1310,10 +1302,10 @@

      2. Load the model and evaluate it to find the baseline accuracy +

      +

      3. Compress the model and fine-tune

      -
      +

      3.1. Compress model using Channel Pruning and evaluate it to find post-compression accuracy

      Now we use AIMET to define compression parameters for Channel Pruning, few of which are explained here
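Since the full parameter walkthrough is elided in this excerpt, the following is only a hedged sketch of an auto-mode setup (class locations, argument names, the op names, and the train_dataset / eval_fn variables are assumptions modeled on the AIMET TensorFlow compression API; consult the API docs for the exact signatures):

```python
# Hedged sketch only - verify every name against the aimet_tensorflow compression API docs.
from decimal import Decimal
from aimet_common.defs import CompressionScheme, CostMetric
from aimet_tensorflow.defs import GreedySelectionParameters, ChannelPruningParameters  # locations assumed
from aimet_tensorflow.compress import ModelCompressor

greedy_params = GreedySelectionParameters(target_comp_ratio=Decimal(0.5),
                                          num_comp_ratio_candidates=10)
auto_params = ChannelPruningParameters.AutoModeParams(greedy_params, modules_to_ignore=[])
params = ChannelPruningParameters(input_op_names=['input_1'],        # assumed graph input op
                                  output_op_names=['probs/Softmax'], # assumed graph output op
                                  data_set=train_dataset,            # assumed tf.data.Dataset of images
                                  batch_size=32,
                                  num_reconstruction_samples=500,
                                  allow_custom_downsample_ops=False,
                                  mode=ChannelPruningParameters.Mode.auto,
                                  params=auto_params)

compressed_sess, stats = ModelCompressor.compress_model(sess,
                                                        working_dir=None,
                                                        eval_callback=eval_fn,      # assumed accuracy callback
                                                        eval_iterations=10,
                                                        input_shape=(1, 224, 224, 3),
                                                        compress_scheme=CompressionScheme.channel_pruning,
                                                        cost_metric=CostMetric.mac,
                                                        parameters=params)
```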

        @@ -1402,8 +1394,8 @@

        3.1. Compress model using Channel Pruning and evaluate it to find post-compr


      As you can see the model accuracy fell sharply after compression. This is expected. We will use model fine-tuning to recover this accuracy back.

      -
      -
      +
      +

      3.2. Fine-tune the model

      After the model is compressed using Channel Pruning, we can simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

      For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.

      @@ -1448,16 +1440,16 @@

      3.2. Fine-tune the model -
      +

      Summary

      Hope this notebook was useful for you to understand how to use AIMET for performing compression with Channel Pruning. As indicated above, some parameters have been chosen in a way to run the example faster.

      Few additional resources - Refer to the AIMET API docs to know more details of the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET compression and quantization techniques

      -
      -

      -

      +
      +
      + diff --git a/releases/1.33.0/Examples/tensorflow/compression/spatial_svd.html b/releases/1.33.0/Examples/tensorflow/compression/spatial_svd.html index 677e894..8ac0415 100644 --- a/releases/1.33.0/Examples/tensorflow/compression/spatial_svd.html +++ b/releases/1.33.0/Examples/tensorflow/compression/spatial_svd.html @@ -1,8 +1,7 @@ - - + Model compression Using Spatial SVD — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      @@ -1114,7 +1106,7 @@
      -
      +

      Model compression Using Spatial SVD

      This notebook shows a working code example of how to use AIMET to perform model compression. The Spatial SVD technique is used in this notebook to achieve model compression.

      Here is a brief introduction to the techniques. Please refer to the AIMET user guide for more details.

      @@ -1126,21 +1118,21 @@

      Model compression Using Spatial SVD +

      Overall flow

      This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the model and evaluate it to find the baseline accuracy 3. Compress the model and fine-tune:
      3.1 Compress model using Spatial SVD and evaluate it to find post-compression accuracy
      3.2 Fine-tune the model
      -

      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art compression results. For example, some optimization parameters such as num_comp_ratio_candidates, num_eval_iterations and epochs are deliberately chosen to have the notebook execute more quickly.


      -
      +

      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#) and convert them into tfrecords.

      Note1: The ImageNet tfrecords dataset typically has the following characteristics and the dataloader provided in this example notebook rely on these - A folder containing tfrecords files starting with ‘train*’ for training files and ‘valid*’ for validation files. Each tfrecord file should have features: ‘image/encoded’ for image data and ‘image/class/label’ for its corresponding class.

      @@ -1168,9 +1160,9 @@

      Dataset -
      +

      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.

        @@ -1242,9 +1234,9 @@

        1. Example evaluation and training pipeline -
        +

        2. Load the model and evaluate it to find the baseline accuracy

For this example notebook, we are going to load a pretrained ResNet50 model from keras and convert it to a tensorflow session. Similarly, you can load any pretrained tensorflow model instead.

        Calling clear_session() releases the global state: this helps avoid clutter from old models and layers, especially when memory is limited.

        @@ -1319,10 +1311,10 @@

        2. Load the model and evaluate it to find the baseline accuracy +

        +

        3. Compress the model and fine-tune

        -
        +

3.1. Compress model using Spatial SVD and evaluate it to find post-compression accuracy

Now we use AIMET to define compression parameters for Spatial SVD, a few of which are explained here
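Again only as a hedged sketch (names and module locations are assumptions modeled on the AIMET TensorFlow compression API), the Spatial SVD setup follows the same ModelCompressor.compress_model() pattern as Channel Pruning, swapping in SpatialSvdParameters and the spatial_svd compression scheme:

```python
# Hedged sketch - the resulting `params` is passed to ModelCompressor.compress_model(...,
# compress_scheme=CompressionScheme.spatial_svd, ...) as in the Channel Pruning case.
from decimal import Decimal
from aimet_tensorflow.defs import GreedySelectionParameters, SpatialSvdParameters  # locations assumed

greedy_params = GreedySelectionParameters(target_comp_ratio=Decimal(0.5),
                                          num_comp_ratio_candidates=10)
auto_params = SpatialSvdParameters.AutoModeParams(greedy_params, modules_to_ignore=[])
params = SpatialSvdParameters(input_op_names=['input_1'],          # assumed graph input op
                              output_op_names=['probs/Softmax'],   # assumed graph output op
                              mode=SpatialSvdParameters.Mode.auto,
                              params=auto_params)
```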

          @@ -1409,8 +1401,8 @@

          3.1. Compress model using Channel Pruning and evaluate it to find post-compr


        As you can see the model accuracy fell sharply after compression. This is expected. We will use model fine-tuning to recover this accuracy back.

        -
        -
        +
        +

        3.2. Fine-tune the model

        After the model is compressed using Spatial SVD, we can simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

        For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.

        @@ -1455,16 +1447,16 @@

        3.2. Fine-tune the model -
        +

        Summary

        Hope this notebook was useful for you to understand how to use AIMET for performing compression with Spatial SVD. As indicated above, some parameters have been chosen in a way to run the example faster.

        Few additional resources - Refer to the AIMET API docs to know more details of the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET compression and quantization techniques

        -
        -

        -

      +

      +
      +
      diff --git a/releases/1.33.0/Examples/tensorflow/compression/spatial_svd_channel_pruning.html b/releases/1.33.0/Examples/tensorflow/compression/spatial_svd_channel_pruning.html index 43994bd..16fcebe 100644 --- a/releases/1.33.0/Examples/tensorflow/compression/spatial_svd_channel_pruning.html +++ b/releases/1.33.0/Examples/tensorflow/compression/spatial_svd_channel_pruning.html @@ -1,8 +1,7 @@ - - + Model Compression Using Spatial SVD Followed by Channel Pruning — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      @@ -1114,7 +1106,7 @@
      -
      +

      Model Compression Using Spatial SVD Followed by Channel Pruning

      This notebook shows a working code example of how to use AIMET to perform model compression. Two model-compression techniques are applied back-to-back: Spatial SVD followed by Channel Pruning.

      Here is a brief introduction to the techniques. Please refer to the AIMET user guide for more details.

      @@ -1126,7 +1118,7 @@

      Model Compression Using Spatial SVD Followed by Channel Pruning +

      Overall flow

      This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the model and evaluate it to find the baseline accuracy 3. Compress the model and fine-tune:
      @@ -1135,14 +1127,14 @@

      Overall flow3.3 Compress model using Channel Pruning and evaluate it to find post-compression accuracy

      3.4 Fine-tune the model after Channel Pruning
      -

      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art compression results. For example, some optimization parameters such as num_comp_ratio_candidates, num_eval_iterations and epochs are deliberately chosen to have the notebook execute more quickly.


      -
      +

      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#) and convert them into tfrecords.

      Note1: The ImageNet tfrecords dataset typically has the following characteristics and the dataloader provided in this example notebook rely on these - A folder containing tfrecords files starting with ‘train*’ for training files and ‘valid*’ for validation files. Each tfrecord file should have features: ‘image/encoded’ for image data and ‘image/class/label’ for its corresponding class.

      @@ -1170,9 +1162,9 @@

      Dataset -
      +

      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.

        @@ -1244,9 +1236,9 @@

        1. Example evaluation and training pipeline -
        +

        2. Load the model and evaluate it to find the baseline accuracy

For this example notebook, we are going to load a pretrained ResNet50 model from keras and convert it to a tensorflow session. Similarly, you can load any pretrained tensorflow model instead.

        Calling clear_session() releases the global state: this helps avoid clutter from old models and layers, especially when memory is limited.

        @@ -1321,10 +1313,10 @@

        2. Load the model and evaluate it to find the baseline accuracy +

        +

        3. Compress the model and fine-tune

        -
        +

3.1. Compress model using Spatial SVD and evaluate it to find post-compression accuracy

Now we use AIMET to define compression parameters for Spatial SVD, a few of which are explained here

          @@ -1411,8 +1403,8 @@

          3.1. Compress model using Channel Pruning and evaluate it to find post-compr


        As you can see the model accuracy fell sharply after compression. This is expected. We will use model fine-tuning to recover this accuracy back.

        -
        -
        +
        +

        3.2. Fine-tune the model after Spatial SVD

        After the model is compressed using Spatial SVD, we can simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

        For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.

        @@ -1457,8 +1449,8 @@

        3.2. Fine-tune the model after Spatial SVD +

        +

        3.3. Compress model using Channel Pruning and evaluate it to find post-compression accuracy

        The fine-tuned model, compressed with Spatial SVD, can be further compressed using Channel Pruning method.
        @@ -1547,8 +1539,8 @@

        3.3. Compress model using Channel Pruning and evaluate it to find post-compr


        As you can see the model accuracy fell sharply after compression. This is expected. We will use model fine-tuning to recover this accuracy back.

        -
        -
        +

      +

      3.4. Fine-tune the model after Channel Pruning

      After the model is compressed using Spatial SVD followed by Channel Pruning, we can simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

      For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.

      @@ -1593,10 +1585,10 @@

      3.4. Fine-tune the model after Channel Pruning -
      +

      Summary

      Hope this notebook was useful for you to understand how to use AIMET for performing compression with Spatial SVD followed by Channel Pruning. As indicated above, some parameters have been chosen in a way to run the example faster.

      Few additional resources - Refer to the AIMET API docs to know more details of the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET compression and quantization techniques

      @@ -1608,9 +1600,9 @@

      Summary - - + Adaptive Rounding (AdaRound) — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      @@ -1114,16 +1106,16 @@
      -
      +

      Adaptive Rounding (AdaRound)

      This notebook shows a working code example of how to use AIMET to perform Adaptive Rounding (AdaRound).

      AIMET quantization features typically use the “nearest rounding” technique for achieving quantization. When using the “nearest rounding” technique, the weight value is quantized to the nearest integer value.

      AdaRound optimizes a loss function using unlabeled training data to decide whether to quantize a specific weight to the closer integer value or the farther one. Using AdaRound quantization, a model is able to achieve an accuracy closer to the FP32 model, while using low bit-width integer quantization.

      -
      +

      Overall flow

This notebook covers the following: 1. Instantiate the example evaluation and training pipeline 2. Load the FP32 model and evaluate the model to find the baseline FP32 accuracy 3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score 4. Apply AdaRound and evaluate the simulation model to get a post-finetuned quantized accuracy score

      -
      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art results

      • @@ -1131,7 +1123,7 @@

        What this notebook is not -
        +

        Dataset

        This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, use that. Otherwise, download the dataset from an appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#) and convert them into tfrecords.

        Note1: The dataloader provided in this example notebook relies on the ImageNet tfrecords dataset having the following characteristics: - A folder containing tfrecords files starting with ‘train*’ for training files and ‘valid*’ for validation files. - Each tfrecord file should have features: ‘image/encoded’ for image data and ‘image/class/label’ for its corresponding class.

        @@ -1159,9 +1151,9 @@

        Dataset -
        +

        1. Example Evaluation and Training Pipeline

        The following is an example training and validation loop for this image classification task.

          @@ -1238,9 +1230,9 @@

          1. Example Evaluation and Training Pipeline -
          +

          2. Load the model and evaluate to get a baseline FP32 accuracy score

For this example notebook, we are going to load a pretrained ResNet50 model from keras and convert it to a tensorflow session. Similarly, you can load any pretrained tensorflow model instead.

          Calling clear_session() releases the global state: this helps avoid clutter from old models and layers, especially when memory is limited.

          @@ -1313,12 +1305,12 @@

          2. Load the model and evaluate to get a baseline FP32 accuracy score

        -
        +


      -
      +
      -
      +
      +

      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this?

      @@ -1338,8 +1330,8 @@

      Fold Batch Normalization layers +

      +

      Create Quantization Sim Model

      Now we use AIMET to create a QuantizationSimModel.

      Before we create the QuantizationSimModel, we save and load a version of the BN folded session for QuantSim to use. QuantSim will insert fake quantization ops in the session passed into it, and we want to maintain a fresh copy of the BN folded session for use in AdaRound later.
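A hedged sketch of this construction for a TensorFlow session is shown below (the op names, the default_output_bw / default_param_bw arguments, and the use_cuda flag are assumptions based on the aimet_tensorflow QuantSim API):

```python
# Hedged sketch - verify the argument names against the aimet_tensorflow QuantizationSimModel docs.
from aimet_common.defs import QuantScheme
from aimet_tensorflow.quantsim import QuantizationSimModel

sim = QuantizationSimModel(session=bn_folded_sess,               # assumed copy of the BN-folded session
                           starting_op_names=['input_1'],        # assumed graph input op
                           output_op_names=['probs/Softmax'],    # assumed graph output op
                           quant_scheme=QuantScheme.post_training_tf_enhanced,
                           default_output_bw=8,                  # 8-bit activations
                           default_param_bw=8,                   # 8-bit parameters
                           use_cuda=True)
```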

      @@ -1372,9 +1364,9 @@

      Create Quantization Sim Model -
      +

      Compute Encodings

      Even though AIMET has added ‘quantizer’ nodes to the model graph, the model is not ready to be used yet. Before we can use the sim model for inference or training, we need to find appropriate scale/offset quantization parameters for each ‘quantizer’ node.

      For activation quantization nodes, we need to pass unlabeled data samples through the model to collect range statistics which will then let AIMET calculate appropriate scale/offset quantization parameters. This process is sometimes referred to as calibration. AIMET simply refers to it as ‘computing encodings’.

      @@ -1439,9 +1431,9 @@

      Compute Encodings -
      +

      4. Apply AdaRound

      We can now apply AdaRound to this model.

      Some of the parameters for AdaRound are described below:
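The parameter descriptions are elided in this excerpt; purely as a hedged sketch (the module path, argument names, and the unlabeled_dataset / op-name values are assumptions patterned on the aimet_tensorflow AdaRound API):

```python
# Hedged sketch only - confirm names against the AIMET TensorFlow AdaRound API docs.
from aimet_common.defs import QuantScheme
from aimet_tensorflow.adaround.adaround_weight import Adaround, AdaroundParameters  # path assumed

params = AdaroundParameters(data_set=unlabeled_dataset,   # assumed tf.data.Dataset of calibration images
                            num_batches=16,               # assumed
                            default_num_iterations=5000)  # assumed
adarounded_sess = Adaround.apply_adaround(bn_folded_sess,                      # fresh BN-folded session
                                          starting_op_names=['input_1'],       # assumed
                                          output_op_names=['probs/Softmax'],   # assumed
                                          params=params,
                                          path='./adaround_artifacts',
                                          filename_prefix='adaround',
                                          default_param_bw=8,
                                          default_quant_scheme=QuantScheme.post_training_tf_enhanced)
```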

      @@ -1517,16 +1509,16 @@

      4. Apply AdaRound -
      +

      Summary

      This example illustrated how the AIMET AdaRound API is invoked to achieve post training quantization. To use AIMET AdaRound for your specific needs, replace the model with your model and replace the data pipeline with your data pipeline. As indicated above, some parameters in this example have been chosen in such a way to make this example execute faster.

      We hope this notebook was useful for you to understand how to use AIMET for performing AdaRound.

      A few additional resources: - Refer to the AIMET API docs to know more details of the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT techniques

      -
      -

      -

      +

      +
      + diff --git a/releases/1.33.0/Examples/tensorflow/quantization/autoquant.html b/releases/1.33.0/Examples/tensorflow/quantization/autoquant.html index 720015b..d9c893f 100644 --- a/releases/1.33.0/Examples/tensorflow/quantization/autoquant.html +++ b/releases/1.33.0/Examples/tensorflow/quantization/autoquant.html @@ -1,8 +1,7 @@ - - + AutoQuant — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      @@ -1114,22 +1106,22 @@
      -
      +

      AutoQuant

This notebook shows a working code example of how to use the AIMET AutoQuant feature.

AIMET offers a suite of neural network post-training quantization techniques. Often, applying these techniques in a specific sequence results in better accuracy and performance. Without the AutoQuant feature, the AIMET user needs to manually try out various combinations of AIMET quantization features. This manual process is error-prone and often time-consuming.

The AutoQuant feature analyzes the model, determines the sequence of AIMET quantization techniques, and applies these techniques. In addition, the user can specify in the AutoQuant API the amount of accuracy drop that can be tolerated. As soon as this threshold accuracy is reached, AutoQuant stops applying any additional quantization technique. In summary, the AutoQuant feature saves time and automates the quantization of neural networks.


      Overall flow

This notebook covers the following:
1. Instantiate the example evaluation and training pipeline
2. Load a pretrained FP32 model
3. Determine the baseline FP32 accuracy
4. Define constants and helper functions
5. Apply AutoQuant


      What this notebook is not

      • This notebook is not designed to show state-of-the-art AutoQuant results. For example, it uses a relatively quantization-friendly model like Resnet18. Also, some optimization parameters are deliberately chosen to have the notebook execute more quickly.



      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#) and convert them into tfrecords.

Note 1: The ImageNet tfrecords dataset typically has the following characteristics, and the dataloader provided in this example notebook relies on these:
- A folder containing tfrecords files starting with ‘train’ for training files and ‘valid’ for validation files.
- Each tfrecord file should have features: ‘image/encoded’ for image data and ‘image/class/label’ for its corresponding class.


      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.


      2. Load a pretrained FP32 model

For this example notebook, we are going to load a pretrained ResNet50 model from Keras and convert it to a TensorFlow session. Similarly, you can load any pretrained TensorFlow model instead.

      Calling clear_session() releases the global state: this helps avoid clutter from old models and layers, especially when memory is limited.


      3. Determine the baseline FP32 accuracy

Let’s determine the FP32 (floating point 32-bit) accuracy of this model using the evaluate() routine.


      4. Define Constants and Helper functions

      In this section the constants and helper functions needed to run this example are defined.


        Prepare unlabeled dataset

The AutoQuant feature utilizes an unlabeled dataset to achieve quantization. The cell below shows how to get an unlabeled Dataset object from a labeled Dataset.
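As a minimal sketch (assuming labeled_dataset is a tf.data.Dataset that yields (image, label) pairs), dropping the labels is a one-line map:

# Keep only the images; AutoQuant only needs unlabeled samples for calibration.
unlabeled_dataset = labeled_dataset.map(lambda image, label: image)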


        Prepare the evaluation callback function

        The eval_callback() function takes the session object to evaluate and the number of samples to use as arguments. If the num_samples argument is None, the whole evaluation dataset is used to evaluate the model.
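A hypothetical sketch of such a callback is shown below; the evaluate_session helper and the TOTAL_VAL_SAMPLES constant are assumed to exist in your own pipeline, with evaluate_session returning the accuracy obtained on the requested number of samples:

def eval_callback(session, num_samples=None):
    # Hypothetical: fall back to the full validation set when num_samples is None.
    if num_samples is None:
        num_samples = TOTAL_VAL_SAMPLES
    return evaluate_session(session, num_samples)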


        5. Apply AutoQuant

        As a first step, the AutoQuant object is created.

The allowed_accuracy_drop parameter is set by the user to convey to the AutoQuant feature how much accuracy drop is tolerated by the user. AutoQuant applies a series of quantization features. When the allowed accuracy is reached, AutoQuant stops applying any subsequent quantization feature. Please refer to the AutoQuant User Guide and API documentation for complete details.


        Optionally set AdaRound Parameters

        The AutoQuant feature internally uses default parameters to execute the AdaRound step. If and only if necessary, the default AdaRound Parameters should be modified using the API shown below.

        Note: To execute this example faster, the default value of the num_iterations parameter has been reduced from 10000 to 2000
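A rough sketch of that override is shown below. The class and method names (AdaroundParameters, set_adaround_params), their argument names, and the auto_quant variable are assumptions based on the AIMET AdaRound API, not copied from this notebook; check the API docs before use.

from aimet_tensorflow.adaround.adaround_weight import AdaroundParameters

# Assumed argument names; unlabeled_dataset is the tf.data.Dataset prepared earlier.
adaround_params = AdaroundParameters(data_set=unlabeled_dataset,
                                     num_batches=4,
                                     default_num_iterations=2000)
auto_quant.set_adaround_params(adaround_params)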


        Run AutoQuant

        This step applies the AutoQuant feature. The best possible quantized model, the associated eval_score and the path to the AdaRound encoding files are returned.


        Summary

Hope this notebook was useful for you to understand how to use the AIMET AutoQuant feature.

A few additional resources:
- Refer to the AIMET API docs to know more details of the APIs and parameters
- Refer to the other example notebooks to understand how to use AIMET CLE and AdaRound features in a standalone fashion.

diff --git a/releases/1.33.0/Examples/tensorflow/quantization/bn_reestimation.html b/releases/1.33.0/Examples/tensorflow/quantization/bn_reestimation.html

Quantization-Aware Training with BatchNorm Re-estimation — AI Model Efficiency Toolkit Documentation: ver 1.33.0

      Quantization-Aware Training with BatchNorm Re-estimation

This notebook shows a working code example of how to use AIMET to perform QAT (Quantization-aware training) with batchnorm re-estimation. Batchnorm re-estimation is a technique for countering potential instability of batchnorm statistics (i.e. running mean and variance) during QAT. More specifically, batchnorm re-estimation recalculates the batchnorm statistics based on the model after QAT. By doing so, we aim to make our model learn batchnorm statistics from stable outputs after QAT, rather than from likely noisy outputs during QAT.


      Overall flow

This notebook covers the following steps:
1. Create a quantization simulation model with fake quantization ops inserted.
2. Finetune and evaluate the quantization simulation model.
3. Re-estimate batchnorm statistics and compare the eval score before and after re-estimation.
4. Fold the re-estimated batchnorm layers and export the quantization simulation model.


      What this notebook is not

In this notebook, we will focus on how to apply batchnorm re-estimation after QAT, rather than covering all the details about QAT itself. For more information about QAT, please refer to the QAT notebook or the QAT range learning notebook.



      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#) and convert them into tfrecords.

Note 1: The ImageNet tfrecords dataset typically has the following characteristics, and the dataloader provided in this example notebook relies on these:
- A folder containing tfrecords files starting with ‘train*’ for training files and ‘valid*’ for validation files.
- Each tfrecord file should have features: ‘image/encoded’ for image data and ‘image/class/label’ for its corresponding class.


      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.


        2. Load FP32 model

AIMET currently supports BatchNorm re-estimation on TensorFlow sessions. In this example notebook, we are going to load a pretrained ResNet50 model from Keras and convert it to work with a TensorFlow session. Similarly, you can load any pretrained TensorFlow model. Please refer to the QAT notebook for more detail.


        BatchNorm Rewriter

Later in this notebook, we will make changes to the parameters of the BatchNorm layers to improve performance. However, depending on how the BatchNorm was configured, this might be difficult to achieve.

AIMET provides model_sess_bn_mutable, which changes BatchNorm layers to make it easier to modify their parameters.


        3. Create a quantization simulation model and Perform QAT


        Create Quantization Sim Model

Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here:
- quant_scheme: We set this to “QuantScheme.post_training_tf_enhanced”. Supported options are ‘tf_enhanced’ or ‘tf’, or using the Quant Scheme Enum QuantScheme.post_training_tf or QuantScheme.post_training_tf_enhanced.
- default_output_bw: Setting this to 8 essentially means that we are asking AIMET to perform all activation quantizations in the model using integer 8-bit precision.
- default_param_bw: Setting this to 8 essentially means that we are asking AIMET to perform all parameter quantizations in the model using integer 8-bit precision.


        Compute Encodings

        Even though AIMET has added ‘quantizer’ nodes to the model graph, the model is not ready to be used yet. Before we can use the sim model for inference or training, we need to find appropriate scale/offset quantization parameters for each ‘quantizer’ node. For activation quantization nodes, we need to pass unlabeled data samples through the model to collect range statistics which will then let AIMET calculate appropriate scale/offset quantization parameters. This process is sometimes referred to as calibration. AIMET simply refers to it as ‘computing encodings’.


        Perform QAT

        To perform quantization aware training (QAT), we simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

        For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.


        4. Perform BatchNorm Reestimation


        Re-estimate BatchNorm Statistics

AIMET provides a helper function, reestimate_bn_stats, for re-estimating batchnorm statistics. Here is the full list of parameters for this function:
- model: Model to re-estimate the BatchNorm statistics.
- dataloader: Train dataloader.
- num_batches (optional): The number of batches to be used for re-estimation. (Default: 100)
- forward_fn (optional): Optional adapter function that performs a forward pass given a model and an input batch yielded from the data loader. If not specified, it is expected that inputs yielded from the dataloader can be passed directly to the model.
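Using the parameter names listed above, a call might look like the sketch below; the import path and the exact object expected for model are assumptions, so consult the AIMET API docs before use:

from aimet_tensorflow.bn_reestimation import reestimate_bn_stats

# sim is the quantization sim fine-tuned with QAT above; re_estimation_dataset is
# assumed to be a training tf.data.Dataset whose batches the model accepts directly.
reestimate_bn_stats(model=sim, dataloader=re_estimation_dataset, num_batches=100)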


        Fold BatchNorm Layers

        So far, we have improved our quantization simulation model through QAT and batchnorm re-estimation. The next step would be to actually take this model to target. But first, we should fold the batchnorm layers for our model to run on target devices more efficiently.


        5. Export Model

        As the final step, we will export the model to run it on actual target devices. AIMET QuantizationSimModel provides an export API for this purpose.


        Summary

Hope this notebook was useful for you to understand how to use the batchnorm re-estimation feature of AIMET.

A few additional resources:
- Refer to the AIMET API docs to know more details of the APIs and optional parameters.
- Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT methods.

diff --git a/releases/1.33.0/Examples/tensorflow/quantization/cle_bc.html b/releases/1.33.0/Examples/tensorflow/quantization/cle_bc.html

Cross-Layer Equalization (CLE) and Bias Correction (BC) — AI Model Efficiency Toolkit Documentation: ver 1.33.0

      Cross-Layer Equalization (CLE) and Bias Correction (BC)

      This notebook showcases a working code example of how to use AIMET to apply Cross-Layer Equalization (CLE) and Bias Correction (BC). CLE and BC are post-training quantization techniques that aim to improve quantized accuracy of a given model. CLE does not need any data samples. BC may optionally need unlabelled data samples. These techniques help recover quantized accuracy when the model quantization is sensitive to parameter quantization as opposed to activation quantization.

To learn more about these techniques, please refer to the “Data-Free Quantization Through Weight Equalization and Bias Correction” paper from ICCV 2019 - https://arxiv.org/abs/1906.04721

Bias Correction

Quantization sometimes leads to a shift in layer outputs. This technique helps correct this shift by adjusting the bias parameters of that layer. Note that this technique is generally applied after CLE, but it is an optional step.

      Overall flow

This notebook covers the following:
1. Instantiate the example evaluation and training pipeline
2. Load the FP32 model and evaluate the model to find the baseline FP32 accuracy
3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score
4. Apply CLE and BC, and evaluate the simulation model to get a post-finetuned quantized accuracy score


      What this notebook is not

      • This notebook is not designed to show state-of-the-art results. For example, it uses a relatively quantization-friendly model like Resnet18. Also, some optimization parameters are deliberately chosen to have the notebook execute more quickly.



      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#) and convert them into tfrecords.

Note 1: The ImageNet tfrecords dataset typically has the following characteristics, and the dataloader provided in this example notebook relies on these:
- A folder containing tfrecords files starting with ‘train*’ for training files and ‘valid*’ for validation files.
- Each tfrecord file should have features: ‘image/encoded’ for image data and ‘image/class/label’ for its corresponding class.


      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.


        2. Load the model and evaluate to get a baseline FP32 accuracy score

For this example notebook, we are going to load a pretrained ResNet50 model from Keras and convert it to a TensorFlow session. Similarly, you can load any pretrained TensorFlow model instead.

        Calling clear_session() releases the global state: this helps avoid clutter from old models and layers, especially when memory is limited.


      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this? On quantized runtimes (like TFLite, SnapDragon Neural Processing SDK, etc.), it is a common practice to fold the BN layers. Doing so results in an inferences/sec speedup since unnecessary computation is avoided. From a floating point compute perspective, a BN-folded model is mathematically equivalent to a model with BN layers from an inference perspective, and produces the same accuracy. However, folding the BN layers can increase the range of the tensor values.


      Create Quantization Sim Model

Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here:
- quant_scheme: We set this to “QuantScheme.post_training_tf_enhanced”. Supported options are ‘tf_enhanced’ or ‘tf’, or using the Quant Scheme Enum QuantScheme.post_training_tf or QuantScheme.post_training_tf_enhanced.
- default_output_bw: Setting this to 8 essentially means that we are asking AIMET to perform all activation quantizations in the model using integer 8-bit precision.
- default_param_bw: Setting this to 8 essentially means that we are asking AIMET to perform all parameter quantizations in the model using integer 8-bit precision.
- num_batches: The number of batches used to evaluate the model while calculating the quantization encodings. Only 5 batches are used here to speed up the process.


      Compute Encodings

Even though AIMET has added ‘quantizer’ nodes to the model graph, the model is not ready to be used yet. Before we can use the sim model for inference or training, we need to find appropriate scale/offset quantization parameters for each ‘quantizer’ node. For activation quantization nodes, we need to pass unlabeled data samples through the model to collect range statistics which will then let AIMET calculate appropriate scale/offset quantization parameters. This process is sometimes referred to as calibration. AIMET simply refers to it as ‘computing encodings’.


4.1 Cross Layer Equalization

      The next cell performs cross-layer equalization on the model. As noted before, the function folds batch norms, applies cross-layer scaling, and then folds high biases.

      Note: Interestingly, CLE needs BN statistics for its procedure. If a BN folded model is provided, CLE will run the CLS (cross-layer scaling) optimization step but will skip the HBA (high-bias absorption) step. To avoid this, we simply load the original model again before running CLE.
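For reference, a sketch of the CLE call on a TensorFlow session is shown below; the op names are placeholders rather than values taken from this notebook, and the exact signature should be checked against the AIMET API docs:

from aimet_tensorflow.cross_layer_equalization import equalize_model

# (session, starting op names, output op names) -> new session with equalized weights.
# 'input_1' and 'probs/Softmax' are placeholder op names, not from this notebook.
equalized_sess = equalize_model(sess, ['input_1'], ['probs/Softmax'])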


4.2 Bias Correction

      This section shows how we can apply AIMET Bias Correction on top of the already equalized model from the previous step. Bias correction under the hood uses a reference FP32 model and a QuantizationSimModel to perform its procedure. More details are explained in the AIMET User Guide documentation.

      For the correct_bias API, we pass the following parameters


      Summary

Hope this notebook was useful for you to understand how to use AIMET for performing Cross Layer Equalization (CLE) and Bias Correction (BC).

A few additional resources:
- Refer to the AIMET API docs to know more details of the APIs and optional parameters
- Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT techniques

diff --git a/releases/1.33.0/Examples/tensorflow/quantization/keras/adaround.html b/releases/1.33.0/Examples/tensorflow/quantization/keras/adaround.html

Adaptive Rounding (Adaround) — AI Model Efficiency Toolkit Documentation: ver 1.33.0

      Adaptive Rounding (Adaround)

This notebook illustrates the use of the AIMET AdaRound feature.

AIMET quantization features, by default, use the “nearest rounding” technique for achieving quantization. When using the “nearest rounding” technique, the weight value is quantized to the nearest integer value. The Adaptive Rounding (AdaRound) feature uses a smaller subset of the unlabelled training data to adaptively round the weights. AdaRound optimizes a loss function using the unlabelled training data to adaptively decide whether to quantize a specific weight to the integer value near it or away from it. Using AdaRound quantization, a model is able to achieve an accuracy closer to the FP32 model, while using low bit-width integer quantization.


      Overall flow

This notebook covers the following:
1. Instantiate the example evaluation and training pipeline
2. Load the FP32 model and evaluate the model to find the baseline FP32 accuracy
3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score
4. Apply AdaRound and evaluate the simulation model to get a post-finetuned quantized accuracy score


      What this notebook is not

      • This notebook is not designed to show state-of-the-art results. For example, it uses a relatively quantization-friendly model like Resnet50. Also, some optimization parameters are deliberately chosen to have the notebook execute more quickly.



      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

Note: To speed up the execution of this notebook, you may use a reduced subset of the ImageNet dataset. E.g. the entire ILSVRC2012 dataset has 1000 classes, 1000 training samples per class and 50 validation samples per class. But for the purpose of running this notebook, you could perhaps reduce the dataset to, say, 2 samples per class. This exercise is left up to the reader and is not necessary.


      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.


      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this? On quantized runtimes (like TFLite, SnapDragon Neural Processing SDK, etc.), it is a common practice to fold the BN layers. Doing so results in an inferences/sec speedup since unnecessary computation is avoided. From a floating point compute perspective, a BN-folded model is mathematically equivalent to a model with BN layers from an inference perspective, and produces the same accuracy. However, folding the BN layers can increase the range of the tensor values.


      4. Apply Adaround

      We can now apply AdaRound to this model.

      Some of the parameters for AdaRound are described below


      Summary

This example illustrated how the AIMET AdaRound API is invoked to achieve post-training quantization. To use AIMET AdaRound for your specific needs, replace the model with your model and replace the data pipeline with your data pipeline. This will provide you with a quick starting point. As indicated above, some parameters in this example have been chosen in such a way as to make this example execute faster.

Hope this notebook was useful for you to understand how to use AIMET for performing AdaRound.

A few additional resources:
- Refer to the AIMET API docs to know more details of the APIs and optional parameters
- Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT techniques

diff --git a/releases/1.33.0/Examples/tensorflow/quantization/keras/autoquant.html b/releases/1.33.0/Examples/tensorflow/quantization/keras/autoquant.html

AutoQuant — AI Model Efficiency Toolkit Documentation: ver 1.33.0

      AutoQuant

This notebook shows a working code example of how to use the AIMET AutoQuant feature.

AIMET offers a suite of neural network post-training quantization techniques. Often, applying these techniques in a specific sequence results in better accuracy and performance. Without the AutoQuant feature, the AIMET user needs to manually try out various combinations of AIMET quantization features. This manual process is error-prone and often time-consuming.

The AutoQuant feature analyzes the model, determines the sequence of AIMET quantization techniques, and applies these techniques. In addition, the user can specify in the AutoQuant API the amount of accuracy drop that can be tolerated. As soon as this threshold accuracy is reached, AutoQuant stops applying any additional quantization technique. In summary, the AutoQuant feature saves time and automates the quantization of neural networks.


      Overall flow

This notebook covers the following:
1. Instantiate the example evaluation and training pipeline
2. Load a pretrained FP32 model
3. Determine the baseline FP32 accuracy
4. Define constants and helper functions
5. Apply AutoQuant


      What this notebook is not

      • This notebook is not designed to show state-of-the-art AutoQuant results. For example, it uses a relatively quantization-friendly model like Resnet50. Also, some optimization parameters are deliberately chosen to have the notebook execute more quickly.



      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#)

Note: To speed up the execution of this notebook, you may use a reduced subset of the ImageNet dataset. E.g. the entire ILSVRC2012 dataset has 1000 classes, 1000 training samples per class and 50 validation samples per class. But for the purpose of running this notebook, you could perhaps reduce the dataset to, say, 2 samples per class. This exercise is left up to the reader and is not necessary.


      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.


      2. Load a pretrained FP32 model

      For this example notebook, we are going to load a pretrained ResNet50 model from Keras. Similarly, you can load any pretrained Keras model instead.
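For instance (mirroring the QAT example later in these docs; the compile settings here are illustrative rather than taken from this notebook):

from tensorflow.keras.applications.resnet import ResNet50

model = ResNet50(weights='imagenet')
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])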


      3. Determine the baseline FP32 accuracy

Let’s determine the FP32 (floating point 32-bit) accuracy of this model using the evaluate() routine.


      4. Define Constants and Helper functions

      In this section the constants and helper functions needed to run this example are defined.


      Prepare the evaluation callback function

The eval_callback() function takes the model object to evaluate, a compile options dictionary, and the number of samples to use as arguments. If the num_samples argument is None, the whole evaluation dataset is used to evaluate the model.
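A hypothetical sketch of such a callback is shown below; eval_dataset is assumed to be a batched tf.data.Dataset defined by your pipeline (and compile_options is assumed to include an accuracy metric), and taking num_samples batches rather than individual samples keeps the sketch simple:

def eval_callback(model, compile_options, num_samples=None):
    model.compile(**compile_options)
    data = eval_dataset if num_samples is None else eval_dataset.take(num_samples)
    # model.evaluate returns [loss, accuracy]; report accuracy as the eval score
    return model.evaluate(data)[1]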


      5. Apply AutoQuant

      As a first step, the AutoQuant object is created.

The allowed_accuracy_drop parameter is set by the user to convey to the AutoQuant feature how much accuracy drop is tolerated by the user. AutoQuant applies a series of quantization features. When the allowed accuracy is reached, AutoQuant stops applying any subsequent quantization feature. Please refer to the AutoQuant User Guide and API documentation for complete details.


      Optionally set AdaRound Parameters

      The AutoQuant feature internally uses default parameters to execute the AdaRound step. If and only if necessary, the default AdaRound Parameters should be modified using the API shown below.

      Note: To execute this example faster, the default value of the num_iterations parameter has been reduced from 10000 to 2000


      Run AutoQuant

      This step applies the AutoQuant feature. The best possible quantized model, the associated eval_score and the path to the AdaRound encoding files are returned.


      Summary

Hope this notebook was useful for you to understand how to use the AIMET AutoQuant feature.

A few additional resources:
- Refer to the AIMET API docs to know more details of the APIs and parameters
- Refer to the other example notebooks to understand how to use AIMET CLE and AdaRound features in a standalone fashion.

diff --git a/releases/1.33.0/Examples/tensorflow/quantization/keras/bn_reestimation.html b/releases/1.33.0/Examples/tensorflow/quantization/keras/bn_reestimation.html

Quantization-Aware Training with BatchNorm Re-estimation — AI Model Efficiency Toolkit Documentation: ver 1.33.0

      Quantization-Aware Training with BatchNorm Re-estimation

      This notebook shows a working code example of how to use AIMET to perform QAT (Quantization-aware training) with batchnorm re-estimation. Batchnorm re-estimation is a technique for countering potential instability of batchnorm statistics (i.e. running mean and variance) during QAT. More specifically, batchnorm re-estimation recalculates the batchnorm statistics based on the model after QAT. By doing so, we aim to make our model learn batchnorm statistics from stable outputs after QAT, rather than from likely noisy outputs during QAT.


      Overall flow

This notebook covers the following steps:
1. Instantiate the example evaluation and training pipeline
2. Define constants and prepare datasets
3. Create the model in Keras
4. Train and evaluate the model
5. Quantize the model with QuantSim
6. Finetune and evaluate the quantization simulation model
7. Re-estimate batchnorm statistics and compare the eval score before and after re-estimation
8. Fold the re-estimated batchnorm layers and export the quantization simulation model



      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#)

Note: To speed up the execution of this notebook, you may use a reduced subset of the ImageNet dataset. E.g. the entire ILSVRC2012 dataset has 1000 classes, 1000 training samples per class and 50 validation samples per class. But for the purpose of running this notebook, you could perhaps reduce the dataset to, say, 2 samples per class. This exercise is left up to the reader and is not necessary.


      1. Instantiate the example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.


2. Define Constants and Prepare Datasets

      In this section the constants and helper functions needed to run this example are defined.


      2. Create the model in Keras

      Currently, only Keras models built using the Sequential or Functional APIs are compatible with QuantSim - models making use of subclassed layers are incompatible. Therefore, we use the Functional API to create the model used in this example
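As a small illustration of the Functional style (not the exact model used in this notebook):

from tensorflow.keras import layers, Model

inputs = layers.Input(shape=(32, 32, 3))
x = layers.Conv2D(16, 3, activation='relu')(inputs)
x = layers.BatchNormalization()(x)
x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(10, activation='softmax')(x)
functional_model = Model(inputs=inputs, outputs=outputs)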


      3. Train and evaluate the model

      Before we can quantize the model and apply QAT, the FP32 model must be trained so that we can get a baseline accuracy.


      4. Create a QuantizationSim Model

Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here:
- quant_scheme: We set this to “QuantScheme.training_range_learning_with_tf_init”. Supported options are ‘tf_enhanced’ or ‘tf’, or using the Quant Scheme Enum QuantScheme.post_training_tf or QuantScheme.post_training_tf_enhanced.
- default_output_bw: Setting this to 8 essentially means that we are asking AIMET to perform all activation quantizations in the model using integer 8-bit precision.
- default_param_bw: Setting this to 8 essentially means that we are asking AIMET to perform all parameter quantizations in the model using integer 8-bit precision.
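For reference, a sketch of the sim creation with these arguments is shown below; the argument names follow the description above, and the QuantScheme enum is assumed to come from aimet_common.defs — check the AIMET Keras QuantSim API docs for the full signature:

from aimet_common.defs import QuantScheme
from aimet_tensorflow.keras.quantsim import QuantizationSimModel

sim = QuantizationSimModel(model,
                           quant_scheme=QuantScheme.training_range_learning_with_tf_init,
                           default_output_bw=8,
                           default_param_bw=8)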


      Prepare the evaluation callback function

The eval_callback() function takes the model object to evaluate, a compile options dictionary, and the number of samples to use as arguments. If the num_samples argument is None, the whole evaluation dataset is used to evaluate the model.


      5. Perform QAT

      To perform quantization aware training (QAT), we simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so. For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.


      Fold BatchNorm Layers

      So far, we have improved our quantization simulation model through QAT and batchnorm re-estimation. The next step would be to actually take this model to target. But first, we should fold the batchnorm layers for our model to run on target devices more efficiently.


      5. Export Model

      As the final step, we will export the model to run it on actual target devices. AIMET QuantizationSimModel provides an export API for this purpose.


      Summary

Hope this notebook was useful for you to understand how to use the batchnorm re-estimation feature of AIMET.

A few additional resources:
- Refer to the AIMET API docs to know more details of the APIs and optional parameters.
- Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT methods.

diff --git a/releases/1.33.0/Examples/tensorflow/quantization/keras/keras_transformer_qat.html b/releases/1.33.0/Examples/tensorflow/quantization/keras/keras_transformer_qat.html

Quantization-Aware Training with a Keras Transformer Model — AI Model Efficiency Toolkit Documentation: ver 1.33.0

      Keras Model Preparer

This notebook shows how to prepare a Keras model for quantization. Specifically, this preparer converts a Keras model with subclass layers into a Keras model with functional layers. This is required for quantization because the AIMET quantization tooling only supports the Functional and Sequential Keras model building APIs.

      To learn more about the Keras Model Preparer, please refer to the API Docs in AIMET.


      Overall flow

This notebook covers the following:
1. Creating a Keras model with subclass layers
2. Converting the Keras model with subclass layers to a Keras model with functional layers
3. Showing similarities and differences between the original and converted models
4. Discussing the limitations of the Keras Model Preparer



      1. Creating a Keras model with subclass layers

      First, we will create a Keras model with subclass layers. For this notebook example, we will use a model defined by Keras that utilizes subclass layers. This model is a text classification transformer model and can be found here. The subclass layers used in this model are - TokenAndPositionEmbedding and TransformerBlock. They are defined below.
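The notebook's own TokenAndPositionEmbedding and TransformerBlock definitions are not reproduced on this page; purely as an illustration of the subclassing pattern being discussed, a subclass layer looks like this:

import tensorflow as tf

class TinyBlock(tf.keras.layers.Layer):
    # Illustrative subclass layer (not from the notebook): a Dense layer followed
    # by layer normalization, composed inside call().
    def __init__(self, units):
        super().__init__()
        self.dense = tf.keras.layers.Dense(units, activation="relu")
        self.norm = tf.keras.layers.LayerNormalization()

    def call(self, inputs):
        return self.norm(self.dense(inputs))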


      2. Converting the Keras model with subclass layers to a Keras model with functional layers

The Keras Model Preparer can be used to convert a Keras model with subclass layers to a Keras model with functional layers. The Keras Model Preparer can be imported from aimet_tensorflow.keras.model_preparer. It takes in a Keras model with subclass layers and returns a Keras model with functional layers. Note that the prepare_model function takes an optional input_layer parameter. This parameter is required if the model begins with a subclass layer. In this case, the model does not begin with a subclass layer, so we do not need to provide this parameter.
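A sketch of the conversion call is shown below (the input_layer argument is only needed in the other case described above, where a model starts with a subclass layer):

from aimet_tensorflow.keras.model_preparer import prepare_model

functional_model = prepare_model(model)
# If the model began with a subclass layer, an explicit input layer would be passed
# instead, e.g. (hypothetical): prepare_model(model, input_layer=tf.keras.Input(shape=(maxlen,)))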


      We can see that the Keras Model Preparer has converted the model with subclass layers to a model with functional layers. Specifically, it has extracted the call function of each of these layers and created a functional layer from it.


      3. Showing similarities and differences between the original and converted models

      We can see that the original model and the converted model are symmetrical. The only difference is that the subclass layers are unwrapped. This means that the converted model is functionally identical to the original model. We can test this in a few ways.
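For example, one quick check is to push the same random batch of token ids through both models and compare the outputs (a sketch; the input shape and vocabulary size below are placeholders):

import numpy as np
import tensorflow as tf

dummy_tokens = tf.random.uniform((1, 200), maxval=20000, dtype=tf.int32)  # placeholder shape/vocab
original_out = model(dummy_tokens, training=False).numpy()
converted_out = functional_model(dummy_tokens, training=False).numpy()
print(np.allclose(original_out, converted_out, atol=1e-5))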


      4. Discussing the limitations of the Keras Model Preparer

• The AIMET Keras ModelPreparer API is able to convert subclass layers that have arithmetic expressions in their call function. However, this API and Keras will convert these operations to TFOPLambda layers, which are not currently supported by the AIMET Keras Quantization API. If possible, it is recommended to have the subclass layer's call function resemble the Keras Functional API layers. For example, if a subclass layer has two convolution layers in its call function, the call function should …


        Summary

        Hopefully this notebook was useful for you to understand how to use the Keras Model Preparer.

A few additional resources:
- AIMET API Docs

diff --git a/releases/1.33.0/Examples/tensorflow/quantization/keras/qat.html b/releases/1.33.0/Examples/tensorflow/quantization/keras/qat.html

Quantization-Aware Training — AI Model Efficiency Toolkit Documentation: ver 1.33.0

      Quantization-Aware Training

This notebook shows a working code example of how to use AIMET to perform Quantization-Aware Training (QAT). QAT is an AIMET feature that adds quantization simulation ops to a pre-trained model and uses a standard training pipeline to fine-tune the model for a few epochs. The resulting model should show improved accuracy on quantized ML accelerators.

The quantization parameters (like encoding min/max/scale/offset) for activations are computed once. During fine-tuning, the model weights are updated to minimize the effects of quantization in the forward pass, keeping the quantization parameters constant.


      Overall flow

This notebook covers the following:
1. Instantiate the example evaluation and training pipeline
2. Load a pretrained FP32 model and determine the baseline FP32 accuracy
3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score
4. Fine-tune the quantization simulation model using QAT and evaluate the simulation model to get a post fine-tuned quantized accuracy score


      What this notebook is not

      • This notebook is not designed to show state-of-the-art QAT results. For example, it uses a relatively quantization-friendly model like Resnet50. Also, some optimization parameters like number of epochs are deliberately chosen to have the notebook execute more quickly.


      Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.

      • Does AIMET have any limitations on how the training, validation pipeline for QAT is written? Not really. We will see later that AIMET will modify the user’s model to create a QuantizationSim model which is still a TensorFlow model. This QuantizationSim model can be used in place of the original model when doing inference or training.

      • Does AIMET put any limitation on the interface of evaluate() or train() methods? Not really. You should be able to use your existing evaluate and train routines as-is.


      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#)

Note: To speed up the execution of this notebook, you may use a reduced subset of the ImageNet dataset. E.g. the entire ILSVRC2012 dataset has 1000 classes, 1000 training samples per class and 50 validation samples per class. But for the purpose of running this notebook, you could perhaps reduce the dataset to, say, 2 samples per class. This exercise is left up to the reader and is not necessary.

[ ]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf

      1. Load the dataset

We define a few utility functions and assign the training and validation datasets to dataset_train and dataset_valid, respectively.

[ ]:
import glob

def decode_example(example):
    """ Decode a TFRecord and return the image and label """
    features = tf.compat.v1.parse_single_example(example,
                                        features={'image/class/label': tf.compat.v1.FixedLenFeature([], tf.compat.v1.int64),
                                                  'image/encoded': tf.compat.v1.FixedLenFeature([], tf.compat.v1.string)})
    image_data = features['image/encoded']
    label = tf.compat.v1.cast(features['image/class/label'], tf.int32)
    image = tf.compat.v1.image.decode_jpeg(image_data, channels=3)
    return image, label

def get_imagenet_dataset(dataset_path, split_name, num_parallel_reads=16):
    """ Returns a decoded (image, label) TensorFlow Dataset object for ImageNet """
    # Build the TFRecordDataset from all tfrecord files matching the split prefix
    glob_name = f'{os.path.join(dataset_path, split_name)}-*'
    tf_record_files = glob.glob(glob_name)
    dataset = tf.compat.v1.data.TFRecordDataset(filenames=tf_record_files, num_parallel_reads=num_parallel_reads)
    dataset = dataset.map(decode_example, num_parallel_calls=num_parallel_reads)
    return dataset

[ ]:
dataset_train = get_imagenet_dataset(DATASET_DIR, 'train')
dataset_valid = get_imagenet_dataset(DATASET_DIR, 'validation')
      2. Load a pretrained FP32 model

      For this example notebook, we are going to load a pretrained ResNet50 model from Keras. Similarly, you can load any pretrained Keras model instead.

[ ]:
from tensorflow.keras.applications.resnet import ResNet50

model = ResNet50(weights='imagenet')
# Compile so that the evaluate() call in the next step can run
model.compile(optimizer="adam", loss="categorical_crossentropy")

      3. Determine the baseline FP32 accuracy

Let’s determine the FP32 (floating point 32-bit) accuracy of this model using the evaluate() routine.

[ ]:
model.evaluate(dataset_valid)

      4. Create a QuantizationSim Model and determine quantized accuracy


      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this? On quantized runtimes (like TFLite, SnapDragon Neural Processing SDK, etc.), it is a common practice to fold the BN layers. Doing so results in an inferences/sec speedup since unnecessary computation is avoided. From a floating point compute perspective, a BN-folded model is mathematically equivalent to a model with BN layers from an inference perspective, and produces the same accuracy. However, folding the BN layers can increase the range of the tensor values.

[ ]:
from aimet_tensorflow.keras.batch_norm_fold import fold_all_batch_norms

_, model = fold_all_batch_norms(model)

      Create Quantization Sim Model

Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here:
- quant_scheme: We set this to “QuantScheme.post_training_tf”. Other supported options for QAT are ‘tf_enhanced’ or ‘tf’, or using the Quant Scheme Enum QuantScheme.post_training_tf or QuantScheme.post_training_tf_enhanced.
- default_output_bw: Setting this to 8 essentially means that we are asking AIMET to perform all activation quantizations in the model using integer 8-bit precision.
- default_param_bw: Setting this to 8 essentially means that we are asking AIMET to perform all parameter quantizations in the model using integer 8-bit precision.

      @@ -1244,7 +1248,7 @@

      Create Quantization Sim Model
      [ ]:
       

      -
      from aimet_tensorflow.keras.quantsim import QuantizationSimModel
      +
      +
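For reference, a minimal sketch of creating the simulation model with the settings described above is shown below. Only the arguments named in the description are passed; everything else is left at its default, so confirm the exact signature against the QuantizationSimModel API docs for your release.

from aimet_tensorflow.keras.quantsim import QuantizationSimModel
from aimet_common.defs import QuantScheme

# Sketch: wrap the BN-folded model with fake-quantization ops, using the TF quant
# scheme and 8-bit precision for both activations and parameters as described above.
sim = QuantizationSimModel(model=model,
                           quant_scheme=QuantScheme.post_training_tf,
                           default_output_bw=8,
                           default_param_bw=8)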

      Compute Encodings

Although AIMET has wrapped the layers to act as being ‘quantized’, the model is not ready to be used yet. Before we can use the sim model for inference or training, we need to find appropriate scale/offset quantization parameters for each ‘quantizer’ layer. For activation quantization layers, we need to pass unlabeled data samples through the model to collect range statistics which will then let AIMET calculate appropriate scale/offset quantization parameters. This process is sometimes referred to as calibration. AIMET simply refers to it as ‘computing encodings’.
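A minimal sketch of a calibration callback that could be passed to compute_encodings follows. It assumes dataset_valid yields unbatched (image, label) pairs that still need resizing and batching; the 224x224 image size and the batch size of 32 are assumptions, and the labels are ignored since calibration only needs a forward pass.

import tensorflow as tf
from tensorflow.keras.applications.resnet import preprocess_input

def pass_calibration_data(sim_model, samples):
    # Resize and batch are assumptions: the raw dataset yields variable-sized images.
    batch_size = 32
    calib_ds = (dataset_valid
                .map(lambda image, label: (tf.image.resize(image, (224, 224)), label))
                .batch(batch_size))
    samples_seen = 0
    for images, _ in calib_ds:
        sim_model(preprocess_input(images), training=False)  # forward pass only
        samples_seen += batch_size
        if samples_seen >= samples:
            break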

      @@ -1267,11 +1271,12 @@

      Compute Encodings
      [ ]:
       

      -
      -
      sim.compute_encodings(forward_pass_callback=pass_calibration_data,
      +
      sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                             forward_pass_callback_args=1000)
       
      - -
      +
      +

      Compile the model

      Configure the model for training and evaluation. The model must be compiled before evaluation

      [ ]:
       
      -
      sim.model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
      +
      sim.model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
       
      - -
      +
      +

      Evaluate the performance of the quantized model

      Next, we can evaluate the performance of the quantized model

      [ ]:
       
      -
      sim.model.evaluate(dataset_valid)
      +
      sim.model.evaluate(dataset_valid)
       
      - - -
      +
      +
      +

      5. Perform QAT

      To perform quantization aware training (QAT), we simply train the model for a few more epochs (typically 15-20). As with any training job, hyperparameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so. For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.

      @@ -1332,45 +1337,45 @@

      5. Perform QAT
      [ ]:
       

      -
      quantized_callback = tf.keras.callbacks.TensorBoard(log_dir="./log/quantized")
      +
      quantized_callback = tf.keras.callbacks.TensorBoard(log_dir="./log/quantized")
       history = sim.model.fit(dataset_train, epochs=1, validation_data=dataset_valid, callbacks=[quantized_callback])
       
      - -
      +
      +
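The paragraph above suggests a step-wise learning-rate decay when training for more than one epoch. A hedged sketch of one way to express that with a standard Keras callback is shown below; the starting rate of 1e-5 and the 5-epoch step are illustrative assumptions, and the callback would be passed to sim.model.fit alongside the TensorBoard callback.

# Hedged sketch: drop the learning rate by 10x every 5 epochs, starting from an
# assumed 1e-5 (use a value close to the final LR of the original FP32 training run).
def step_decay(epoch, lr):
    return 1e-5 * (0.1 ** (epoch // 5))

lr_callback = tf.keras.callbacks.LearningRateScheduler(step_decay)
# e.g. sim.model.fit(dataset_train, epochs=15, validation_data=dataset_valid,
#                    callbacks=[quantized_callback, lr_callback])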

      6. Evaluate validation accuracy after QAT

      Next, let’s evaluate the validation accuracy of our model after QAT

      [ ]:
       
      -
      sim.model.evaluate(dataset_valid)
      +
      sim.model.evaluate(dataset_valid)
       
      - -
      +
      +

      7. Export the encodings

      Finally, let’s compute and export the encodings of the model after performing QAT. When comparing the encodings file generated by this step and the encodings generated before quantization, there should be some differences. These differences are an artifact of QAT.

      [ ]:
       
      -
      sim.compute_encodings(forward_pass_callback=pass_calibration_data,
      +
      sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                             forward_pass_callback_args=1000)
       sim.export('./data', 'model_after_qat')
       
      - -
      +
      +

      Summary

      Hope this notebook was useful for you to understand how to use AIMET for performing QAT.

      Few additional resources - Refer to the AIMET API docs to know more details of the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT with range-learning

      - - - +
      +
      +
      diff --git a/releases/1.33.0/Examples/tensorflow/quantization/keras/qat.ipynb b/releases/1.33.0/Examples/tensorflow/quantization/keras/qat.ipynb index 2040a13..cbd1a5d 100644 --- a/releases/1.33.0/Examples/tensorflow/quantization/keras/qat.ipynb +++ b/releases/1.33.0/Examples/tensorflow/quantization/keras/qat.ipynb @@ -3,10 +3,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "# Quantization-Aware Training\n", @@ -31,10 +28,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## Example evaluation and training pipeline\n", @@ -48,10 +42,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## Dataset\n", @@ -67,26 +58,18 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ - "DATASET_DIR = '/path/to/imagenet_dir' # Please replace this with a real directory\n", - "BATCH_SIZE = 128\n", - "IMAGE_SIZE = (224, 224)" + "DATASET_DIR = '/path/to/dir/' # Please replace this with a real directory" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -99,10 +82,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 1. Load the dataset\n", @@ -114,38 +94,50 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ - "dataset_train = dataset_valid = tf.keras.preprocessing.image_dataset_from_directory(\n", - " directory=os.path.join(DATASET_DIR, \"train\"),\n", - " labels=\"inferred\",\n", - " label_mode=\"categorical\",\n", - " batch_size=BATCH_SIZE,\n", - " shuffle=True,\n", - " image_size=IMAGE_SIZE\n", - ")\n", - "dataset_valid = tf.keras.preprocessing.image_dataset_from_directory(\n", - " directory=os.path.join(DATASET_DIR, \"val\"),\n", - " labels=\"inferred\",\n", - " label_mode=\"categorical\",\n", - " batch_size=BATCH_SIZE,\n", - " shuffle=False,\n", - " image_size=IMAGE_SIZE\n", - ")" + "import glob\n", + "\n", + "def decode_example(example):\n", + " \"\"\" Decode TFRecord and get image and label\n", + " \"\"\"\n", + " features = tf.compat.v1.parse_single_example(example,\n", + " features={'image/class/label': tf.compat.v1.FixedLenFeature([], tf.compat.v1.int64),\n", + " 'image/encoded': tf.compat.v1.FixedLenFeature([], tf.compat.v1.string)})\n", + " image_data = features['image/encoded']\n", + " label = tf.compat.v1.cast(features['image/class/label'], tf.int32)\n", + " image = tf.compat.v1.image.decode_jpeg(image_data, channels=3)\n", + " return image, label\n", + "\n", + "def get_imagenet_dataset(dataset_path, split_name, num_parallel_reads=16):\n", + " \"\"\"Returns an decoded (image, label) TensorFlow Dataset object for ImageNet\n", + " \"\"\"\n", + " # Create the TFRecordDataset\n", + " glob_name = f'{os.path.join(dataset_path, split_name)}-*'\n", + " tf_record_files = glob.glob(glob_name)\n", + " dataset = tf.compat.v1.data.TFRecordDataset(filenames=tf_record_files, num_parallel_reads=num_parallel_reads)\n", + " dataset 
= dataset.map(decode_example, num_parallel_calls=num_parallel_reads)\n", + " return dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "dataset_train = get_imagenet_dataset(DATASET_DIR, 'train')\n", + "dataset_valid = get_imagenet_dataset(DATASET_DIR, 'validation')" ] }, { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 2. Load a pretrained FP32 model\n", @@ -157,26 +149,19 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ "from tensorflow.keras.applications.resnet import ResNet50\n", "\n", - "model = ResNet50(weights=\"imagenet\")\n", - "model.compile(optimizer=\"adam\", loss=\"categorical_crossentropy\")" + "model = ResNet50(weights='imagenet')" ] }, { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 3. Determine the baseline FP32 accuracy\n", @@ -188,10 +173,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -202,10 +184,7 @@ "attachments": {}, "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 4. Create a QuantizationSim Model and determine quantized accuracy\n", @@ -224,10 +203,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -239,10 +215,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "\n", @@ -262,10 +235,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -282,10 +252,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Compute Encodings\n", @@ -302,10 +269,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -313,7 +277,8 @@ "from tensorflow.keras.applications.resnet import preprocess_input\n", "\n", "def pass_calibration_data(sim_model, samples):\n", - " dataset = dataset_valid\n", + " dataset = dataset_valid.dataset\n", + " batch_size = dataset_valid.batch_size\n", "\n", " progbar = Progbar(samples)\n", "\n", @@ -323,19 +288,16 @@ "\n", " batch_cntr += 1\n", " progbar_stat_update = \\\n", - " batch_cntr * BATCH_SIZE if (batch_cntr * BATCH_SIZE) < samples else samples\n", + " batch_cntr * batch_size if (batch_cntr * batch_size) < samples else samples\n", " progbar.update(progbar_stat_update)\n", - " if (batch_cntr * BATCH_SIZE) > samples:\n", + " if (batch_cntr * batch_size) > samples:\n", " break" ] }, { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "\n", @@ -346,10 +308,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": 
false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -360,10 +319,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Compile the model\n", @@ -375,10 +331,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -388,10 +341,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Evaluate the performance of the quantized model\n", @@ -403,10 +353,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -416,10 +363,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 5. Perform QAT\n", @@ -432,11 +376,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, - "scrolled": true + "collapsed": false }, "outputs": [], "source": [ @@ -447,10 +387,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 6. Evaluate validation accuracy after QAT\n", @@ -462,10 +399,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -475,10 +409,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 7. Export the encodings\n", @@ -490,10 +421,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -505,10 +433,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## Summary\n", @@ -522,21 +447,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" + "pygments_lexer": "ipython2", + "version": "3.8.0" }, "vscode": { "interpreter": { @@ -545,5 +470,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 0 } diff --git a/releases/1.33.0/Examples/tensorflow/quantization/keras/qat_range_learning.html b/releases/1.33.0/Examples/tensorflow/quantization/keras/qat_range_learning.html index 8856a8d..bacdeb2 100644 --- a/releases/1.33.0/Examples/tensorflow/quantization/keras/qat_range_learning.html +++ b/releases/1.33.0/Examples/tensorflow/quantization/keras/qat_range_learning.html @@ -1,8 +1,7 @@ - - + Quantization-Aware Training with Range Learning — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      @@ -1114,21 +1106,21 @@
      -
      +

      Quantization-Aware Training with Range Learning

This notebook shows a working code example of how to use AIMET to perform Quantization-Aware Training (QAT) with range-learning. QAT with range-learning is an AIMET feature that adds quantization simulation ops to a pre-trained model and uses a standard training pipeline to fine-tune both the model and the quantization parameters for a few epochs. While QAT fine-tunes only the model parameters, QAT with range-learning also learns the encoding min/max of parameter quantizers (hence the name range-learning). The resulting model should show improved accuracy on quantized ML accelerators.

The quantization parameters (like encoding min/max/scale/offset) for activations are computed once initially. During QAT, both the model weights and quantization parameters are jointly updated to minimize the effects of quantization in the forward pass.

      -
      +

      Overall flow

      This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load a pretrained FP32 model and determine the baseline FP32 accuracy 3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score 4. Fine-tune the quantization simulation model using QAT with range-learning and evaluate the simulation model to get a post fine-tuned quantized accuracy score

      -
      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art QAT results. For example, it uses a relatively quantization-friendly model like Resnet50. Also, some optimization parameters like number of epochs are deliberately chosen to have the notebook execute more quickly.

      -
      +

      Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.

        @@ -1140,8 +1132,8 @@

        Example evaluation and training pipeline +

      +

      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#)

Note: To speed up the execution of this notebook, you may use a reduced subset of the ImageNet dataset. E.g. the entire ILSVRC2012 dataset has 1000 classes, 1000 training samples per class and 50 validation samples per class. But for the purpose of running this notebook, you could perhaps reduce the dataset to say 2 samples per class. This exercise is left up to the reader and is not necessary.

      @@ -1150,9 +1142,7 @@

      Dataset
      [ ]:
       

      - -
      import os
      +
      import os
       os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
       
       import tensorflow as tf
       
      -
      -
      +
      +

      1. Load the dataset

We define a few utility functions and assign the training and validation datasets to dataset_train and dataset_valid respectively

      [ ]:
       
      -
      dataset_train = dataset_valid = tf.keras.preprocessing.image_dataset_from_directory(
      -    directory=os.path.join(DATASET_DIR, "train"),
      -    labels="inferred",
      -    label_mode="categorical",
      -    batch_size=BATCH_SIZE,
      -    shuffle=True,
      -    image_size=IMAGE_SIZE
      -)
      -dataset_valid = tf.keras.preprocessing.image_dataset_from_directory(
      -    directory=os.path.join(DATASET_DIR, "val"),
      -    labels="inferred",
      -    label_mode="categorical",
      -    batch_size=BATCH_SIZE,
      -    shuffle=False,
      -    image_size=IMAGE_SIZE
      -)
      +
      import glob
      +
      +def decode_example(example):
      +    """ Decode TFRecord and get image and label
      +    """
      +    features = tf.compat.v1.parse_single_example(example,
      +                                        features={'image/class/label': tf.compat.v1.FixedLenFeature([], tf.compat.v1.int64),
      +                                                    'image/encoded': tf.compat.v1.FixedLenFeature([], tf.compat.v1.string)})
      +    image_data = features['image/encoded']
      +    label = tf.compat.v1.cast(features['image/class/label'], tf.int32)
      +    image = tf.compat.v1.image.decode_jpeg(image_data, channels=3)
      +    return image, label
      +
      +def get_imagenet_dataset(dataset_path, split_name, num_parallel_reads=16):
      +    """Returns an decoded (image, label) TensorFlow Dataset object for ImageNet
      +    """
      +    # Create the TFRecordDataset
      +    glob_name = f'{os.path.join(dataset_path, split_name)}-*'
      +    tf_record_files = glob.glob(glob_name)
      +    dataset = tf.compat.v1.data.TFRecordDataset(filenames=tf_record_files, num_parallel_reads=num_parallel_reads)
      +    dataset = dataset.map(decode_example, num_parallel_calls=num_parallel_reads)
      +    return dataset
       
      -
      -
      +
      +
      [ ]:
      +
      +
      +
      dataset_train = get_imagenet_dataset(DATASET_DIR, 'train')
      +dataset_valid = get_imagenet_dataset(DATASET_DIR, 'validation')
      +
      +
      +
      +
      +

      2. Load a pretrained FP32 model

      For this example notebook, we are going to load a pretrained ResNet50 model from Keras. Similarly, you can load any pretrained Keras model instead.

      [ ]:
       
      -
      from tensorflow.keras.applications.resnet import ResNet50
      +
      from tensorflow.keras.applications.resnet import ResNet50
       
       model = ResNet50(weights='imagenet')
      -model.compile(optimizer="adam", loss="categorical_crossentropy")
       
      - -
      +
      +

      3. Determine the baseline FP32 accuracy

Let’s determine the FP32 (floating point 32-bit) accuracy of this model using the evaluate() routine

      [ ]:
       
      -
      model.evaluate(dataset_valid)
      +
      model.evaluate(dataset_valid)
       
      - -
      +
      +

      4. Create a QuantizationSim Model and determine quantized accuracy

      -
      +

      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this? On quantized runtimes (like TFLite, SnapDragon Neural Processing SDK, etc.), it is a common practice to fold the BN layers. Doing so results in an inferences/sec speedup since unnecessary computation is avoided. From a floating-point compute perspective, a BN-folded model is mathematically equivalent to a model with BN layers at inference time, and produces the same accuracy. However, folding the BN layers can increase the range of the tensor values
@@ -1234,15 +1238,15 @@

      Fold Batch Normalization layers
      [ ]:
       

      -
      from aimet_tensorflow.keras.batch_norm_fold import fold_all_batch_norms
      +
      from aimet_tensorflow.keras.batch_norm_fold import fold_all_batch_norms
       
       _, model = fold_all_batch_norms(model)
       
      -
      - -
      +
      +
      +

      Create Quantization Sim Model

Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here - quant_scheme: We set this to “training_range_learning_with_tf_init” - This is the key setting that enables “range learning”. With this choice of quant scheme, AIMET will use the TF quant scheme to initialize the quantization parameters like scale/offset. Those parameters are then set to be trainable so they can continue to be updated during fine-tuning. - Another choice for quant_scheme is “training_range_learning_with_tf_enhanced_init”. Similar to the above, but the initialization for scale/offset is done using the TF Enhanced scheme. Since in both schemes the quantization parameters are set to be trainable, there is not much benefit to using this choice instead of “training_range_learning_with_tf_init”. - default_output_bw: Setting this to 8 essentially means that we
@@ -1252,7 +1256,7 @@

      Create Quantization Sim Model
      [ ]:
       

      -
      from aimet_tensorflow.keras.quantsim import QuantizationSimModel
      +
      from aimet_tensorflow.keras.quantsim import QuantizationSimModel
       from aimet_common.defs import QuantScheme
       
       sim = QuantizationSimModel(model=model,
      @@ -1263,7 +1267,7 @@ 

      Create Quantization Sim Model +
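For reference, a minimal sketch of the complete call with range learning enabled follows; only the arguments described above are shown (other defaults are left untouched), so confirm the exact signature against the QuantizationSimModel API docs.

from aimet_tensorflow.keras.quantsim import QuantizationSimModel
from aimet_common.defs import QuantScheme

# Sketch: initialize scale/offset with the TF scheme and keep them trainable
# (range learning), with 8-bit activations and parameters.
sim = QuantizationSimModel(model=model,
                           quant_scheme=QuantScheme.training_range_learning_with_tf_init,
                           default_output_bw=8,
                           default_param_bw=8)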

      Compute Encodings

Although AIMET has wrapped the layers to act as being ‘quantized’, the model is not ready to be used yet. Before we can use the sim model for inference or training, we need to find appropriate scale/offset quantization parameters for each ‘quantizer’ layer. For activation quantization layers, we need to pass unlabeled data samples through the model to collect range statistics which will then let AIMET calculate appropriate scale/offset quantization parameters. This process is sometimes referred to as calibration. AIMET simply refers to it as ‘computing encodings’.

      @@ -1274,11 +1278,13 @@

      Compute Encodings
      [ ]:
       

      -
      -
      sim.compute_encodings(forward_pass_callback=pass_calibration_data,
      +
      sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                             forward_pass_callback_args=1000)
       
      - -
      +
      +

      Compile the model

      Configure the model for training and evaluation. The model must be compiled before evaluation.

      [ ]:
       
      -
      sim.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
      +
      sim.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
       
      - -
      +
      +

      Evaluate the performance of the quantized model

      Next, we can evaluate the performance of the quantized model

      [ ]:
       
      -
      sim.evaluate(dataset_valid)
      +
      sim.evaluate(dataset_valid)
       
      - - -
      +
      +
      +

      5. Perform QAT

      To perform quantization aware training (QAT), we simply train the model for a few more epochs (typically 15-20). As with any training job, hyperparameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so. For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.

      @@ -1338,45 +1344,45 @@

      5. Perform QAT
      [ ]:
       

      -
      quantized_callback = tf.keras.callbacks.TensorBoard(log_dir="./log/quantized")
      +
      quantized_callback = tf.keras.callbacks.TensorBoard(log_dir="./log/quantized")
       history = sim.fit(dataset_train, epochs=1, validation_data=dataset_valid, callbacks=[quantized_callback])
       
      - -
      +
      +

      6. Evaluate validation accuracy after QAT

      Next, let’s evaluate the validation accuracy of our model after QAT

      [ ]:
       
      -
      sim.evaluate(dataset_valid)
      +
      sim.evaluate(dataset_valid)
       
      - -
      +
      +

      7. Export the encodings

      Finally, let’s compute and export the encodings of the model after performing QAT. When comparing the encodings file generated by this step and the encodings generated before quantization, there should be some differences. These differences are an artifact of QAT.

      [ ]:
       
      -
      sim.compute_encodings(forward_pass_callback=pass_calibration_data,
      +
      sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                             forward_pass_callback_args=1000)
       sim.export('./data', 'model_after_qat')
       
      - -
      +
      +

      Summary

      Hope this notebook was useful for you to understand how to use AIMET for performing QAT with range-learning.

      Few additional resources - Refer to the AIMET API docs to know more details of the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and vanilla QAT(without range-learning)

      - - - +
      +
      +
      diff --git a/releases/1.33.0/Examples/tensorflow/quantization/keras/qat_range_learning.ipynb b/releases/1.33.0/Examples/tensorflow/quantization/keras/qat_range_learning.ipynb index 064f7d8..df5b449 100644 --- a/releases/1.33.0/Examples/tensorflow/quantization/keras/qat_range_learning.ipynb +++ b/releases/1.33.0/Examples/tensorflow/quantization/keras/qat_range_learning.ipynb @@ -3,10 +3,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "# Quantization-Aware Training with Range Learning\n", @@ -31,10 +28,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## Example evaluation and training pipeline\n", @@ -51,10 +45,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## Dataset\n", @@ -70,26 +61,18 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ - "DATASET_DIR = '/path/to/dir' # Please replace this with a real directory\n", - "BATCH_SIZE = 128\n", - "IMAGE_SIZE = (224, 224)" + "DATASET_DIR = '/path/to/dir/' # Please replace this with a real directory" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -102,10 +85,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 1. Load the dataset\n", @@ -117,38 +97,50 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ - "dataset_train = dataset_valid = tf.keras.preprocessing.image_dataset_from_directory(\n", - " directory=os.path.join(DATASET_DIR, \"train\"),\n", - " labels=\"inferred\",\n", - " label_mode=\"categorical\",\n", - " batch_size=BATCH_SIZE,\n", - " shuffle=True,\n", - " image_size=IMAGE_SIZE\n", - ")\n", - "dataset_valid = tf.keras.preprocessing.image_dataset_from_directory(\n", - " directory=os.path.join(DATASET_DIR, \"val\"),\n", - " labels=\"inferred\",\n", - " label_mode=\"categorical\",\n", - " batch_size=BATCH_SIZE,\n", - " shuffle=False,\n", - " image_size=IMAGE_SIZE\n", - ")" + "import glob\n", + "\n", + "def decode_example(example):\n", + " \"\"\" Decode TFRecord and get image and label\n", + " \"\"\"\n", + " features = tf.compat.v1.parse_single_example(example,\n", + " features={'image/class/label': tf.compat.v1.FixedLenFeature([], tf.compat.v1.int64),\n", + " 'image/encoded': tf.compat.v1.FixedLenFeature([], tf.compat.v1.string)})\n", + " image_data = features['image/encoded']\n", + " label = tf.compat.v1.cast(features['image/class/label'], tf.int32)\n", + " image = tf.compat.v1.image.decode_jpeg(image_data, channels=3)\n", + " return image, label\n", + "\n", + "def get_imagenet_dataset(dataset_path, split_name, num_parallel_reads=16):\n", + " \"\"\"Returns an decoded (image, label) TensorFlow Dataset object for ImageNet\n", + " \"\"\"\n", + " # Create the TFRecordDataset\n", + " glob_name = f'{os.path.join(dataset_path, split_name)}-*'\n", + " tf_record_files = glob.glob(glob_name)\n", + " dataset = 
tf.compat.v1.data.TFRecordDataset(filenames=tf_record_files, num_parallel_reads=num_parallel_reads)\n", + " dataset = dataset.map(decode_example, num_parallel_calls=num_parallel_reads)\n", + " return dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "dataset_train = get_imagenet_dataset(DATASET_DIR, 'train')\n", + "dataset_valid = get_imagenet_dataset(DATASET_DIR, 'validation')" ] }, { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 2. Load a pretrained FP32 model\n", @@ -160,26 +152,19 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ "from tensorflow.keras.applications.resnet import ResNet50\n", "\n", - "model = ResNet50(weights='imagenet')\n", - "model.compile(optimizer=\"adam\", loss=\"categorical_crossentropy\")" + "model = ResNet50(weights='imagenet')" ] }, { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 3. Determine the baseline FP32 accuracy\n", @@ -191,10 +176,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -205,10 +187,7 @@ "attachments": {}, "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 4. Create a QuantizationSim Model and determine quantized accuracy\n", @@ -227,10 +206,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -243,10 +219,7 @@ "attachments": {}, "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## Create Quantization Sim Model\n", @@ -266,10 +239,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -286,10 +256,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Compute Encodings\n", @@ -306,10 +273,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -317,7 +281,9 @@ "from tensorflow.keras.applications.resnet import preprocess_input\n", "\n", "def pass_calibration_data(sim_model, samples):\n", - " dataset = dataset_valid\n", + " dataset = dataset_valid.dataset\n", + " batch_size = dataset_valid.batch_size\n", + "\n", " progbar = Progbar(samples)\n", "\n", " batch_cntr = 0\n", @@ -326,19 +292,16 @@ "\n", " batch_cntr += 1\n", " progbar_stat_update = \\\n", - " batch_cntr * BATCH_SIZE if (batch_cntr * BATCH_SIZE) < samples else samples\n", + " batch_cntr * batch_size if (batch_cntr * batch_size) < samples else samples\n", " progbar.update(progbar_stat_update)\n", - " if (batch_cntr * BATCH_SIZE) > samples:\n", + " if (batch_cntr * batch_size) > samples:\n", " break" ] }, { "cell_type": "markdown", "metadata": { - "collapsed": 
false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "\n", @@ -349,10 +312,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -363,10 +323,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Compile the model\n", @@ -378,10 +335,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -391,10 +345,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "### Evaluate the performance of the quantized model\n", @@ -406,10 +357,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -419,10 +367,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 5. Perform QAT\n", @@ -435,10 +380,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -449,10 +391,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 6. Evaluate validation accuracy after QAT\n", @@ -464,10 +403,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -477,10 +413,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## 7. Export the encodings\n", @@ -492,10 +425,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -507,10 +437,7 @@ { "cell_type": "markdown", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "source": [ "## Summary\n", @@ -524,21 +451,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" + "pygments_lexer": "ipython2", + "version": "3.8.0" }, "vscode": { "interpreter": { @@ -547,5 +474,5 @@ } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 0 } diff --git a/releases/1.33.0/Examples/tensorflow/quantization/keras/quant_analyzer.html b/releases/1.33.0/Examples/tensorflow/quantization/keras/quant_analyzer.html index afe7022..10fdf39 100644 --- a/releases/1.33.0/Examples/tensorflow/quantization/keras/quant_analyzer.html +++ b/releases/1.33.0/Examples/tensorflow/quantization/keras/quant_analyzer.html @@ -1,8 +1,7 @@ - - + Quant Analyzer — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      @@ -1114,14 +1106,14 @@
      -
      +

      Quant Analyzer

      This notebook showcases a working code example of how to use AIMET to apply Quant Analyzer. Quant Analyzer is a feature which performs various analyses on a model to understand how each layer in the model responds to quantization.

      -
      +

      Overall flow

      This notebook covers the following 1. Instantiate the example evaluation pipeline 2. Load the FP32 model 3. Apply QuantAnalyzer to the model

      -
      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art results.

      • @@ -1129,7 +1121,7 @@

        What this notebook is not -
        +

        Dataset

        This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#)

Note: To speed up the execution of this notebook, you may use a reduced subset of the ImageNet dataset. E.g. the entire ILSVRC2012 dataset has 1000 classes, 1000 training samples per class and 50 validation samples per class. But for the purpose of running this notebook, you could perhaps reduce the dataset to say 2 samples per class. This exercise is left up to the reader and is not necessary.

        @@ -1142,9 +1134,9 @@

        Dataset -
        +

        1. Example evaluation and training pipeline

        The following is an example training and validation loop for this image classification task.

          @@ -1188,9 +1180,9 @@

          1. Example evaluation and training pipeline -
          +

          2. Load a pretrained FP32 model

          For this example notebook, we are going to load a pretrained ResNet50 model from Keras. Similarly, you can load any pretrained Keras model instead.

          @@ -1203,9 +1195,9 @@

          2. Load a pretrained FP32 model -
          +

          3. Apply QuantAnalyzer to the model

          QuantAnalyzer requires two functions to be defined by the user for passing data through the model:

          Forward pass callback

          @@ -1354,9 +1346,9 @@

          3. Apply QuantAnalyzer to the model +

          +

          +
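To tie the two callbacks together, the sketch below shows one plausible way to construct and run QuantAnalyzer. The CallbackFunc wrapper, the constructor keywords, the analyze() arguments, and the stub callbacks are all assumptions made for illustration (dataset_valid stands in for a batched validation tf.data.Dataset); consult the QuantAnalyzer API docs for the exact signatures.

from aimet_common.defs import QuantScheme
from aimet_common.utils import CallbackFunc
from aimet_tensorflow.keras.quant_analyzer import QuantAnalyzer

def forward_pass_func(model, _):
    # Hypothetical stub: run a handful of unlabeled batches through the model.
    for images, _labels in dataset_valid.take(10):
        model(images, training=False)

def eval_func(model, _):
    # Hypothetical stub: return a top-1 accuracy score on the validation set.
    _, accuracy = model.evaluate(dataset_valid, verbose=0)
    return accuracy

quant_analyzer = QuantAnalyzer(model=model,
                               forward_pass_callback=CallbackFunc(forward_pass_func),
                               eval_callback=CallbackFunc(eval_func))
quant_analyzer.analyze(quant_scheme=QuantScheme.post_training_tf_enhanced,
                       default_param_bw=8,
                       default_output_bw=8,
                       results_dir='./quant_analyzer_results')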

          Per-layer analysis by enabling/disabling quantization wrappers

          • per_layer_quant_enabled.html: A plot with layers on the x-axis and model accuracy on the y-axis, where each layer’s accuracy represents the model accuracy when all quantizers in the model are disabled except for that layer’s parameter and activation quantizers.

          • @@ -1365,8 +1357,8 @@

            Per-layer analysis by enabling/disabling quantization wrappers

            -

          -
          +

        +

        Encoding min/max ranges

        • min_max_ranges: A folder containing the following sets of files:

          @@ -1379,24 +1371,24 @@

          Encoding min/max ranges

        min_max_ranges.html

        -
        -
        +

        +

        PDF of statistics

        • (If TF Enhanced quant scheme is used) activations_pdf: A folder containing html files for each layer, plotting the histogram of tensor values seen for that layer’s output activation seen during forward pass calibration.

        • (If TF Enhanced quant scheme is used) weights_pdf: A folder containing sub folders for each layer with weights. Each layer’s folder contains html files for each parameter of that layer, with a histogram plot of tensor values seen for that parameter seen during forward pass calibration.

        weights_pdf.html

        -
        -
        +

      +

      Per-layer MSE loss

      • (Optional, if per layer MSE loss is enabled) per_layer_mse_loss.html: A plot with layers on the x-axis and MSE loss on the y-axis, where each layer’s MSE loss represents the MSE seen comparing that layer’s outputs in the FP32 model vs. the quantized model.

      • (Optional, if per layer MSE loss is enabled) per_layer_mse_loss.json: A json file containing the data shown in per_layer_mse_loss.html, associating layer names with MSE loss.

      per_layer_mse_loss.html

      -
      - +
      +
      diff --git a/releases/1.33.0/Examples/tensorflow/quantization/keras/quantsim_adaround_pcq.html b/releases/1.33.0/Examples/tensorflow/quantization/keras/quantsim_adaround_pcq.html index d82fc94..cb24a93 100644 --- a/releases/1.33.0/Examples/tensorflow/quantization/keras/quantsim_adaround_pcq.html +++ b/releases/1.33.0/Examples/tensorflow/quantization/keras/quantsim_adaround_pcq.html @@ -1,8 +1,7 @@ - - + Quantsim and Adaround - Per Channel Quantization (PCQ) — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      @@ -1114,7 +1106,7 @@
      -
      +

      Quantsim and Adaround - Per Channel Quantization (PCQ)

      This notebook illustrates the use of AIMET Adaround feature.

AIMET quantization features, by default, use the “nearest rounding” technique for achieving quantization. When using the “nearest rounding” technique, the weight value is quantized to the nearest integer value. The Adaptive Rounding (AdaRound) feature uses a smaller subset of the unlabelled training data to adaptively round the weights. AdaRound optimizes a loss function using the unlabelled training data to adaptively decide whether to quantize a specific weight to the integer value near it
@@ -1127,17 +1119,17 @@

Quantsim and Adaround - Per Channel Quantization (PCQ)
conv2d_layer = layers.Conv2D(filters=64, kernel_size=3)

      -
      +

      Overall flow

This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the FP32 model and evaluate the model to find the baseline FP32 accuracy 3. Apply PCQ Adaround and get the corresponding model 4. Create a PCQ quantization simulation model (with fake quantization ops inserted) from the Adaround model and evaluate this simulation model to get a quantized accuracy score 5. Export the simulation model’s encodings and see how to take them to SNPE/QNN

      -
      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art results. For example, it uses a relatively quantization-friendly model like Resnet50. Also, some optimization parameters are deliberately chosen to have the notebook execute more quickly.


      -
      +

      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

Note: To speed up the execution of this notebook, you may use a reduced subset of the ImageNet dataset. E.g. the entire ILSVRC2012 dataset has 1000 classes, 1000 training samples per class and 50 validation samples per class. But for the purpose of running this notebook, you could perhaps reduce the dataset to say 2 samples per class. This exercise is left up to the reader and is not necessary.

      @@ -1150,9 +1142,9 @@

      Dataset -
      +

      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.

      -
      +


      -
      +
      -
      +
      +

      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Note: For per-channel quantization to be enabled, we pass a config file that has per_channel_quantization set to True. This config file will be used later on with Adaround as well. Having one place that describes the quantization style ensures that we don’t end up with a mismatch when applying QuantSim and Adaround together.
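For illustration, a hedged sketch of what such a config file might contain follows. The only field of interest here is the per_channel_quantization flag; the surrounding section names follow the AIMET quantsim config schema, but the exact required fields for your release should be taken from the default config shipped with AIMET rather than from this sketch.

import json

# Minimal, illustrative per-channel quantsim config (verify field names against
# the default quantsim config packaged with your AIMET release).
per_channel_config = {
    "defaults": {
        "ops": {"is_output_quantized": "True"},
        "params": {"is_quantized": "True"},
        "per_channel_quantization": "True",
    },
    "params": {"bias": {"is_quantized": "False"}},
    "op_type": {},
    "supergroups": [],
    "model_input": {},
    "model_output": {},
}

with open("pcq_quantsim_config.json", "w") as config_file:
    json.dump(per_channel_config, config_file, indent=4)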

      @@ -1307,9 +1299,9 @@

      Fold Batch Normalization layers

      Now the QuantizationSim model is ready to be used for inference. First we can pass this model to the same evaluation routine we used before. The evaluation routine will now give us a simulated quantized accuracy score for INT8 quantization instead of the FP32 accuracy score we saw before.

      - +


      -
      +

      4. Apply Adaround

      We can now apply AdaRound to this model in a per channel quantization fashion.

Note: For per-channel quantization to be enabled, we pass a config file that has per_channel_quantization set to True to apply_adaround, just as we did with QuantSim.
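A hedged sketch of how apply_adaround might be invoked with that config file is shown below. The module path, the AdaroundParameters arguments, and the keyword names are assumptions made for illustration, and the dataset, batch count, and iteration count are placeholders; consult the AdaRound API docs before relying on this.

from aimet_common.defs import QuantScheme
from aimet_tensorflow.keras.adaround_weight import Adaround, AdaroundParameters  # assumed module path

# Placeholder calibration settings; use your own unlabelled dataset and budgets.
params = AdaroundParameters(data_set=unlabelled_dataset, num_batches=4,
                            default_num_iterations=32)

ada_model = Adaround.apply_adaround(model, params,
                                    path="./output",
                                    filename_prefix="adaround",
                                    default_param_bw=8,
                                    default_quant_scheme=QuantScheme.post_training_tf,
                                    config_file="pcq_quantsim_config.json")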

      @@ -1396,16 +1388,16 @@

      4. Apply Adaround -
      +

      Summary

This example illustrated how the AIMET AdaRound and QuantSim APIs are invoked to achieve post-training quantization on a per-channel basis. To use AIMET for your specific needs, replace the model with your model and replace the data pipeline with your data pipeline. This will provide you with a quick starting point. As indicated above, some parameters in this example have been chosen in such a way as to make this example execute faster.

      Hope this notebook was useful for you to understand how to use AIMET for performing Adaround and QuantSim on a per channel basis.

      Few additional resources - Refer to the AIMET API docs to know more details of the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT techniques

      -
      -

      - + + + diff --git a/releases/1.33.0/Examples/tensorflow/quantization/keras/quantsim_cle.html b/releases/1.33.0/Examples/tensorflow/quantization/keras/quantsim_cle.html index 5957909..afe6643 100644 --- a/releases/1.33.0/Examples/tensorflow/quantization/keras/quantsim_cle.html +++ b/releases/1.33.0/Examples/tensorflow/quantization/keras/quantsim_cle.html @@ -1,8 +1,7 @@ - - + Cross-Layer Equalization (CLE) with QuantSim — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      @@ -1114,23 +1106,23 @@
      -
      +

      Cross-Layer Equalization (CLE) with QuantSim

This notebook showcases a working code example of how to use AIMET to apply Cross-Layer Equalization (CLE) and use QuantSim. CLE is a post-training quantization technique that aims to improve the quantized accuracy of a given model. CLE does not need any data samples. This technique helps recover quantized accuracy when model quantization is sensitive to parameter quantization as opposed to activation quantization.

To learn more about these techniques, please refer to the “Data-Free Quantization Through Weight Equalization and Bias Correction” paper from ICCV 2019 - https://arxiv.org/abs/1906.04721

Cross-Layer Equalization AIMET performs the following steps when running CLE: 1. Batch Norm Folding: Folds BN layers into Conv layers immediately before or after the Conv layers. 2. Cross-Layer Scaling: Given a set of consecutive Conv layers, equalizes the range of tensor values per-channel by scaling up/down per-channel weight tensor values of a layer and correspondingly scaling down/up per-channel weight tensor values of the subsequent layer. 3. High Bias Folding: Cross-layer scaling may result in high bias parameter values for some layers. This technique folds some of the bias of a layer into the subsequent layer’s parameters.

      -
      +

      Overall flow

This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the FP32 model and evaluate the model to find the baseline FP32 accuracy 3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score 4. Apply CLE, and evaluate the simulation model to get a post-finetuned quantized accuracy score 5. Export the simulation model’s encodings and see how to take them to SNPE/QNN

      -
      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art results. For example, it uses a relatively quantization-friendly model like Resnet50. Also, some optimization parameters are deliberately chosen to have the notebook execute more quickly.


      -
      +

      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

Note: To speed up the execution of this notebook, you may use a reduced subset of the ImageNet dataset. E.g. the entire ILSVRC2012 dataset has 1000 classes, 1000 training samples per class and 50 validation samples per class. But for the purpose of running this notebook, you could perhaps reduce the dataset to say 2 samples per class. This exercise is left up to the reader and is not necessary.

      @@ -1143,9 +1135,9 @@

      Dataset -
      +

      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.

      -
      +


      -
      +
      -
      +
      +

      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this? On quantized runtimes (like TFLite, SnapDragon Neural Processing SDK, etc.), it is a common practice to fold the BN layers. Doing so results in an inferences/sec speedup since unnecessary computation is avoided. From a floating-point compute perspective, a BN-folded model is mathematically equivalent to a model with BN layers at inference time, and produces the same accuracy. However, folding the BN layers can increase the range of the tensor values
@@ -1245,9 +1237,9 @@

      Fold Batch Normalization layers -
      +

      Create Quantization Sim Model

Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will wrap the Keras layers to mimic a layer as quantized. A few of the parameters are explained here - quant_scheme: We set this to “QuantScheme.post_training_tf” - Supported options are ‘tf_enhanced’ or ‘tf’, or the QuantScheme enum values QuantScheme.post_training_tf or QuantScheme.post_training_tf_enhanced - default_output_bw: Setting this to 8 essentially means that we are asking AIMET to perform all activation quantizations in the model using integer 8-bit precision - default_param_bw: Setting this to 8 essentially means that we are asking AIMET to perform all parameter quantizations in the model using integer 8-bit precision - num_batches: The number of batches used to compute the quantization encodings. Only 5 batches are used here to speed up the process. In addition, the number of images in these 5
@@ -1269,9 +1261,9 @@

      Create Quantization Sim Model -
      +

      Compute Encodings

Although AIMET has wrapped the layers to act as being ‘quantized’, the model is not ready to be used yet. Before we can use the sim model for inference or training, we need to find appropriate scale/offset quantization parameters for each ‘quantizer’ layer. For activation quantization layers, we need to pass unlabeled data samples through the model to collect range statistics which will then let AIMET calculate appropriate scale/offset quantization parameters. This process is sometimes referred to as calibration. AIMET simply refers to it as ‘computing encodings’.

      @@ -1326,9 +1318,9 @@

      Compute Encodings -
      +

      4 Cross Layer Equalization

      The next cell performs cross-layer equalization on the model. As noted before, the function folds batch norms, applies cross-layer scaling, and then folds high biases.

      Note: Interestingly, CLE needs BN statistics for its procedure. If a BN folded model is provided, CLE will run the CLS (cross-layer scaling) optimization step but will skip the HBA (high-bias absorption) step. To avoid this, we simply load the original model again before running CLE.
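A minimal sketch of invoking CLE on the reloaded model is shown below. The module path and the equalize_model entry point follow the AIMET Keras cross-layer-equalization API described here, but treat the exact signature as an assumption and check the API docs for your release.

from tensorflow.keras.applications.resnet import ResNet50
from aimet_tensorflow.keras.cross_layer_equalization import equalize_model

# Reload the original FP32 model so CLE can see the BN statistics, then run
# BN folding, cross-layer scaling, and high-bias absorption in one call.
model = ResNet50(weights='imagenet')
cle_applied_model = equalize_model(model)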

      @@ -1361,9 +1353,9 @@

      4 Cross Layer Equalization -
      +

      5 Exporting

      Now the encodings for the QuantizationSimModel have been computed, they can be exported. Exporting can be done with the export function. This function will export the encodings in both a JSON and YAML file, a h5 model without wrappers, a Tensorflow 2 SavedModel, and a converted protobuff model from the h5 model. The converted protobuff model and the encodings exported can be then consumed by either SNPE/QNN.

Note: export() takes a path to save to and a filename_prefix.
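For example (output directory and prefix are illustrative):

sim.export(path='./output/', filename_prefix='model_after_cle')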


      Summary

      Hopefully this notebook was useful for you to understand how to use AIMET for performing Cross Layer Equalization (CLE).

A few additional resources - Refer to the AIMET API docs for more details on the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT techniques

diff --git a/releases/1.33.0/Examples/tensorflow/quantization/keras/quantsim_cle.ipynb b/releases/1.33.0/Examples/tensorflow/quantization/keras/quantsim_cle.ipynb
diff --git a/releases/1.33.0/Examples/tensorflow/quantization/qat.html b/releases/1.33.0/Examples/tensorflow/quantization/qat.html

      Quantization-Aware Training

This notebook shows a working code example of how to use AIMET to perform QAT (Quantization-aware training). QAT is an AIMET feature that adds quantization simulation ops (sometimes also called fake quantization ops) to a trained ML model and uses a standard training pipeline to fine-tune or train the model for a few epochs. The resulting model should show improved accuracy on quantized ML accelerators.

      AIMET supports two different types of QAT 1. Simply referred to as QAT - quantization parameters like per-tensor scale/offsets for activations are computed once. During fine-tuning, the model weights are updated to minimize the effects of quantization in the forward pass, keeping the quantization parameters constant. 2. Referred to as QAT with range-learning - quantization parameters like per-tensor scale/offsets for activations are computed initially. Then both the quantization parameters and the model weights are jointly updated during fine-tuning to minimize the effects of quantization in the forward pass.

This notebook specifically shows a working code example for #1 above. You can find a separate notebook for #2 in the same folder.


      Overall flow

This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the FP32 model and evaluate the model to find the baseline FP32 accuracy 3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score 4. Fine-tune the quantization simulation model and evaluate the simulation model to get a post-finetuned quantized accuracy score


      What this notebook is not

      • This notebook is not designed to show state-of-the-art QAT results. For example, it uses a relatively quantization-friendly model like Resnet50. Also, some optimization parameters like number of epochs to fine-tune are deliberately chosen to have the notebook execute more quickly.



      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#) and convert them into tfrecords.

      Note1: The ImageNet tfrecords dataset typically has the following characteristics and the dataloader provided in this example notebook rely on these - A folder containing tfrecords files starting with ‘train*’ for training files and ‘valid*’ for validation files. Each tfrecord file should have features: ‘image/encoded’ for image data and ‘image/class/label’ for its corresponding class.


      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.


        2. Load the model and evaluate to get a baseline FP32 accuracy score

For this example notebook, we are going to load a pretrained ResNet50 model from Keras and convert it to a TensorFlow session. Similarly, you can load any pretrained TensorFlow model instead.

        Calling clear_session() releases the global state: this helps avoid clutter from old models and layers, especially when memory is limited.
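A hedged sketch of this step; the compat-v1 session retrieval is an assumption made for the session-based workflow used in this notebook:

import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50

tf.keras.backend.clear_session()                    # release any stale graph state
model = ResNet50(weights='imagenet')                # pretrained FP32 model
sess = tf.compat.v1.keras.backend.get_session()     # TensorFlow session wrapping the Keras graph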


      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this? On quantized runtimes (like TFLite, SnapDragon Neural Processing SDK, etc.), it is common practice to fold the BN layers. Doing so results in an inferences/sec speedup since unnecessary computation is avoided. From a floating-point compute perspective, a BN-folded model is mathematically equivalent to a model with BN layers at inference time and produces the same accuracy. However, folding the BN layers can increase the range of the tensor values for the weights of the adjacent layers, which can affect the quantized accuracy.

      Create Quantization Sim Model

      Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here - quant_scheme: We set this to “QuantScheme.post_training_tf_enhanced” - Supported options are ‘tf_enhanced’ or ‘tf’ or using Quant Scheme Enum QuantScheme.post_training_tf or QuantScheme.post_training_tf_enhanced - default_output_bw: Setting this to 8, essentially means that we are asking AIMET to perform all activation quantizations in the model using integer 8-bit precision - default_param_bw: Setting this to 8, essentially means that we are asking AIMET to perform all parameter quantizations in the model using integer 8-bit precision


      Compute Encodings

Although AIMET has added ‘quantizer’ nodes to the model graph, the model is not ready to be used yet. Before we can use the sim model for inference or training, we need to find appropriate scale/offset quantization parameters for each ‘quantizer’ node. For activation quantization nodes, we need to pass unlabeled data samples through the model to collect range statistics, which will then let AIMET calculate appropriate scale/offset quantization parameters. This process is sometimes referred to as calibration. AIMET simply refers to it as ‘computing encodings’.


      4. Perform QAT

      To perform quantization aware training (QAT), we simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

      For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.


      Summary

      Hope this notebook was useful for you to understand how to use AIMET for performing QAT.

A few additional resources - Refer to the AIMET API docs for more details on the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT with range-learning

diff --git a/releases/1.33.0/Examples/tensorflow/quantization/qat_range_learning.html b/releases/1.33.0/Examples/tensorflow/quantization/qat_range_learning.html

      Quantization-Aware Training with Range Learning

This notebook shows a working code example of how to use AIMET to perform QAT (Quantization-aware training). QAT is an AIMET feature that adds quantization simulation ops (sometimes also called fake quantization ops) to a trained ML model and uses a standard training pipeline to fine-tune or train the model for a few epochs. The resulting model should show improved accuracy on quantized ML accelerators.

      AIMET supports two different types of QAT 1. Simply referred to as QAT - quantization parameters like per-tensor scale/offsets for activations are computed once. During fine-tuning, the model weights are updated to minimize the effects of quantization in the forward pass, keeping the quantization parameters constant. 2. Referred to as QAT with range-learning - quantization parameters like per-tensor scale/offsets for activations are computed initially. Then both the quantization parameters and the model weights are jointly updated during fine-tuning to minimize the effects of quantization in the forward pass.

This notebook specifically shows a working code example for #2 above. You can find a separate notebook for #1 in the same folder.


      Overall flow

This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the FP32 model and evaluate the model to find the baseline FP32 accuracy 3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score 4. Fine-tune the quantization simulation model and evaluate the simulation model to get a post-finetuned quantized accuracy score


      What this notebook is not

      • This notebook is not designed to show state-of-the-art QAT results. For example, it uses a relatively quantization-friendly model like Resnet50. Also, some optimization parameters like number of epochs to fine-tune are deliberately chosen to have the notebook execute more quickly.



      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#) and convert them into tfrecords.

      Note1: The ImageNet tfrecords dataset typically has the following characteristics and the dataloader provided in this example notebook rely on these - A folder containing tfrecords files starting with ‘train*’ for training files and ‘valid*’ for validation files. Each tfrecord file should have features: ‘image/encoded’ for image data and ‘image/class/label’ for its corresponding class.


      1. Example Evaluation and Training Pipeline

      The following is an example training and validation loop for this image classification task.


        2. Load the model and evaluate to get a baseline FP32 accuracy score

For this example notebook, we are going to load a pretrained ResNet50 model from Keras and convert it to a TensorFlow session. Similarly, you can load any pretrained TensorFlow model instead.

        Calling clear_session() releases the global state: this helps avoid clutter from old models and layers, especially when memory is limited.


      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this? On quantized runtimes (like TFLite, SnapDragon Neural Processing SDK, etc.), it is common practice to fold the BN layers. Doing so results in an inferences/sec speedup since unnecessary computation is avoided. From a floating-point compute perspective, a BN-folded model is mathematically equivalent to a model with BN layers at inference time and produces the same accuracy. However, folding the BN layers can increase the range of the tensor values for the weights of the adjacent layers, which can affect the quantized accuracy.

      Create Quantization Sim Model

Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here - quant_scheme: We set this to “training_range_learning_with_tf_init”. This is the key setting that enables “range learning”. With this choice of quant scheme, AIMET will use the TF quant scheme to initialize the quantization parameters like scale/offset, and those parameters are then set to be trainable so they can continue to be updated during fine-tuning. Another choice for quant_scheme is “training_range_learning_with_tf_enhanced_init”. Similar to the above, but the initialization for scale/offset is done using the TF Enhanced scheme. Since in both schemes the quantization parameters are set to be trainable, there is not much benefit to using this choice instead of “training_range_learning_with_tf_init”. - default_output_bw: Setting this to 8 means that we are asking AIMET to perform all activation quantizations in the model using integer 8-bit precision - default_param_bw: Setting this to 8 means that we are asking AIMET to perform all parameter quantizations in the model using integer 8-bit precision
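A minimal sketch of this call, assuming the session-based QuantizationSimModel constructor described in the AIMET TensorFlow API docs; the input/output op names below are placeholders, not taken from this notebook:

from aimet_common.defs import QuantScheme
from aimet_tensorflow.quantsim import QuantizationSimModel

sim = QuantizationSimModel(session=sess,
                           starting_op_names=['input_1'],       # placeholder input op name
                           output_op_names=['probs/Softmax'],   # placeholder output op name
                           quant_scheme=QuantScheme.training_range_learning_with_tf_init,
                           default_output_bw=8,
                           default_param_bw=8,
                           use_cuda=True)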


      Compute Encodings

Although AIMET has added ‘quantizer’ nodes to the model graph, the model is not ready to be used yet. Before we can use the sim model for inference or training, we need to find appropriate scale/offset quantization parameters for each ‘quantizer’ node. For activation quantization nodes, we need to pass unlabeled data samples through the model to collect range statistics, which will then let AIMET calculate appropriate scale/offset quantization parameters. This process is sometimes referred to as calibration. AIMET simply refers to it as ‘computing encodings’.


      4. Perform QAT

      To perform quantization aware training (QAT), we simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

      For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.


      Summary

      Hope this notebook was useful for you to understand how to use AIMET for performing QAT with range-learning.

A few additional resources - Refer to the AIMET API docs for more details on the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and the vanilla QAT method (without range-learning)

diff --git a/releases/1.33.0/Examples/tensorflow/quantization/quant_analyzer.html b/releases/1.33.0/Examples/tensorflow/quantization/quant_analyzer.html

      Quant Analyzer

      This notebook showcases a working code example of how to use AIMET to apply Quant Analyzer. Quant Analyzer is a feature which performs various analyses on a model to understand how each op in the model responds to quantization.


      Overall flow

      This notebook covers the following 1. Instantiate the example evaluation pipeline 2. Load the FP32 model 3. Apply QuantAnalyzer to the model


      What this notebook is not

      • This notebook is not designed to show state-of-the-art results.


        Dataset

        This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#) and convert them into tfrecords.

        Note1: The ImageNet tfrecords dataset typically has the following characteristics and the dataloader provided in this example notebook rely on these - A folder containing tfrecords files starting with ‘train*’ for training files and ‘valid*’ for validation files. Each tfrecord file should have features: ‘image/encoded’ for image data and ‘image/class/label’ for its corresponding class.


        1. Example evaluation and training pipeline

        The following is an example training and validation loop for this image classification task.


        Encoding min/max ranges

        • min_max_ranges: A folder containing the following sets of files:


        min_max_ranges.html


        PDF of statistics

• (If the TF Enhanced quant scheme is used) activations_pdf: A folder containing html files for each op, plotting the histogram of tensor values seen for that op’s output activation during forward pass calibration.

• (If the TF Enhanced quant scheme is used) weights_pdf: A folder containing sub folders for each op with weights. Each op’s folder contains html files for each parameter of that op, with a histogram plot of tensor values seen for that parameter during forward pass calibration.

        weights_pdf.html


      Per op MSE loss

      • (Optional, only enabled when user has provided unlabeled dataset and number of batches) per_op_mse_loss.html: A plot with ops on the x-axis and MSE loss on the y-axis, where each op’s MSE loss represents the MSE seen comparing that op’s outputs in the FP32 model vs. the quantized model.

      • (Optional, only enabled when user has provided unlabeled dataset and number of batches) per_op_mse_loss.json: A json file containing the data shown in per_op_mse_loss.html, associating op names with MSE loss.

      per_op_mse_loss.html

diff --git a/releases/1.33.0/Examples/torch/compression/channel_pruning.html b/releases/1.33.0/Examples/torch/compression/channel_pruning.html

      Model compression using Channel Pruning

      This notebook shows a working code example of how to use AIMET to perform model compression. The Channel Pruning technique is used in this notebook to achieve model compression.

      Here is a brief introduction to the techniques. Please refer to the AIMET user guide for more details.


      Overall flow

      This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the model and evaluate it to find the baseline accuracy 3. Compress the model and fine-tune:
      3.1 Compress model using Channel Pruning and evaluate it to find post-compression accuracy
      3.2 Fine-tune the model

      What this notebook is not

      • This notebook is not designed to show state-of-the-art compression results. For example, some optimization parameters such as num_comp_ratio_candidates, num_eval_iterations and epochs are deliberately chosen to have the notebook execute more quickly.



      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

      Note1: The ImageNet dataset typically has the following characteristics and the dataloader provided in this example notebook rely on these - Subfolders ‘train’ for the training samples and ‘val’ for the validation samples. Please see the pytorch dataset description for more details. - A subdirectory per class, and a file per each image sample


      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.


        2. Load the model and evaluate it to find the baseline accuracy

        For this example notebook, we are going to load a pretrained resnet18 model from torchvision. Similarly, you can load any pretrained PyTorch model instead.
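For example:

from torchvision import models

model = models.resnet18(pretrained=True)   # baseline FP32 model
model = model.eval()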


        3. Compress the model and fine-tune


        3.1. Compress model using Channel Pruning and evaluate it to find post-compression accuracy

Now we use AIMET to define compression parameters for Channel Pruning, a few of which are explained here.
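A hedged sketch of the compression call, using class and argument names as given in the AIMET compression API docs; the module paths, the train_loader/eval_callback names, and the specific values are assumptions chosen only for illustration:

from decimal import Decimal
from aimet_common.defs import CompressionScheme, CostMetric, GreedySelectionParameters
from aimet_torch.defs import ChannelPruningParameters
from aimet_torch.compress import ModelCompressor

greedy_params = GreedySelectionParameters(target_comp_ratio=Decimal(0.5),   # aim for ~50% of the original MACs
                                          num_comp_ratio_candidates=3)
auto_params = ChannelPruningParameters.AutoModeParams(greedy_select_params=greedy_params,
                                                      modules_to_ignore=[model.conv1])
params = ChannelPruningParameters(data_loader=train_loader,                 # assumed training DataLoader
                                  num_reconstruction_samples=500,
                                  allow_custom_downsample_ops=False,
                                  mode=ChannelPruningParameters.Mode.auto,
                                  params=auto_params)

compressed_model, stats = ModelCompressor.compress_model(model,
                                                         eval_callback=eval_callback,  # assumed accuracy function
                                                         eval_iterations=10,
                                                         input_shape=(1, 3, 224, 224),
                                                         compress_scheme=CompressionScheme.channel_pruning,
                                                         cost_metric=CostMetric.mac,
                                                         parameters=params)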



As you can see, the model accuracy fell sharply after compression. This is expected. We will use model fine-tuning to recover this accuracy.

        3.2. Fine-tune the model

        After the model is compressed using Channel Pruning, we can simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

        For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.


        Summary

        Hope this notebook was useful for you to understand how to use AIMET for performing compression with Channel Pruning. As indicated above, some parameters have been chosen in a way to run the example faster.

A few additional resources - Refer to the AIMET API docs for more details on the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET compression and quantization techniques

diff --git a/releases/1.33.0/Examples/torch/compression/spatial_svd.html b/releases/1.33.0/Examples/torch/compression/spatial_svd.html

      Model compression using Spatial SVD

      This notebook shows a working code example of how to use AIMET to perform model compression. The Spatial SVD technique is used in this notebook to achieve model compression.

      Here is a brief introduction to the techniques. Please refer to the AIMET user guide for more details.


      Overall flow

      This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the model and evaluate it to find the baseline accuracy 3. Compress the model and fine-tune:
      3.1 Compress model using Spatial SVD and evaluate it to find post-compression accuracy
      3.2 Fine-tune the model

      What this notebook is not

      • This notebook is not designed to show state-of-the-art compression results. For example, some optimization parameters such as num_comp_ratio_candidates, num_eval_iterations and epochs are deliberately chosen to have the notebook execute more quickly.



      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

      Note1: The ImageNet dataset typically has the following characteristics and the dataloader provided in this example notebook rely on these - Subfolders ‘train’ for the training samples and ‘val’ for the validation samples. Please see the pytorch dataset description for more details. - A subdirectory per class, and a file per each image sample


      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.


        2. Load the model and evaluate it to find the baseline accuracy

        For this example notebook, we are going to load a pretrained resnet18 model from torchvision. Similarly, you can load any pretrained PyTorch model instead.


        3. Compress the model and fine-tune


        3.1. Compress model using Spatial SVD and evaluate it to find post-compression accuracy

Now we use AIMET to define compression parameters for Spatial SVD, a few of which are explained here.



As you can see, the model accuracy fell sharply after compression. This is expected. We will use model fine-tuning to recover this accuracy.

        3.2. Fine-tune the model

        After the model is compressed using Spatial SVD, we can simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

        For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.


        Summary

        Hope this notebook was useful for you to understand how to use AIMET for performing compression with Spatial SVD. As indicated above, some parameters have been chosen in a way to run the example faster.

A few additional resources - Refer to the AIMET API docs for more details on the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET compression and quantization techniques

diff --git a/releases/1.33.0/Examples/torch/compression/spatial_svd_channel_pruning.html b/releases/1.33.0/Examples/torch/compression/spatial_svd_channel_pruning.html

      Model compression using Spatial SVD followed by Channel Pruning

      This notebook shows a working code example of how to use AIMET to perform model compression. Two model-compression techniques are applied back-to-back: Spatial SVD followed by Channel Pruning.

      Here is a brief introduction to the techniques. Please refer to the AIMET user guide for more details.


      Overall flow

      This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the model and evaluate it to find the baseline accuracy 3. Compress the model and fine-tune:
3.3 Compress model using Channel Pruning and evaluate it to find post-compression accuracy

      3.4 Fine-tune the model after Channel Pruning

      What this notebook is not

      • This notebook is not designed to show state-of-the-art compression results. For example, some optimization parameters such as num_comp_ratio_candidates, num_eval_iterations and epochs are deliberately chosen to have the notebook execute more quickly.



      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

      Note1: The ImageNet dataset typically has the following characteristics and the dataloader provided in this example notebook rely on these - Subfolders ‘train’ for the training samples and ‘val’ for the validation samples. Please see the pytorch dataset description for more details. - A subdirectory per class, and a file per each image sample


      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.


        2. Load the model and evaluate it to find the baseline accuracy

        For this example notebook, we are going to load a pretrained resnet18 model from torchvision. Similarly, you can load any pretrained PyTorch model instead.


        3. Compress the model and fine-tune


        3.1. Compress model using Spatial SVD and evaluate it to find post-compression accuracy

Now we use AIMET to define compression parameters for Spatial SVD, a few of which are explained here.



As you can see, the model accuracy fell sharply after compression. This is expected. We will use model fine-tuning to recover this accuracy.

        3.2. Fine-tune the model after Spatial SVD

        After the model is compressed using Spatial SVD, we can simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

        For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.


        3.3. Compress model using Channel Pruning and evaluate it to find post-compression accuracy

The fine-tuned model, compressed with Spatial SVD, can be further compressed using the Channel Pruning method.



As you can see, the model accuracy fell sharply after compression. This is expected. We will use model fine-tuning to recover this accuracy.

      3.4. Fine-tune the model after Channel Pruning

      After the model is compressed using Spatial SVD followed by Channel Pruning, we can simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

      For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.


      Summary

      Hope this notebook was useful for you to understand how to use AIMET for performing compression with Spatial SVD followed by Channel Pruning. As indicated above, some parameters have been chosen in a way to run the example faster.

A few additional resources - Refer to the AIMET API docs for more details on the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET compression and quantization techniques

diff --git a/releases/1.33.0/Examples/torch/quantization/adaround.html b/releases/1.33.0/Examples/torch/quantization/adaround.html

      Adaptive Rounding (AdaRound)

      This notebook shows a working code example of how to use AIMET to perform Adaptive Rounding (AdaRound).

      AIMET quantization features typically use the “nearest rounding” technique for achieving quantization. When using the “nearest rounding” technique, the weight value is quantized to the nearest integer value.

      AdaRound optimizes a loss function using unlabeled training data to decide whether to quantize a specific weight to the closer integer value or the farther one. Using AdaRound quantization, a model is able to achieve an accuracy closer to the FP32 model, while using low bit-width integer quantization.


      Overall flow

This notebook covers the following: 1. Instantiate the example evaluation and training pipeline 2. Load the FP32 model and evaluate the model to find the baseline FP32 accuracy 3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score 4. Apply AdaRound and evaluate the simulation model to get a post-finetuned quantized accuracy score


      What this notebook is not


      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this?


      4. Apply Adaround

      We can now apply AdaRound to this model.

      Some of the parameters for AdaRound are described below
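A minimal sketch of the AdaRound call using the aimet_torch API as documented; the data loader name, the tiny iteration counts, and the output path are assumptions chosen only so the example runs quickly:

import torch
from aimet_common.defs import QuantScheme
from aimet_torch.adaround.adaround_weight import Adaround, AdaroundParameters

params = AdaroundParameters(data_loader=unlabeled_data_loader,   # assumed unlabeled DataLoader
                            num_batches=4,
                            default_num_iterations=32)           # deliberately small for speed

dummy_input = torch.rand(1, 3, 224, 224)
ada_model = Adaround.apply_adaround(model, dummy_input, params,
                                    path='./output/',
                                    filename_prefix='adaround',
                                    default_param_bw=8,
                                    default_quant_scheme=QuantScheme.post_training_tf_enhanced)

# The optimized rounding is written out as parameter encodings; after creating the
# QuantizationSimModel for ada_model, load and freeze them with:
# sim.set_and_freeze_param_encodings(encoding_path='./output/adaround.encodings')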


      Summary

      This example illustrated how the AIMET AdaRound API is invoked to achieve post training quantization. To use AIMET AdaRound for your specific needs, replace the model with your model and replace the data pipeline with your data pipeline. As indicated above, some parameters in this example have been chosen in such a way to make this example execute faster.

      We hope this notebook was useful for you to understand how to use AIMET for performing AdaRound.

      A few additional resources: - Refer to the AIMET API docs to know more details of the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT techniques

diff --git a/releases/1.33.0/Examples/torch/quantization/autoquant.html b/releases/1.33.0/Examples/torch/quantization/autoquant.html

      AutoQuant

      This notebook shows a working code example of how to use AIMET AutoQuant feature.

      AIMET offers a suite of neural network post-training quantization (PTQ) techniques that can be applied in succession. However, the process of finding the right combination and sequence of techniques to apply is time-consuming and requires careful analysis, which can be challenging especially for non-expert users. We instead recommend AutoQuant to save time and effort.

      AutoQuant is an API that applies various PTQ techniques in AIMET automatically based on analyzing the model and best-known heuristics. In AutoQuant, users specify the amount of tolerable accuracy drop, and AutoQuant will apply PTQ techniques cumulatively until the target accuracy is satisfied.


      Overall flow

      This notebook covers the following 1. Define constants and helper functions 2. Load a pretrained FP32 model 3. Run AutoQuant


      What this notebook is not

      This notebook is not designed to show state-of-the-art AutoQuant results. For example, it uses a relatively quantization-friendly model like Resnet18. Also, some optimization parameters are deliberately chosen to have the notebook execute more quickly.



      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

      Note1: The ImageNet dataset typically has the following characteristics and the dataloader provided in this example notebook rely on these - Subfolders ‘train’ for the training samples and ‘val’ for the validation samples. Please see the pytorch dataset description for more details. - A subdirectory per class, and a file per each image sample


      1. Define Constants and Helper functions

In this section, the constants and helper functions needed to run this example are defined.


      2. Load a pretrained FP32 model

      For this example, we are going to load a pretrained resnet18 model from torchvision. Similarly, you can load any pretrained PyTorch model instead.


      3. Run AutoQuant


      Create AutoQuant Object

      The AutoQuant feature utilizes an unlabeled dataset to achieve quantization. The class UnlabeledDatasetWrapper creates an unlabeled Dataset object from a labeled Dataset.
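A hedged sketch of the idea: a thin Dataset wrapper that drops the labels, plus the AutoQuant constructor as described in the AIMET API docs. The dataset name, dummy-input shape, device, and eval_callback are assumptions made for illustration:

import torch
from torch.utils.data import Dataset, DataLoader
from aimet_torch.auto_quant import AutoQuant

class UnlabeledDatasetWrapper(Dataset):
    """Expose only the images of a labeled dataset."""
    def __init__(self, labeled_dataset):
        self._dataset = labeled_dataset

    def __len__(self):
        return len(self._dataset)

    def __getitem__(self, index):
        images, _label = self._dataset[index]
        return images

unlabeled_loader = DataLoader(UnlabeledDatasetWrapper(imagenet_val_dataset),  # assumed dataset
                              batch_size=32, shuffle=False)

auto_quant = AutoQuant(model,
                       dummy_input=torch.rand(1, 3, 224, 224).to(device),     # assumed device
                       data_loader=unlabeled_loader,
                       eval_callback=eval_callback)                           # assumed accuracy function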


      Run AutoQuant Inference

      This step runs AutoQuant inference. AutoQuant inference will run evaluation using the eval_callback with the vanilla quantized model without applying PTQ techniques. This will be useful for measuring the baseline evaluation score before running AutoQuant optimization.


      Set AdaRound Parameters (optional)

      AutoQuant uses a set of predefined default parameters for AdaRound. These values were determined empirically and work well with the common models. However, if necessary, you can also use your custom parameters for Adaround. In this notebook, we will use very small AdaRound parameters for faster execution.


      Run AutoQuant Optimization

This step runs AutoQuant optimization, which returns the best possible quantized model, the corresponding evaluation score and the path to the encoding file. The allowed_accuracy_drop parameter indicates the tolerable amount of accuracy drop. AutoQuant applies a series of quantization features until the target accuracy (FP32 accuracy - allowed accuracy drop) is satisfied. When the target accuracy is reached, AutoQuant will return immediately without applying further PTQ techniques. Please refer to the AutoQuant User Guide and API documentation for complete details.
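Combining the optional AdaRound settings from the previous step with the optimization call, a hedged sketch could look like this (method names as in the AutoQuant API docs; values illustrative only):

from aimet_torch.adaround.adaround_weight import AdaroundParameters

# Optional: very small AdaRound settings so the example executes quickly.
auto_quant.set_adaround_params(AdaroundParameters(unlabeled_loader, num_batches=4,
                                                  default_num_iterations=32))

model, optimized_accuracy, encoding_path = auto_quant.optimize(allowed_accuracy_drop=0.01)
print(f"Optimized accuracy: {optimized_accuracy}, encodings saved to: {encoding_path}")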


      Summary

      Hope this notebook was useful for you to understand how to use AIMET AutoQuant feature.

A few additional resources - Refer to the AIMET API docs for more details on the APIs and parameters - Refer to the other example notebooks to understand how to use the AIMET CLE and AdaRound features in a standalone fashion.

diff --git a/releases/1.33.0/Examples/torch/quantization/bn_reestimation.html b/releases/1.33.0/Examples/torch/quantization/bn_reestimation.html

      Quantization-Aware Training with BatchNorm Re-estimation

This notebook shows a working code example of how to use AIMET to perform QAT (Quantization-aware training) with batchnorm re-estimation. Batchnorm re-estimation is a technique for countering the potential instability of batchnorm statistics (i.e. running mean and variance) during QAT. More specifically, batchnorm re-estimation recalculates the batchnorm statistics based on the model after QAT. By doing so, we aim to make our model learn batchnorm statistics from stable outputs after QAT, rather than from likely noisy outputs during QAT.


      Overall flow

      This notebook covers the following steps: 1. Create a quantization simulation model with fake quantization ops inserted. 2. Finetune and evaluate the quantization simulation model 3. Re-estimate batchnorm statistics and compare the eval score before and after re-estimation. 4. Fold the re-estimated batchnorm layers and export the quantization simulation model


      What this notebook is not

In this notebook, we will focus on how to apply batchnorm re-estimation after QAT, rather than covering all the details about QAT itself. For more information about QAT, please refer to the QAT notebook or the QAT range learning notebook.



      Dataset

      This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Else, please download the dataset from appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

      Note1: The ImageNet dataset typically has the following characteristics and the dataloader provided in this example notebook rely on these - Subfolders ‘train’ for the training samples and ‘val’ for the validation samples. Please see the pytorch dataset description for more details. - A subdirectory per class, and a file per each image sample


      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.


        2. Load FP32 model

        For this example notebook, we are going to load a pretrained resnet18 model from torchvision. Similarly, you can load any pretrained PyTorch model instead.


        3. Create a quantization simulation model and Perform QAT


        Create Quantization Sim Model

        Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here - quant_scheme: We set this to “QuantScheme.post_training_tf_enhanced” - Supported options are ‘tf_enhanced’ or ‘tf’ or using Quant Scheme Enum QuantScheme.post_training_tf or QuantScheme.post_training_tf_enhanced - default_output_bw: Setting this to 8, essentially means that we are asking AIMET to perform all activation quantizations in the model using integer 8-bit precision - default_param_bw: Setting this to 8, essentially means that we are asking AIMET to perform all parameter quantizations in the model using integer 8-bit precision


        Perform QAT

        To perform quantization aware training (QAT), we simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

        For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.


        4. Perform BatchNorm Reestimation


        Re-estimate BatchNorm Statistics

AIMET provides a helper function, reestimate_bn_stats, for re-estimating batchnorm statistics. Here is the full list of parameters for this function: * model: Model on which to re-estimate the BatchNorm statistics. * dataloader: Train dataloader. * num_batches (optional): The number of batches to be used for re-estimation. (Default: 100) * forward_fn (optional): Optional adapter function that performs a forward pass given a model and an input batch yielded from the data loader. If not specified, it is expected that inputs yielded from the dataloader can be passed directly to the model.
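A minimal sketch of the call, using the parameter names listed above; the training DataLoader name is an assumption:

from aimet_torch.bn_reestimation import reestimate_bn_stats

# Recompute running mean/variance of the (not yet folded) BN layers in the
# QAT-finetuned simulation model using 100 training batches.
reestimate_bn_stats(sim.model, dataloader=train_loader, num_batches=100)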


        Fold BatchNorm Layers

        So far, we have improved our quantization simulation model through QAT and batchnorm re-estimation. The next step would be to actually take this model to target. But first, we should fold the batchnorm layers for our model to run on target devices more efficiently.


        5. Export Model

        As the final step, we will export the model to run it on actual target devices. AIMET QuantizationSimModel provides an export API for this purpose.
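A sketch of the export call; the output path, filename prefix and input shape are illustrative:

import torch

dummy_input = torch.rand(1, 3, 224, 224)
sim.export(path='./output/', filename_prefix='resnet18_after_qat_bn_reestimation',
           dummy_input=dummy_input.cpu())   # dummy input used to trace the model for export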


        Summary

Hope this notebook was useful for you to understand how to use the batchnorm re-estimation feature of AIMET.

A few additional resources - Refer to the AIMET API docs for more details on the APIs and optional parameters. - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT methods.

diff --git a/releases/1.33.0/Examples/torch/quantization/cle_bc.html b/releases/1.33.0/Examples/torch/quantization/cle_bc.html

      Cross-Layer Equalization (CLE) and Bias Correction (BC)

      This notebook showcases a working code example of how to use AIMET to apply Cross-Layer Equalization (CLE) and Bias Correction (BC). CLE and BC are post-training quantization techniques that aim to improve quantized accuracy of a given model. CLE does not need any data samples. BC may optionally need unlabelled data samples. These techniques help recover quantized accuracy when the model quantization is sensitive to parameter quantization as opposed to activation quantization.

To learn more about these techniques, please refer to the “Data-Free Quantization Through Weight Equalization and Bias Correction” paper from ICCV 2019 - https://arxiv.org/abs/1906.04721

      @@ -1124,17 +1116,17 @@

Bias Correction

Quantization sometimes leads to a shift in layer outputs. This technique helps correct the shift by adjusting the bias parameters of that layer. Note that this technique is generally applied after CLE, but it is an optional step.
      -
      +

      Overall flow

This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the FP32 model and evaluate the model to find the baseline FP32 accuracy 3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score 4. Apply CLE and BC, and evaluate the simulation model to get a post-finetuned quantized accuracy score

      -
      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art results. For example, it uses a relatively quantization-friendly model like Resnet18. Also, some optimization parameters are deliberately chosen to have the notebook execute more quickly.


      -
      +

      Dataset

This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Otherwise, please download the dataset from an appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

Note1: The ImageNet dataset typically has the following characteristics, and the dataloader provided in this example notebook relies on them - Subfolders ‘train’ for the training samples and ‘val’ for the validation samples. Please see the pytorch dataset description for more details. - A subdirectory per class, and a file per image sample
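Purely for illustration, a validation dataloader over such a directory layout could be built as follows; the dataset root, image size, and batch size are placeholder assumptions rather than values taken from this notebook.

import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms

# '/path/to/imagenet' is a placeholder root containing 'train' and 'val' subfolders.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
val_data = torchvision.datasets.ImageFolder('/path/to/imagenet/val', transform=preprocess)
val_loader = DataLoader(val_data, batch_size=64, shuffle=False, num_workers=4)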

      @@ -1148,9 +1140,9 @@

      Dataset -
      +

      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.

        @@ -1199,9 +1191,9 @@

        1. Example evaluation and training pipeline -
        +

        2. Load the model and evaluate to get a baseline FP32 accuracy score

        For this example notebook, we are going to load a pretrained resnet18 model from torchvision. Similarly, you can load any pretrained PyTorch model instead.

        @@ -1249,12 +1241,12 @@

        2. Load the model and evaluate to get a baseline FP32 accuracy score

      -
      +


      -
      +
      -
      +
      +

      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this? On quantized runtimes (like TFLite, SnapDragon Neural Processing SDK, etc.), it is a common practice to fold the BN layers. Doing so results in an inferences/sec speedup since unnecessary computation is avoided. From a floating point compute perspective, a BN-folded model is mathematically equivalent to a model with BN layers at inference time, and produces the same accuracy. However, folding the BN layers can increase the range of the tensor values @@ -1270,9 +1262,9 @@
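As a brief sketch (the input shape is an assumption for ImageNet-sized inputs), folding the BN layers of the FP32 model in place might look like:

from aimet_torch.batch_norm_fold import fold_all_batch_norms

# Fold BN layers into adjacent Conv layers of the FP32 model in place; the returned
# list of (Conv, BN) pairs is not needed further in this sketch.
_ = fold_all_batch_norms(model, input_shapes=(1, 3, 224, 224))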

      Fold Batch Normalization layers -
      +

      Create Quantization Sim Model

Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here - quant_scheme: We set this to “QuantScheme.post_training_tf_enhanced” - Supported options are ‘tf_enhanced’ or ‘tf’, or the Quant Scheme Enum values QuantScheme.post_training_tf or QuantScheme.post_training_tf_enhanced - default_output_bw: Setting this to 8 essentially means that we are asking AIMET to perform all activation quantizations in the model using integer 8-bit precision - default_param_bw: Setting this to 8 essentially means that we are asking AIMET to perform all parameter quantizations in the model using integer 8-bit precision - num_batches: The number of batches used to evaluate the model while calculating the quantization encodings. Only 5 batches are used here to speed up the process. In addition, the @@ -1374,9 +1366,9 @@

      Create Quantization Sim Model -
      +

4.1 Cross Layer Equalization

      The next cell performs cross-layer equalization on the model. As noted before, the function folds batch norms, applies cross-layer scaling, and then folds high biases.

      Note: Interestingly, CLE needs BN statistics for its procedure. If a BN folded model is provided, CLE will run the CLS (cross-layer scaling) optimization step but will skip the HBA (high-bias absorption) step. To avoid this, we simply load the original model again before running CLE.
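A minimal sketch of that cell, assuming a freshly reloaded torchvision ResNet18 and ImageNet-sized inputs (see the AIMET API docs for the authoritative signature):

from torchvision.models import resnet18
from aimet_torch.cross_layer_equalization import equalize_model

# Reload the original FP32 model so CLE still sees BN statistics, then apply
# BN fold + cross-layer scaling + high-bias absorption in place.
model = resnet18(pretrained=True).eval()
equalize_model(model, input_shapes=(1, 3, 224, 224))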

      @@ -1425,9 +1417,9 @@

      4. 1 Cross Layer Equalization -
      +

4.2 Bias Correction

      This section shows how we can apply AIMET Bias Correction on top of the already equalized model from the previous step. Bias correction under the hood uses a reference FP32 model and a QuantizationSimModel to perform its procedure. More details are explained in the AIMET User Guide documentation.

      For the correct_bias API, we pass the following parameters
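The parameter list itself is truncated in this page; as an illustrative sketch only (the parameter names, values, and data_loader below are assumptions to be checked against the AIMET API docs), the call might look like:

from aimet_common.defs import QuantScheme
from aimet_torch import bias_correction
from aimet_torch.quantsim import QuantParams

# Quantization settings used internally by bias correction (illustrative values).
params = QuantParams(weight_bw=8, act_bw=8, round_mode="nearest",
                     quant_scheme=QuantScheme.post_training_tf_enhanced)

# Empirical bias correction over a small number of unlabelled batches;
# data_loader and the sample counts are placeholders.
bias_correction.correct_bias(model, params, num_quant_samples=16,
                             data_loader=data_loader, num_bias_correct_samples=16)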

      @@ -1486,15 +1478,15 @@

      4. 2 Bias Correction

      -
      +


      -
      +

      Summary

      Hope this notebook was useful for you to understand how to use AIMET for performing Cross Layer Equalization (CLE) and Bias Correction (BC).

A few additional resources: - Refer to the AIMET API docs for more details on the APIs and optional parameters - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT techniques

      -
      - - + + + diff --git a/releases/1.33.0/Examples/torch/quantization/qat.html b/releases/1.33.0/Examples/torch/quantization/qat.html index 9e1bc0f..157c9b5 100644 --- a/releases/1.33.0/Examples/torch/quantization/qat.html +++ b/releases/1.33.0/Examples/torch/quantization/qat.html @@ -1,8 +1,7 @@ - - + Quantization-Aware Training — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      -
      +

      Quantization-Aware Training

This notebook shows a working code example of how to use AIMET to perform QAT (Quantization-aware training). QAT is an AIMET feature that adds quantization simulation ops (sometimes also called fake quantization ops) to a trained ML model and uses a standard training pipeline to fine-tune or train the model for a few epochs. The resulting model should show improved accuracy on quantized ML accelerators.

      AIMET supports two different types of QAT 1. Simply referred to as QAT - quantization parameters like per-tensor scale/offsets for activations are computed once. During fine-tuning, the model weights are updated to minimize the effects of quantization in the forward pass, keeping the quantization parameters constant. 2. Referred to as QAT with range-learning - quantization parameters like per-tensor scale/offsets for activations are computed initially. Then both the quantization parameters and the model weights are jointly updated during fine-tuning to minimize the effects of quantization in the forward pass.

This notebook specifically shows a working code example for #1 above. You can find a separate notebook for #2 in the same folder.

      -
      +

      Overall flow

This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the FP32 model and evaluate the model to find the baseline FP32 accuracy 3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score 4. Fine-tune the quantization simulation model and evaluate the simulation model to get a post-finetuned quantized accuracy score

      -
      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art QAT results. For example, it uses a relatively quantization-friendly model like Resnet18. Also, some optimization parameters like number of epochs to fine-tune are deliberately chosen to have the notebook execute more quickly.


      -
      +

      Dataset

This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Otherwise, please download the dataset from an appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

Note1: The ImageNet dataset typically has the following characteristics, and the dataloader provided in this example notebook relies on them - Subfolders ‘train’ for the training samples and ‘val’ for the validation samples. Please see the pytorch dataset description for more details. - A subdirectory per class, and a file per image sample

      @@ -1144,9 +1136,9 @@

      Dataset -
      +

      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.

        @@ -1210,9 +1202,9 @@

        1. Example evaluation and training pipeline -
        +

        2. Load the model and evaluate to get a baseline FP32 accuracy score

        For this example notebook, we are going to load a pretrained resnet18 model from torchvision. Similarly, you can load any pretrained PyTorch model instead.

        @@ -1260,12 +1252,12 @@

        2. Load the model and evaluate to get a baseline FP32 accuracy score

      -
      +


      -
      +
      -
      +
      +

      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this? On quantized runtimes (like TFLite, SnapDragon Neural Processing SDK, etc.), it is a common practice to fold the BN layers. Doing so results in an inferences/sec speedup since unnecessary computation is avoided. From a floating point compute perspective, a BN-folded model is mathematically equivalent to a model with BN layers at inference time, and produces the same accuracy. However, folding the BN layers can increase the range of the tensor values @@ -1281,8 +1273,8 @@

      Fold Batch Normalization layers +

      +

      Create Quantization Sim Model

Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here - quant_scheme: We set this to “QuantScheme.post_training_tf_enhanced” - Supported options are ‘tf_enhanced’ or ‘tf’, or the Quant Scheme Enum values QuantScheme.post_training_tf or QuantScheme.post_training_tf_enhanced - default_output_bw: Setting this to 8 essentially means that we are asking AIMET to perform all activation quantizations in the model using integer 8-bit precision - default_param_bw: Setting this to 8 essentially means that we are asking AIMET to perform all parameter quantizations in the model using integer 8-bit precision
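A condensed sketch of creating the simulation model and computing encodings follows; the dummy-input shape, calibration callback, and val_loader are assumptions for illustration.

import torch
from aimet_common.defs import QuantScheme
from aimet_torch.quantsim import QuantizationSimModel

dummy_input = torch.rand(1, 3, 224, 224)            # assumed ImageNet-sized input
sim = QuantizationSimModel(model, dummy_input=dummy_input,
                           quant_scheme=QuantScheme.post_training_tf_enhanced,
                           default_output_bw=8, default_param_bw=8)

def pass_calibration_data(sim_model, _):
    # Run a few unlabelled batches through the model so activation ranges can be observed.
    sim_model.eval()
    with torch.no_grad():
        for i, (images, _) in enumerate(val_loader):  # val_loader is a placeholder
            sim_model(images)
            if i >= 4:                                # a handful of batches is enough for a demo
                break

sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                      forward_pass_callback_args=None)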

      @@ -1383,9 +1375,9 @@

      Create Quantization Sim Model -
      +

      4. Perform QAT

      To perform quantization aware training (QAT), we simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

      For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.
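A rough sketch of that fine-tuning step, written as an ordinary PyTorch loop over sim.model; the optimizer settings, loss, and train_loader are placeholder assumptions.

import torch
import torch.nn.functional as F

optimizer = torch.optim.SGD(sim.model.parameters(), lr=1e-4, momentum=0.9)  # illustrative LR

sim.model.train()
for epoch in range(1):                      # a single epoch, as in this example notebook
    for images, labels in train_loader:     # train_loader is a placeholder
        optimizer.zero_grad()
        loss = F.cross_entropy(sim.model(images), labels)
        loss.backward()                     # gradients flow through the fake-quant ops
        optimizer.step()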

      @@ -1421,14 +1413,14 @@

      4. Perform QAT +

      +

      Summary

      Hope this notebook was useful for you to understand how to use AIMET for performing QAT.

A few additional resources: - Refer to the AIMET API docs for more details on the APIs and optional parameters. - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and QAT with range-learning.

      -
      -

      - +
      +
      + diff --git a/releases/1.33.0/Examples/torch/quantization/qat_range_learning.html b/releases/1.33.0/Examples/torch/quantization/qat_range_learning.html index c5fbdb1..faca2b4 100644 --- a/releases/1.33.0/Examples/torch/quantization/qat_range_learning.html +++ b/releases/1.33.0/Examples/torch/quantization/qat_range_learning.html @@ -1,8 +1,7 @@ - - + Quantization-Aware Training with Range Learning — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      -
      +

      Quantization-Aware Training with Range Learning

This notebook shows a working code example of how to use AIMET to perform QAT (Quantization-aware training). QAT is a technique where AIMET adds quantization simulation ops (sometimes also called fake quantization ops) to a trained ML model and uses a standard training pipeline to fine-tune or train the model for a few epochs. The resulting model should show improved accuracy on quantized ML accelerators.

      AIMET supports two different types of QAT 1. Simply referred to as QAT - quantization parameters like per-tensor scale/offsets for activations are computed once. During fine-tuning, the model weights are updated to minimize the effects of quantization in the forward pass, keeping the quantization parameters constant. 2. Referred to as QAT with range-learning - quantization parameters like per-tensor scale/offsets for activations are computed initially. Then both the quantization parameters and the model weights are jointly updated during fine-tuning to minimize the effects of quantization in the forward pass.

This notebook specifically shows a working code example for #2 above. You can find a separate notebook for #1 in the same folder.

      -
      +

      Overall flow

This notebook covers the following 1. Instantiate the example evaluation and training pipeline 2. Load the FP32 model and evaluate the model to find the baseline FP32 accuracy 3. Create a quantization simulation model (with fake quantization ops inserted) and evaluate this simulation model to get a quantized accuracy score 4. Fine-tune the quantization simulation model and evaluate the simulation model to get a post-finetuned quantized accuracy score

      -
      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art QAT results. For example, it uses a relatively quantization-friendly model like Resnet18. Also, some optimization parameters like number of epochs to fine-tune are deliberately chosen to have the notebook execute more quickly.


      -
      +

      Dataset

This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Otherwise, please download the dataset from an appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

Note1: The ImageNet dataset typically has the following characteristics, and the dataloader provided in this example notebook relies on them - Subfolders ‘train’ for the training samples and ‘val’ for the validation samples. Please see the pytorch dataset description for more details. - A subdirectory per class, and a file per image sample

      @@ -1144,9 +1136,9 @@

      Dataset -
      +

      1. Example evaluation and training pipeline

      The following is an example training and validation loop for this image classification task.

        @@ -1210,9 +1202,9 @@

        1. Example evaluation and training pipeline -
        +

        2. Load the model and evaluate to get a baseline FP32 accuracy score

        For this example notebook, we are going to load a pretrained resnet18 model from torchvision. Similarly, you can load any pretrained PyTorch model instead.

        @@ -1260,12 +1252,12 @@

        2. Load the model and evaluate to get a baseline FP32 accuracy score

      -
      +


      -
      +
      -
      +
      +

      Fold Batch Normalization layers

      Before we determine the simulated quantized accuracy using QuantizationSimModel, we will fold the BatchNormalization (BN) layers in the model. These layers get folded into adjacent Convolutional layers. The BN layers that cannot be folded are left as they are.

Why do we need to do this? On quantized runtimes (like TFLite, SnapDragon Neural Processing SDK, etc.), it is a common practice to fold the BN layers. Doing so results in an inferences/sec speedup since unnecessary computation is avoided. From a floating point compute perspective, a BN-folded model is mathematically equivalent to a model with BN layers at inference time, and produces the same accuracy. However, folding the BN layers can increase the range of the tensor values @@ -1281,8 +1273,8 @@

      Fold Batch Normalization layers +

      +

      Create Quantization Sim Model

Now we use AIMET to create a QuantizationSimModel. This basically means that AIMET will insert fake quantization ops in the model graph and will configure them. A few of the parameters are explained here - quant_scheme: We set this to “training_range_learning_with_tf_init” - This is the key setting that enables “range learning”. With this choice of quant scheme, AIMET will use the TF quant scheme to initialize the quantization parameters like scale/offset, and those parameters are then set to be trainable so they can continue to be updated during fine-tuning. - Another choice for quant_scheme is “training_range_learning_with_tf_enhanced_init”. Similar to the above, but the initialization for scale/offset is done using the TF Enhanced scheme. Since in both schemes the quantization parameters are set to be trainable, there is not much benefit to using this choice instead of “training_range_learning_with_tf_init”. - default_output_bw: Setting this to 8 essentially means that we @@ -1384,9 +1376,9 @@
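A brief sketch of this configuration (the dummy-input shape and variable names are assumptions):

import torch
from aimet_common.defs import QuantScheme
from aimet_torch.quantsim import QuantizationSimModel

dummy_input = torch.rand(1, 3, 224, 224)    # assumed input shape
sim = QuantizationSimModel(model, dummy_input=dummy_input,
                           quant_scheme=QuantScheme.training_range_learning_with_tf_init,
                           default_output_bw=8, default_param_bw=8)
# compute_encodings() is still called once to initialize scale/offset before fine-tuning;
# afterwards both the weights and the quantization parameters are updated during training.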

      Create Quantization Sim Model -
      +

      4. Perform QAT

      To perform quantization aware training (QAT), we simply train the model for a few more epochs (typically 15-20). As with any training job, hyper-parameters need to be searched for optimal results. Good starting points are to use a learning rate on the same order as the ending learning rate when training the original model, and to drop the learning rate by a factor of 10 every 5 epochs or so.

      For the purpose of this example notebook, we are going to train only for 1 epoch. But feel free to change these parameters as you see fit.

      @@ -1422,15 +1414,15 @@

      4. Perform QAT -
      +

      Summary

      Hope this notebook was useful for you to understand how to use AIMET for performing QAT with range-learning.

A few additional resources: - Refer to the AIMET API docs for more details on the APIs and optional parameters. - Refer to the other example notebooks to understand how to use AIMET post-training quantization techniques and the vanilla QAT method (without range-learning).

      -
      -

      -

      +
      +
      + diff --git a/releases/1.33.0/Examples/torch/quantization/quant_analyzer.html b/releases/1.33.0/Examples/torch/quantization/quant_analyzer.html index abecb07..c2d70b6 100644 --- a/releases/1.33.0/Examples/torch/quantization/quant_analyzer.html +++ b/releases/1.33.0/Examples/torch/quantization/quant_analyzer.html @@ -1,8 +1,7 @@ - - + Quant Analyzer — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -64,7 +63,6 @@
      -
      +

      Quant Analyzer

      This notebook showcases a working code example of how to use AIMET to apply Quant Analyzer. Quant Analyzer is a feature which performs various analyses on a model to understand how each layer in the model responds to quantization.

      -
      +

      Overall flow

      This notebook covers the following 1. Instantiate the example evaluation pipeline 2. Load the FP32 model 3. Apply QuantAnalyzer to the model

      -
      -
      +
      +

      What this notebook is not

      • This notebook is not designed to show state-of-the-art results.

      • @@ -1129,7 +1121,7 @@

        What this notebook is not -
        +

        Dataset

This notebook relies on the ImageNet dataset for the task of image classification. If you already have a version of the dataset readily available, please use that. Otherwise, please download the dataset from an appropriate location (e.g. https://image-net.org/challenges/LSVRC/2012/index.php#).

Note1: The ImageNet dataset typically has the following characteristics, and the dataloader provided in this example notebook relies on them - Subfolders ‘train’ for the training samples and ‘val’ for the validation samples. Please see the pytorch dataset description for more details. - A subdirectory per class, and a file per image sample

        @@ -1143,9 +1135,9 @@

        Dataset -
        +

        1. Example evaluation and training pipeline

        The following is an example training and validation loop for this image classification task.

          @@ -1194,9 +1186,9 @@

          1. Example evaluation and training pipeline -
          +

          2. Load the model

          For this example notebook, we are going to load a pretrained resnet18 model from torchvision. Similarly, you can load any pretrained PyTorch model instead.

          @@ -1234,9 +1226,9 @@

          2. Load the model -
          +

          3. Apply QuantAnalyzer to the model

          QuantAnalyzer requires two functions to be defined by the user for passing data through the model:

          Forward pass callback
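The callback definitions are truncated on this page; as a neutral sketch (the function names, val_loader, and how they are wrapped into QuantAnalyzer callback objects are assumptions to verify against the QuantAnalyzer API docs), they could look like:

import torch

def forward_pass_callback(model, _):
    # Push a fixed number of unlabelled samples through the model so that
    # activation statistics can be collected (val_loader is a placeholder).
    model.eval()
    with torch.no_grad():
        for i, (images, _) in enumerate(val_loader):
            model(images)
            if i >= 15:
                break

def eval_callback(model, _):
    # Return a scalar accuracy score that QuantAnalyzer can use to rank layer sensitivity.
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            preds = model(images).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.numel()
    return correct / total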

          @@ -1412,9 +1404,9 @@

          3. Apply QuantAnalyzer to the model +

          +

          +

          Per-layer analysis by enabling/disabling quantization wrappers

          • per_layer_quant_enabled.html: A plot with layers on the x-axis and model accuracy on the y-axis, where each layer’s accuracy represents the model accuracy when all quantizers in the model are disabled except for that layer’s parameter and activation quantizers.

          • @@ -1423,8 +1415,8 @@

            Per-layer analysis by enabling/disabling quantization wrappers

            -

          -
          +

        +

        Encoding min/max ranges

        • min_max_ranges: A folder containing the following sets of files:

          @@ -1437,24 +1429,24 @@

          Encoding min/max ranges

        min_max_ranges.html

        -
        -
        +

        +

        PDF of statistics

• (If TF Enhanced quant scheme is used) activations_pdf: A folder containing html files for each layer, plotting the histogram of tensor values seen for that layer’s output activation during forward pass calibration.

• (If TF Enhanced quant scheme is used) weights_pdf: A folder containing subfolders for each layer with weights. Each layer’s folder contains html files for each parameter of that layer, with a histogram plot of tensor values seen for that parameter during forward pass calibration.

        weights_pdf.html

        -
        -
        +

      +

      Per-layer MSE loss

      • (Optional, if per layer MSE loss is enabled) per_layer_mse_loss.html: A plot with layers on the x-axis and MSE loss on the y-axis, where each layer’s MSE loss represents the MSE seen comparing that layer’s outputs in the FP32 model vs. the quantized model.

      • (Optional, if per layer MSE loss is enabled) per_layer_mse_loss.json: A json file containing the data shown in per_layer_mse_loss.html, associating layer names with MSE loss.

      per_layer_mse_loss.html

      -
      - +
      +
      diff --git a/releases/1.33.0/_modules/aimet_common/bias_correction.html b/releases/1.33.0/_modules/aimet_common/bias_correction.html index 036d706..9f745ee 100644 --- a/releases/1.33.0/_modules/aimet_common/bias_correction.html +++ b/releases/1.33.0/_modules/aimet_common/bias_correction.html @@ -60,7 +60,6 @@
      @@ -1194,17 +1187,13 @@

      Source code for aimet_tensorflow.keras.model_preparer

      class _KerasModelPreparer: - def __init__( - self, - original_model: Optional[tf.keras.Model] = None, - input_layer: Optional[tf.keras.layers.InputLayer] = None - ): + def __init__(self, original_model: tf.keras.Model = None, input_layer: tf.keras.layers.InputLayer = None): self.model_outputs = [] # Both normal init and "passthrough" init utilize this if original_model: self.input_layer = self._format_input_layer(original_model, input_layer) if self._inherits_from_keras_model(original_model): - _logger.debug("This model inherits from tf.keras.Model. Need to connect model.") + _logger.info("This model inherits from tf.keras.Model, connecting model...") self.original_model = self._connect_inherited_model(original_model, input_layer, is_original_model=True) else: @@ -1279,10 +1268,8 @@

      Source code for aimet_tensorflow.keras.model_preparer

      Set the functional model's weights to the original model's weights in the correct order """ - assert self.prepared_model, ( - "The prepared model must be created before setting weights. Please call " - "prepare_model() before calling set_weights()." - ) + assert self.prepared_model, "The prepared model must created before setting weights. Please call " \ + "prepare_model() before calling set_weights()." try: self.prepared_model.set_weights(self._get_original_models_weights_in_functional_model_order()) @@ -1299,7 +1286,7 @@

      Source code for aimet_tensorflow.keras.model_preparer

      @staticmethod def _format_input_layer( original_model: tf.keras.Model, - input_layer: Union[tf.keras.layers.InputLayer, List[tf.keras.layers.InputLayer], dict] + input_layer: Union[tf.keras.layers.InputLayer, List[tf.keras.layers.InputLayer]] = None ) -> tf.keras.layers.Layer: """ This function formats the input layer by either using the original models input layer or the user provided @@ -1312,9 +1299,6 @@

      Source code for aimet_tensorflow.keras.model_preparer

      """ if hasattr(original_model, "input"): input_layer = original_model.input - elif isinstance(input_layer, tf.keras.layers.InputLayer): - _logger.info("Input layer explicitly passed in") - return input_layer else: _logger.info("Input layer not found. Using input layer passed in.") if input_layer is None: @@ -1605,7 +1589,7 @@

      Source code for aimet_tensorflow.keras.model_preparer

      name=_TEMP_MODEL_NAME) _logger.debug("Model created for layer '%s'", layer.name) except TypeError as e: - if "call() got an unexpected keyword argument 'training'" in e.__str__(): + if "call() got an unexpected keyword argument 'training'" in e.args: _logger.error( "Model preparer calls subclassed layers call functions with the parameter 'training=False', " "in the case that the layer behaves differently during evaluation. Please add **kwargs to your " diff --git a/releases/1.33.0/_modules/aimet_tensorflow/keras/quant_analyzer.html b/releases/1.33.0/_modules/aimet_tensorflow/keras/quant_analyzer.html index 131d4a6..98b1052 100644 --- a/releases/1.33.0/_modules/aimet_tensorflow/keras/quant_analyzer.html +++ b/releases/1.33.0/_modules/aimet_tensorflow/keras/quant_analyzer.html @@ -60,7 +60,6 @@
      @@ -1162,10 +1155,9 @@

      Source code for aimet_torch.peft

       # pylint: disable=import-error
       # pylint: disable=no-name-in-module
       from peft.tuners.lora.layer import LoraLayer as PeftLoraLayer
      -from peft.tuners.lora.layer import Conv2d as PeftConv2d
       
       from aimet_torch.utils import replace_modules_of_type1_using_constructor
      -from aimet_torch.elementwise_ops import Add, Multiply
      +from aimet_torch.elementwise_ops import Add
       from aimet_torch.v2.quantsim import QuantizationSimModel
       from aimet_torch.quantsim import ExportableQuantModule
       from aimet_torch.v2.nn import BaseQuantizationMixin
      @@ -1196,7 +1188,6 @@ 

      Source code for aimet_torch.peft

               self.in_features = lora_layer.in_features
               self.out_features = lora_layer.out_features
               self.add_lora_to_res = Add()
      -        self.mul_scale = Multiply()
       
           def _swap_module_dict_with_list(self, lora_layer):
               for index, adapter_name in enumerate(lora_layer.lora_A):
      @@ -1224,7 +1215,7 @@ 

      Source code for aimet_torch.peft

                   scaling = self.scaling[active_adapter]
                   x = x.to(lora_A.weight.dtype)
       
      -            result = self.add_lora_to_res(result, lora_B(self.mul_scale(lora_A(dropout(x)), scaling)))
      +            result = self.add_lora_to_res(result, lora_B(lora_A(dropout(x)) * scaling))
       
               result = result.to(torch_result_dtype)
               return result
      @@ -1237,7 +1228,6 @@ 

      Source code for aimet_torch.peft

           :param model: PEFT model
           """
           replace_modules_of_type1_using_constructor(model, PeftLoraLayer, LoraLayer)
      -    replace_modules_of_type1_using_constructor(model, PeftConv2d, LoraLayer)
       
       
       
      [docs]class AdapterMetaData: diff --git a/releases/1.33.0/_modules/aimet_torch/quant_analyzer.html b/releases/1.33.0/_modules/aimet_torch/quant_analyzer.html index e9bf095..92b9a00 100644 --- a/releases/1.33.0/_modules/aimet_torch/quant_analyzer.html +++ b/releases/1.33.0/_modules/aimet_torch/quant_analyzer.html @@ -60,7 +60,6 @@
      @@ -1755,7 +1748,8 @@

      Source code for aimet_torch.quantsim

                       else:
                           kwargs = onnx_export_args
                       torch.onnx.export(original_model, dummy_input, onnx_path, **kwargs)
      -                save_initializer_restored_onnx_graph(onnx_path, onnx_path)
      +                if onnx_utils.RESTORE_ONNX_MODEL_INITIALIZERS:
      +                    save_initializer_restored_onnx_graph(onnx_path, onnx_path)
                   else:
                       # Create onnx model and obtain node to i/o tensor name map
                       OnnxSaver.create_onnx_model_with_pytorch_layer_names(onnx_path, original_model, dummy_input, is_conditional,
      diff --git a/releases/1.33.0/_modules/aimet_torch/v2/nn/base.html b/releases/1.33.0/_modules/aimet_torch/v2/nn/base.html
      index 95e33a8..0a184b5 100644
      --- a/releases/1.33.0/_modules/aimet_torch/v2/nn/base.html
      +++ b/releases/1.33.0/_modules/aimet_torch/v2/nn/base.html
      @@ -60,7 +60,6 @@
       
      @@ -1223,26 +1216,11 @@

      Source code for aimet_torch.v2.quantization.tensor

      # Operations that a per-tensor encoding can pass through _pertensor_passthrough_ops = { - torch.Tensor.__getitem__, - torch.Tensor.as_strided, torch.Tensor.broadcast_to, - torch.Tensor.chunk, - torch.Tensor.dsplit, torch.Tensor.expand, torch.Tensor.expand_as, torch.Tensor.flatten, - torch.Tensor.flip, - torch.Tensor.fliplr, - torch.Tensor.flipud, - torch.Tensor.gather, - torch.Tensor.hsplit, - torch.Tensor.index_select, - torch.Tensor.kthvalue, torch.Tensor.masked_select, - torch.Tensor.movedim, - torch.Tensor.moveaxis, - torch.Tensor.msort, - torch.Tensor.narrow, torch.Tensor.permute, torch.Tensor.repeat, torch.Tensor.reshape, @@ -1250,61 +1228,33 @@

      Source code for aimet_torch.v2.quantization.tensor

      torch.Tensor.resize, torch.Tensor.resize_as, torch.Tensor.select, - torch.Tensor.split, torch.Tensor.squeeze, torch.Tensor.swapaxes, torch.Tensor.swapdims, torch.Tensor.t, - torch.Tensor.take, - torch.Tensor.take_along_dim, - torch.Tensor.tensor_split, - torch.Tensor.tile, torch.Tensor.transpose, torch.Tensor.unflatten, torch.Tensor.unsqueeze, torch.Tensor.view, torch.Tensor.view_as, torch.as_strided, - torch.as_strided_copy, - torch.chunk, - torch.dsplit, - torch.expand_copy, + #torch.as_strided_copy, TODO: Uncomment when pytorch 1.9 support is fully deprecated + #torch.expand_copy, torch.flatten, - torch.flip, - torch.fliplr, - torch.flipud, - torch.gather, - torch.hsplit, - torch.index_select, - torch.masked_select, - torch.moveaxis, - torch.movedim, - torch.narrow, - torch.narrow_copy, torch.permute, - torch.permute_copy, + #torch.permute_copy, torch.reshape, - torch.select, - torch.split, torch.squeeze, - torch.squeeze_copy, - torch.swapaxes, + #torch.squeeze_copy, torch.swapdims, torch.t, - torch.take, - torch.take_along_dim, - torch.tensor_split, - torch.tile, - torch.t_copy, - torch.unbind, - torch.unflatten, + #torch.t_copy, + #torch.unflatten, torch.unsqueeze, - torch.unsqueeze_copy, - torch.vsplit, - torch.view_copy, + #torch.unsqueeze_copy, + #torch.view_copy } - @abc.abstractmethod def quantize(self) -> "QuantizedTensor": """ @@ -1363,17 +1313,6 @@

      Source code for aimet_torch.v2.quantization.tensor

      ret.encoding = encoding return ret - def new_empty(self, size, *, dtype=None, device=None, requires_grad=False, - layout=torch.strided, pin_memory=False, **kwargs) -> "QuantizedTensorBase": - # PyTorch requires subclasses of torch.Tensor to override this method such that - # it returns an instance of the subclass, not a plain torch.Tensor, - # for the subclass to be deep-copyable - encoding = kwargs.pop('encoding', None) - t = super().new_empty(size, dtype=dtype, device=device, requires_grad=requires_grad, - layout=layout, pin_memory=pin_memory, **kwargs).as_subclass(type(self)) - t.encoding = encoding - return t - @implements(torch.clone) def clone(self, *, memory_format=torch.preserve_format): """ @@ -1420,26 +1359,21 @@

      Source code for aimet_torch.v2.quantization.tensor

      self, *_ = args ret.encoding = copy.copy(self.encoding) # shallow copy - def propagate_encoding(qtensor, encoding): - if isinstance(qtensor, QuantizedTensorBase): - qtensor.encoding = copy.copy(encoding) - if func in cls._passthrough_ops: self, *_ = args - tree_map(lambda t: propagate_encoding(t, self.encoding), ret) + ret.encoding = copy.copy(self.encoding) if func in cls._pertensor_passthrough_ops: self, *_ = args - if self.encoding and self.encoding.granularity == "pertensor": + if self.encoding.granularity == "pertensor": # Return a cls object with the same encoding which can later be quantized or dequantized - tree_map(lambda t: propagate_encoding(t, self.encoding), ret) + ret.encoding = copy.copy(self.encoding) else: # Return a cls object with no encoding # If the user later tries to quantize or dequantize this, an error will be thrown - tree_map(lambda t: propagate_encoding(t, None), ret) + ret.encoding = None return ret - def set_encoding(qtensor): if not hasattr(qtensor, 'encoding'): qtensor.encoding = None @@ -1518,6 +1452,24 @@

      Source code for aimet_torch.v2.quantization.tensor

      without further loss in information. """ + def __getstate__(self): + state = self.__dict__ + state["data"] = self.data + state["encoding"] = self.encoding + return state + + def __setstate__(self, state): + self.data = state["data"] + self.encoding = state["encoding"] + + def __deepcopy__(self, memo): + new_instance = type(self).__new__(type(self)) + state = self.__getstate__() + new_instance.__setstate__(state) + new_instance.encoding = copy.deepcopy(state["encoding"]) + new_instance.data = copy.deepcopy(state["data"]) + return new_instance +
      [docs] def quantize(self) -> QuantizedTensor: """ Quantizes ``self`` using :attr:`self.encoding` to produce a :class:`QuantizedTensor` with the same encoding diff --git a/releases/1.33.0/_modules/aimet_torch/v2/quantsim/config_utils.html b/releases/1.33.0/_modules/aimet_torch/v2/quantsim/config_utils.html index 39d01b8..506deb2 100644 --- a/releases/1.33.0/_modules/aimet_torch/v2/quantsim/config_utils.html +++ b/releases/1.33.0/_modules/aimet_torch/v2/quantsim/config_utils.html @@ -60,7 +60,6 @@
      diff --git a/releases/1.33.0/_static/basic.css b/releases/1.33.0/_static/basic.css index 4e9a9f1..eeb0519 100644 --- a/releases/1.33.0/_static/basic.css +++ b/releases/1.33.0/_static/basic.css @@ -236,6 +236,16 @@ div.body p, div.body dd, div.body li, div.body blockquote { a.headerlink { visibility: hidden; } +a.brackets:before, +span.brackets > a:before{ + content: "["; +} + +a.brackets:after, +span.brackets > a:after { + content: "]"; +} + h1:hover > a.headerlink, h2:hover > a.headerlink, @@ -324,15 +334,11 @@ aside.sidebar { p.sidebar-title { font-weight: bold; } -nav.contents, -aside.topic, div.admonition, div.topic, blockquote { clear: left; } /* -- topics ---------------------------------------------------------------- */ -nav.contents, -aside.topic, div.topic { border: 1px solid #ccc; padding: 7px; @@ -371,8 +377,6 @@ div.body p.centered { div.sidebar > :last-child, aside.sidebar > :last-child, -nav.contents > :last-child, -aside.topic > :last-child, div.topic > :last-child, div.admonition > :last-child { margin-bottom: 0; @@ -380,8 +384,6 @@ div.admonition > :last-child { div.sidebar::after, aside.sidebar::after, -nav.contents::after, -aside.topic::after, div.topic::after, div.admonition::after, blockquote::after { @@ -606,26 +608,19 @@ ol.simple p, ul.simple p { margin-bottom: 0; } -aside.footnote > span, -div.citation > span { +dl.footnote > dt, +dl.citation > dt { float: left; + margin-right: 0.5em; } -aside.footnote > span:last-of-type, -div.citation > span:last-of-type { - padding-right: 0.5em; -} -aside.footnote > p { - margin-left: 2em; -} -div.citation > p { - margin-left: 4em; -} -aside.footnote > p:last-of-type, -div.citation > p:last-of-type { + +dl.footnote > dd, +dl.citation > dd { margin-bottom: 0em; } -aside.footnote > p:last-of-type:after, -div.citation > p:last-of-type:after { + +dl.footnote > dd:after, +dl.citation > dd:after { content: ""; clear: both; } @@ -641,6 +636,10 @@ dl.field-list > dt { padding-left: 0.5em; padding-right: 5px; } +dl.field-list > dt:after { + content: ":"; +} + dl.field-list > dd { padding-left: 0.5em; diff --git a/releases/1.33.0/api_docs/convert_tf_sess_to_keras.html b/releases/1.33.0/api_docs/convert_tf_sess_to_keras.html index f7f708e..815d309 100644 --- a/releases/1.33.0/api_docs/convert_tf_sess_to_keras.html +++ b/releases/1.33.0/api_docs/convert_tf_sess_to_keras.html @@ -1,8 +1,7 @@ - - + Using AIMET Tensorflow APIs with Keras Models — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -63,7 +62,6 @@
      @@ -1119,18 +1111,18 @@
      -
      +

      AIMET Keras BatchNorm Re-estimation APIs

      - -
      +
      +

      Introduction

      AIMET functionality for Keras BatchNorm Re-estimation recalculates the batchnorm statistics based on the model after QAT. By doing so, we aim to make the model learn batchnorm statistics from stable outputs after QAT, rather than from the likely noisy outputs seen during QAT.

      -
      -
      +
      +

      Top-level APIs

      API for BatchNorm Re-estimation

      @@ -1138,17 +1130,17 @@

      Top-level APIs

      aimet_tensorflow.keras.bn_reestimation.reestimate_bn_stats(model, bn_re_estimation_dataset, bn_num_batches=100)[source]

      Top-level API intended to be called directly by the end user.

      -
      Parameters:
      +
      Parameters
      • model (Model) – tf.keras.Model

      • bn_re_estimation_dataset (DatasetV2) – Training dataset

      • bn_num_batches (int) – The number of batches to be used for reestimation

      -
      Return type:
      +
      Return type

      Handle

      -
      Returns:
      +
      Returns

      Handle that undoes the effect of BN re-estimation upon handle.remove()

      @@ -1161,20 +1153,20 @@

      Top-level APIs -
      Parameters:
      +
      Parameters

      sim (QuantizationSimModel) – QuantizationSimModel to be folded

      -
      Return type:
      +
      Return type

      List[Tuple[QcQuantizeWrapper, QcQuantizeWrapper]]

      -
      Returns:
      +
      Returns

      A list of pairs of layers [(Conv/Linear, BN layer that got folded)]

      - -
      +
      +

      Code Example

      Required imports

      from aimet_tensorflow.keras.bn_reestimation import reestimate_bn_stats
      @@ -1196,12 +1188,12 @@ 

      Code Example
      fold_all_batch_norms_to_scale(qsim)
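
      For orientation, a minimal end-to-end sketch of how these calls fit together is given below. The QuantizationSimModel (qsim), the tf.data dataset, and the import path of fold_all_batch_norms_to_scale are illustrative assumptions; only reestimate_bn_stats is documented above with this exact signature.

      # Sketch (assumptions noted above): re-estimate BN statistics after QAT,
      # then fold BN layers into scale before export.
      from aimet_tensorflow.keras.bn_reestimation import reestimate_bn_stats
      from aimet_tensorflow.keras.batch_norm_fold import fold_all_batch_norms_to_scale  # assumed import path

      # qsim:       an existing QuantizationSimModel whose model has completed QAT
      # bn_dataset: a tf.data.Dataset yielding representative training batches
      handle = reestimate_bn_stats(qsim.model, bn_re_estimation_dataset=bn_dataset, bn_num_batches=100)

      fold_all_batch_norms_to_scale(qsim)   # fold BN into the preceding conv/linear scale

      # handle.remove() would undo the re-estimation, were it still needed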
       

      - -
      +
      +

      Limitations

      Please see the AIMET Keras ModelPreparer API limitations:

      - - +
      +
      diff --git a/releases/1.33.0/api_docs/keras_compression.html b/releases/1.33.0/api_docs/keras_compression.html index 7582701..f388cbc 100644 --- a/releases/1.33.0/api_docs/keras_compression.html +++ b/releases/1.33.0/api_docs/keras_compression.html @@ -1,8 +1,7 @@ - - + AIMET Keras Compression API — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -63,7 +62,6 @@
      @@ -1119,17 +1111,17 @@
      -
      +

      AIMET Keras Cross Layer Equalization APIs

      - -
      +
      -
      +
      +

      Introduction

      AIMET functionality for Keras Cross Layer Equalization supports three techniques:
        @@ -1139,8 +1131,8 @@

        Introduction +

      +

      Cross Layer Equalization API

      Listed below is a comprehensive API to apply all available techniques under cross layer equalization. It performs ‘auto’ detection of candidate layers and applies the techniques. @@ -1156,8 +1148,8 @@

      Cross Layer Equalization API +

      +

      Code Example

      Required imports

      import tensorflow as tf
      @@ -1172,8 +1164,8 @@ 

      Code Examplecle_applied_model = equalize_model(model)
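
      A slightly fuller sketch of the auto-mode flow is given below. The import path and the choice of ResNet50 are illustrative assumptions; the essential call is equalize_model(model), as shown above.

      # Sketch: apply cross-layer equalization in 'auto' mode to a Keras model.
      import tensorflow as tf
      from aimet_tensorflow.keras.cross_layer_equalization import equalize_model  # assumed import path

      model = tf.keras.applications.ResNet50(weights="imagenet")   # illustrative model
      cle_applied_model = equalize_model(model)                    # BN fold + cross-layer scaling + high-bias fold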

      - -
      +
      +

      Primitive APIs

      If the user would like to call the APIs individually, then the following APIs can be used:

      @@ -1181,8 +1173,8 @@

      Primitive APIsPrimitive APIs for Cross Layer Equalization

    - - + + diff --git a/releases/1.33.0/api_docs/keras_layer_output_generation.html b/releases/1.33.0/api_docs/keras_layer_output_generation.html index 291c206..98e82dd 100644 --- a/releases/1.33.0/api_docs/keras_layer_output_generation.html +++ b/releases/1.33.0/api_docs/keras_layer_output_generation.html @@ -1,8 +1,7 @@ - - + AIMET Keras Layer Output Generation API — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -63,7 +62,6 @@

    If a tensor is assigned more than one encoding, the encodings are interpreted on a per-channel basis.
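
    As a quick way to see this structure in practice, the snippet below loads an exported encodings file and counts the encodings attached to each tensor. The file path is illustrative; the top-level keys ("version", "activation_encodings", "param_encodings") follow the specification described in this document.

    # Sketch: inspect an exported AIMET encodings file (path is illustrative).
    import json

    with open("/tmp/model.encodings") as f:
        encodings = json.load(f)

    print(encodings["version"])
    for name, enc_list in encodings["param_encodings"].items():
        # more than one entry for a tensor means the encodings are per-channel
        print(name, "->", len(enc_list), "encoding(s)")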

    - -
    + +

    2.2. Encoding File Example for PyTorch

    For PyTorch, the tensor names are derived from the ONNX representation of the model, as depicted below for a sample model.

    Mapping between ONNX tensor names and encodings

    Given below is the sample format, with keys and values, for the encodings JSON output file on PyTorch.

    {
         "version": "0.4.0"
    @@ -1230,8 +1221,8 @@ 

    2.2. Encoding File Example for PyTorch +

    +

    2.3. Encoding File Example for TensorFlow

    Given below is a sample format with the keys and values for encodings on TensorFlow graph (in JSON format).

    {
    @@ -1287,11 +1278,11 @@ 

    2.3. Encoding File Example for TensorFlow +

    +
    +

    3. Version 0.5.0

    -
    +

    3.1. Encoding Specification

    "version": "string"
     "activation_encodings":
    @@ -1336,8 +1327,8 @@ 

    bitwidth defines the precision of the tensor being generated by the producer and consumed by the downstream consumer(s).

    -

    -
    +
    +

    3.2. Encoding File Example for PyTorch

    Given below is a snippet of the sample format with the change highlighted.

    {
    @@ -1367,8 +1358,8 @@ 

    3.2. Encoding File Example for PyTorch

    -
    -
    + +

    3.3. Encoding File Example for TensorFlow

    Given below is a snippet of the sample format with the change highlighted.

    {
    @@ -1394,12 +1385,12 @@ 

    3.3. Encoding File Example for TensorFlow

    -
    - -
    + + +

    4. Version 0.6.1

    Adds a new field called quantizer_args to all exported encodings files.

    -
    +

    4.1. Encoding Specification

    -
    + + diff --git a/releases/1.33.0/api_docs/tensorflow.html b/releases/1.33.0/api_docs/tensorflow.html index 93340cf..8e68d2e 100644 --- a/releases/1.33.0/api_docs/tensorflow.html +++ b/releases/1.33.0/api_docs/tensorflow.html @@ -1,8 +1,7 @@ - - + AIMET TensorFlow APIs — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -63,7 +62,6 @@
  • Visualizations - - + + diff --git a/releases/1.33.0/api_docs/tensorflow_layer_output_generation.html b/releases/1.33.0/api_docs/tensorflow_layer_output_generation.html index 8b6fe9a..baef787 100644 --- a/releases/1.33.0/api_docs/tensorflow_layer_output_generation.html +++ b/releases/1.33.0/api_docs/tensorflow_layer_output_generation.html @@ -1,8 +1,7 @@ - - + AIMET Tensorflow Layer Output Generation API — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -63,7 +62,6 @@
      @@ -1111,16 +1103,20 @@
      -
      +

      Encoding Analyzers

      class aimet_torch.v2.quantization.encoding_analyzer.EncodingAnalyzer(observer)[source]
      -
      +

      Variants

      ++++ @@ -1133,8 +1129,8 @@

      Variants - - + Post-Training Quantization — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -61,7 +60,6 @@
        @@ -1111,9 +1103,9 @@
        -
        +

        Post-Training Quantization

        -
        +
        diff --git a/releases/1.33.0/torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.MinMaxEncodingAnalyzer.html b/releases/1.33.0/torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.MinMaxEncodingAnalyzer.html index dd0ee05..b5ee364 100644 --- a/releases/1.33.0/torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.MinMaxEncodingAnalyzer.html +++ b/releases/1.33.0/torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.MinMaxEncodingAnalyzer.html @@ -1,8 +1,7 @@ - - + MinMaxEncodingAnalyzer — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -61,7 +60,6 @@
    • MinMaxEncodingAnalyzer(shape)

      Encoding Analyzer for Min-Max calibration technique

      +++++ @@ -1276,8 +1273,8 @@

      Configuration + +

      Computing Encodings

      Before a module can compute a quantized forward pass, all quantizers must first be calibrated inside a compute_encodings context. When a quantized module enters the compute_encodings context, it first disables all input and output quantization @@ -1300,10 +1297,15 @@
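
      A rough sketch of this calibration flow is shown below. Here qmodule stands for any quantized module produced by the quantization simulation (with its quantizers already instantiated), and calibration_batches is an illustrative iterable of representative input tensors; exact class names and constructors should be taken from the API reference rather than from this sketch.

      import torch

      with qmodule.compute_encodings():
          # Forward passes inside the context only observe statistics;
          # encodings are computed when the context exits.
          for batch in calibration_batches:
              _ = qmodule(batch)

      # Outside the context, forward passes apply (fake) quantization
      # using the freshly computed encodings.
      output = qmodule(torch.randn(1, 3, 224, 224))   # illustrative input shape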

      Computing Encodings +

      +

      Attribute

      Type

      +++++ @@ -2137,8 +2139,8 @@

      Quantized Module Classes - - + Quantizers — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -63,7 +62,6 @@
    • nn.Module

      FakeQuantizationMixin

      +++++ @@ -1174,11 +1170,11 @@

      Importsfrom aimet_torch.cross_layer_equalization import equalize_model

    • from aimet_torch.model_preparer import prepare_model

    • - +
      -
      +

      QuantizationSimModel

      -
      +

      Moving from QuantWrapper to Quantized Modules

      To enable quantization in QuantSim v1, modules are wrapped with a QuantizeWrapper. These wrapped modules can be accessed as follows:

      from aimet_torch.quantsim import QuantizationSimModel as QuantizationSimModelV1
      @@ -1229,8 +1225,8 @@ 

      Moving from QuantWrapper to Quantized Moduleshere.

      -

      -
      +
      +

      Moving from StaticGrid and LearnedGrid Quantizer to Affine and Float Quantizer

      In QuantSim v1, we relied on StaticGridQuantizer and LearnedGridQuantizer. For both, floating-point quantization could be enabled based on the QuantizationDataType passed in.

      from aimet_torch.tensor_quantizer import StaticGridPerChannelQuantizers
      @@ -1250,8 +1246,8 @@ 

      Moving from StaticGrid and LearnedGrid Quantizer to Affine and Float Quantiz

      From the wrapped module (QuantSim v1) or quantized module (QuantSim v2), the attributes to access the quantizers remain consistent: .input_quantizers for input quantizers, .output_quantizers for output quantizers, and .param_quantizers for parameter quantizers.

      For more information on Quantizers, please refer to the API reference guide here.
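
      A short sketch of this shared access pattern is below; sim is assumed to be a QuantizationSimModel (v1 or v2) and 'conv1' is an illustrative layer name.

      qmodule = sim.model.conv1               # QcQuantizeWrapper in v1, quantized module in v2

      input_qs  = qmodule.input_quantizers    # list of input quantizers
      output_qs = qmodule.output_quantizers   # list of output quantizers
      param_qs  = qmodule.param_quantizers    # parameter quantizers, keyed by parameter name (e.g. 'weight')

      print(type(output_qs[0]))               # StaticGrid/LearnedGrid quantizer in v1, affine/float quantizer in v2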

      -
      -
      + +

      Code Examples

      Setup

      -
      - -
      + + +

      Deprecated Features

      There are some components tied to the QuantSim v1 design that are not needed in QuantSim v2. For example, all QuantSim v2 source code will be implemented in Python to provide easier debugging and improved portability, so it is not recommended to use libpymo modules with QuantSim 2.0. Below is a list of these features and the recommended migration guidelines:

      -

      AIMET Classes

      aimet_torch

      +
      --++ @@ -1440,9 +1436,9 @@

      Code Examples - - + Quickstart Guide — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -61,7 +60,6 @@
        @@ -1111,12 +1103,12 @@
        -
        +

        Quickstart Guide

        In this tutorial, we will go through the end-to-end process of using AIMET and PyTorch to create, calibrate, and export a simple quantized model. Note that this is intended to show the most basic workflow in AIMET. It is not meant to demonstrate the most state-of-the-art techniques available in AIMET.

        -
        +

        Overall flow

        1. Define the basic floating-point PyTorch model, training, and eval loops

        2. @@ -1126,8 +1118,8 @@

          Overall flow +

        +

        PyTorch prerequisites

        To see clearly what happens inside AIMET, let’s first start with some simple PyTorch code for defining, training, and evaluating a model. The code below is adapted from PyTorch’s @@ -1214,11 +1206,11 @@

        PyTorch prerequisites
        Floating point accuracy: 91.70999908447266
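
        The tutorial's own model definition and training loop are elided in this diff. For readers following along, a purely illustrative skeleton of the kind of floating-point model and evaluation function assumed here might look as follows (this is not the original tutorial code):

        import torch
        import torch.nn as nn

        class FloatModel(nn.Module):
            """Illustrative stand-in for the tutorial's floating-point model."""
            def __init__(self, num_classes: int = 10):
                super().__init__()
                self.conv = nn.Conv2d(3, 16, kernel_size=3, padding=1)
                self.bn = nn.BatchNorm2d(16)
                self.relu = nn.ReLU()
                self.pool = nn.AdaptiveAvgPool2d(1)
                self.fc = nn.Linear(16, num_classes)

            def forward(self, x):
                x = self.pool(self.relu(self.bn(self.conv(x))))
                return self.fc(torch.flatten(x, 1))

        def evaluate(model, loader):
            """Return top-1 accuracy (%) of `model` over `loader` (illustrative)."""
            model.eval()
            correct = total = 0
            with torch.no_grad():
                for inputs, labels in loader:
                    preds = model(inputs).argmax(dim=1)
                    correct += (preds == labels).sum().item()
                    total += labels.numel()
            return 100.0 * correct / total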
         

        -
        -
        +
        +

        Prepare the floating point model for quantization

        Before we can (accurately) simulate quantization, there are a couple of important steps to take care of:

        -
        +

        1) Model preparation

        AIMET’s quantization simulation tool (QuantizationSimModel) expects the floating point model to conform to some specific guidelines. For example, QuantizationSimModel is only able to quantize math operations performed by @@ -1271,8 +1263,8 @@

        1) Model preparationrelu() and softmax() operations.
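
        As a rough sketch of this step, the call below uses the illustrative FloatModel skeleton sketched earlier (or any user-defined floating-point nn.Module); only prepare_model itself comes from AIMET.

        from aimet_torch.model_preparer import prepare_model

        model = FloatModel().eval()             # illustrative floating-point model
        prepared_model = prepare_model(model)   # rewrites functional calls into module equivalents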

        -

        -
        +
        +

        2) BatchNorm fold

        When models are executed in a quantized runtime, batchnorm layers are typically folded into the weight and bias of an adjacent convolution layer whenever possible in order to remove unnecessary computations. To accurately simulate @@ -1318,9 +1310,9 @@

        2) BatchNorm foldIdentity (passthrough) layers where it previously had BatchNorm2d layers. Like the model_preparer step, this operation should not impact the model’s accuracy.
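
        A minimal sketch of the fold step is below; the input shape is an illustrative assumption.

        from aimet_torch.batch_norm_fold import fold_all_batch_norms

        _ = fold_all_batch_norms(prepared_model, input_shapes=(1, 3, 224, 224))
        # Folded BatchNorm2d layers are replaced with passthrough Identity layers.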

        -

        - -
        +
        +
        + +

        Fine-tune the model with quantization aware training

        If we’re not satisfied with our accuracy after applying quantization, there are some steps we can take to further optimize the quantized accuracy. One such step is quantization aware training (QAT), during which the model is trained @@ -1492,8 +1484,8 @@

        Fine-tune the model with quantization aware training
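
        A hedged sketch of what such a fine-tuning step can look like is shown below. The QuantizationSimModel construction, calibration callback, optimizer settings, and data loaders are illustrative assumptions, not the original tutorial code.

        import torch
        from aimet_torch.quantsim import QuantizationSimModel

        dummy_input = torch.randn(1, 3, 224, 224)                             # illustrative shape
        sim = QuantizationSimModel(prepared_model, dummy_input=dummy_input)
        sim.compute_encodings(lambda m, _: evaluate(m, calib_loader), None)   # calibrate before training

        optimizer = torch.optim.SGD(sim.model.parameters(), lr=1e-4)
        loss_fn = torch.nn.CrossEntropyLoss()

        sim.model.train()
        for inputs, labels in train_loader:        # assumed DataLoader
            optimizer.zero_grad()
            loss = loss_fn(sim.model(inputs), labels)
            loss.backward()                        # gradients flow through the quantization ops
            optimizer.step()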

        - -
        + +

        Export the quantsim model

        Now that we are happy with our quantized model’s accuracy, we are ready to export the model with its quantization parameters.

        export_path = "/tmp/"
        @@ -1506,8 +1498,8 @@ 
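
        The remainder of the export call is elided in the diff above; a hedged sketch of a typical export (file names and dummy input are illustrative) is:

        import torch

        export_path = "/tmp/"
        sim.export(path=export_path,
                   filename_prefix="quantized_model",
                   dummy_input=torch.randn(1, 3, 224, 224))
        # Writes the exported model plus a .encodings JSON file into export_path.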

        Export the quantsim model
        aimet_ops.LogicalNot,
             aimet_ops.NonZero,
             aimet_ops.ElementwiseUnarySign,
        -    aimet_ops.RSqrt,
        +    aimet_ops.RSqRt,
             aimet_ops.Square,
             aimet_ops.Mean,
             aimet_ops.Sum,
        diff --git a/releases/1.33.0/torch_v2/_modules/aimet_torch/v2/quantization/tensor.html b/releases/1.33.0/torch_v2/_modules/aimet_torch/v2/quantization/tensor.html
        index c36e6c4..f13118b 100644
        --- a/releases/1.33.0/torch_v2/_modules/aimet_torch/v2/quantization/tensor.html
        +++ b/releases/1.33.0/torch_v2/_modules/aimet_torch/v2/quantization/tensor.html
        @@ -226,26 +226,11 @@ 

        Source code for aimet_torch.v2.quantization.tensor

        # Operations that a per-tensor encoding can pass through _pertensor_passthrough_ops = { - torch.Tensor.__getitem__, - torch.Tensor.as_strided, torch.Tensor.broadcast_to, - torch.Tensor.chunk, - torch.Tensor.dsplit, torch.Tensor.expand, torch.Tensor.expand_as, torch.Tensor.flatten, - torch.Tensor.flip, - torch.Tensor.fliplr, - torch.Tensor.flipud, - torch.Tensor.gather, - torch.Tensor.hsplit, - torch.Tensor.index_select, - torch.Tensor.kthvalue, torch.Tensor.masked_select, - torch.Tensor.movedim, - torch.Tensor.moveaxis, - torch.Tensor.msort, - torch.Tensor.narrow, torch.Tensor.permute, torch.Tensor.repeat, torch.Tensor.reshape, @@ -253,61 +238,33 @@

        Source code for aimet_torch.v2.quantization.tensor

        torch.Tensor.resize, torch.Tensor.resize_as, torch.Tensor.select, - torch.Tensor.split, torch.Tensor.squeeze, torch.Tensor.swapaxes, torch.Tensor.swapdims, torch.Tensor.t, - torch.Tensor.take, - torch.Tensor.take_along_dim, - torch.Tensor.tensor_split, - torch.Tensor.tile, torch.Tensor.transpose, torch.Tensor.unflatten, torch.Tensor.unsqueeze, torch.Tensor.view, torch.Tensor.view_as, torch.as_strided, - torch.as_strided_copy, - torch.chunk, - torch.dsplit, - torch.expand_copy, + #torch.as_strided_copy, TODO: Uncomment when pytorch 1.9 support is fully deprecated + #torch.expand_copy, torch.flatten, - torch.flip, - torch.fliplr, - torch.flipud, - torch.gather, - torch.hsplit, - torch.index_select, - torch.masked_select, - torch.moveaxis, - torch.movedim, - torch.narrow, - torch.narrow_copy, torch.permute, - torch.permute_copy, + #torch.permute_copy, torch.reshape, - torch.select, - torch.split, torch.squeeze, - torch.squeeze_copy, - torch.swapaxes, + #torch.squeeze_copy, torch.swapdims, torch.t, - torch.take, - torch.take_along_dim, - torch.tensor_split, - torch.tile, - torch.t_copy, - torch.unbind, - torch.unflatten, + #torch.t_copy, + #torch.unflatten, torch.unsqueeze, - torch.unsqueeze_copy, - torch.vsplit, - torch.view_copy, + #torch.unsqueeze_copy, + #torch.view_copy } - @abc.abstractmethod def quantize(self) -> "QuantizedTensor": """ @@ -366,17 +323,6 @@

        Source code for aimet_torch.v2.quantization.tensor

        ret.encoding = encoding return ret - def new_empty(self, size, *, dtype=None, device=None, requires_grad=False, - layout=torch.strided, pin_memory=False, **kwargs) -> "QuantizedTensorBase": - # PyTorch requires subclasses of torch.Tensor to override this method such that - # it returns an instance of the subclass, not a plain torch.Tensor, - # for the subclass to be deep-copyable - encoding = kwargs.pop('encoding', None) - t = super().new_empty(size, dtype=dtype, device=device, requires_grad=requires_grad, - layout=layout, pin_memory=pin_memory, **kwargs).as_subclass(type(self)) - t.encoding = encoding - return t - @implements(torch.clone) def clone(self, *, memory_format=torch.preserve_format): """ @@ -423,26 +369,21 @@

        Source code for aimet_torch.v2.quantization.tensor

        self, *_ = args ret.encoding = copy.copy(self.encoding) # shallow copy - def propagate_encoding(qtensor, encoding): - if isinstance(qtensor, QuantizedTensorBase): - qtensor.encoding = copy.copy(encoding) - if func in cls._passthrough_ops: self, *_ = args - tree_map(lambda t: propagate_encoding(t, self.encoding), ret) + ret.encoding = copy.copy(self.encoding) if func in cls._pertensor_passthrough_ops: self, *_ = args - if self.encoding and self.encoding.granularity == "pertensor": + if self.encoding.granularity == "pertensor": # Return a cls object with the same encoding which can later be quantized or dequantized - tree_map(lambda t: propagate_encoding(t, self.encoding), ret) + ret.encoding = copy.copy(self.encoding) else: # Return a cls object with no encoding # If the user later tries to quantize or dequantize this, an error will be thrown - tree_map(lambda t: propagate_encoding(t, None), ret) + ret.encoding = None return ret - def set_encoding(qtensor): if not hasattr(qtensor, 'encoding'): qtensor.encoding = None @@ -521,6 +462,24 @@

        Source code for aimet_torch.v2.quantization.tensor

        without further loss in information. """ + def __getstate__(self): + state = self.__dict__ + state["data"] = self.data + state["encoding"] = self.encoding + return state + + def __setstate__(self, state): + self.data = state["data"] + self.encoding = state["encoding"] + + def __deepcopy__(self, memo): + new_instance = type(self).__new__(type(self)) + state = self.__getstate__() + new_instance.__setstate__(state) + new_instance.encoding = copy.deepcopy(state["encoding"]) + new_instance.data = copy.deepcopy(state["data"]) + return new_instance +
        [docs] def quantize(self) -> QuantizedTensor: """ Quantizes ``self`` using :attr:`self.encoding` to produce a :class:`QuantizedTensor` with the same encoding diff --git a/releases/1.33.0/torch_v2/_static/basic.css b/releases/1.33.0/torch_v2/_static/basic.css index 4e9a9f1..eeb0519 100644 --- a/releases/1.33.0/torch_v2/_static/basic.css +++ b/releases/1.33.0/torch_v2/_static/basic.css @@ -236,6 +236,16 @@ div.body p, div.body dd, div.body li, div.body blockquote { a.headerlink { visibility: hidden; } +a.brackets:before, +span.brackets > a:before{ + content: "["; +} + +a.brackets:after, +span.brackets > a:after { + content: "]"; +} + h1:hover > a.headerlink, h2:hover > a.headerlink, @@ -324,15 +334,11 @@ aside.sidebar { p.sidebar-title { font-weight: bold; } -nav.contents, -aside.topic, div.admonition, div.topic, blockquote { clear: left; } /* -- topics ---------------------------------------------------------------- */ -nav.contents, -aside.topic, div.topic { border: 1px solid #ccc; padding: 7px; @@ -371,8 +377,6 @@ div.body p.centered { div.sidebar > :last-child, aside.sidebar > :last-child, -nav.contents > :last-child, -aside.topic > :last-child, div.topic > :last-child, div.admonition > :last-child { margin-bottom: 0; @@ -380,8 +384,6 @@ div.admonition > :last-child { div.sidebar::after, aside.sidebar::after, -nav.contents::after, -aside.topic::after, div.topic::after, div.admonition::after, blockquote::after { @@ -606,26 +608,19 @@ ol.simple p, ul.simple p { margin-bottom: 0; } -aside.footnote > span, -div.citation > span { +dl.footnote > dt, +dl.citation > dt { float: left; + margin-right: 0.5em; } -aside.footnote > span:last-of-type, -div.citation > span:last-of-type { - padding-right: 0.5em; -} -aside.footnote > p { - margin-left: 2em; -} -div.citation > p { - margin-left: 4em; -} -aside.footnote > p:last-of-type, -div.citation > p:last-of-type { + +dl.footnote > dd, +dl.citation > dd { margin-bottom: 0em; } -aside.footnote > p:last-of-type:after, -div.citation > p:last-of-type:after { + +dl.footnote > dd:after, +dl.citation > dd:after { content: ""; clear: both; } @@ -641,6 +636,10 @@ dl.field-list > dt { padding-left: 0.5em; padding-right: 5px; } +dl.field-list > dt:after { + content: ":"; +} + dl.field-list > dd { padding-left: 0.5em; diff --git a/releases/1.33.0/torch_v2/_templates/autosummary/class.html b/releases/1.33.0/torch_v2/_templates/autosummary/class.html index 3567492..a01584c 100644 --- a/releases/1.33.0/torch_v2/_templates/autosummary/class.html +++ b/releases/1.33.0/torch_v2/_templates/autosummary/class.html @@ -1,8 +1,7 @@ - - + <no title> — AI Model Efficiency Toolkit Documentation: ver 1.33.0 diff --git a/releases/1.33.0/torch_v2/_templates/autosummary/function.html b/releases/1.33.0/torch_v2/_templates/autosummary/function.html index b0fbb53..b0bc9bb 100644 --- a/releases/1.33.0/torch_v2/_templates/autosummary/function.html +++ b/releases/1.33.0/torch_v2/_templates/autosummary/function.html @@ -1,8 +1,7 @@ - - + <no title> — AI Model Efficiency Toolkit Documentation: ver 1.33.0 diff --git a/releases/1.33.0/torch_v2/install/index.html b/releases/1.33.0/torch_v2/install/index.html index 4a469ec..883aa21 100644 --- a/releases/1.33.0/torch_v2/install/index.html +++ b/releases/1.33.0/torch_v2/install/index.html @@ -1,8 +1,7 @@ - - + AIMET Installation — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -119,47 +118,49 @@
        -
        +

        AIMET Installation

        -
        +

        Quick Install

        The AIMET PyTorch GPU PyPI packages are available for environments that meet the following requirements:

        • 64-bit Intel x86-compatible processor

        • Linux Ubuntu 22.04 LTS [Python 3.10] or Ubuntu 20.04 LTS [Python 3.8]

        • -
        • Cuda 12.0

        • +
        • CUDA 12.0

        • Torch 2.2.2

        -

        Pip install:

        +

        Pip install

        apt-get install liblapacke
         python3 -m pip install aimet-torch
         
        -
        -
        +
        +

        Release Packages

        -

        For other aimet variants, install the latest version from the .whl files hosted at https://github.com/quic/aimet/releases

        +

        For other AIMET variants, install the latest version from the .whl files hosted at https://github.com/quic/aimet/releases

        PyTorch

        -
        # Pytorch 1.13 with CUDA 11.x
        -python3 -m pip install https://github.com/quic/aimet/releases/download/1.31.0/aimet_torch-torch_gpu_1.31.0-cp38-cp38-linux_x86_64.whl
        -# Pytorch 1.13 CPU only
        -python3 -m pip install https://github.com/quic/aimet/releases/download/1.31.0/aimet_torch-torch_cpu_1.31.0-cp38-cp38-linux_x86_64.whl
        +
        # Pytorch 2.1 with CUDA 11.x
        +python3 -m pip install https://github.com/quic/aimet/releases/download/1.33.0/aimet_torch-1.33.0.cu118-cp310-cp310-manylinux_2_34_x86_64.whl
        +
        +# Pytorch 2.1 CPU only
        +python3 -m pip install https://github.com/quic/aimet/releases/download/1.33.0/aimet_torch-1.33.0.cpu-cp310-cp310-manylinux_2_34_x86_64.whl
        +
        +# Pytorch 1.13 with CUDA 11.x
        +python3 -m pip install https://github.com/quic/aimet/releases/download/1.33.0/aimet_torch-1.33.0.cu117-cp310-cp310-manylinux_2_34_x86_64.whl

        TensorFlow

        # Tensorflow 2.10 GPU with CUDA 11.x
        -python3 -m pip install https://github.com/quic/aimet/releases/download/1.31.0/aimet_tensorflow-tf_gpu_1.31.0-cp38-cp38-linux_x86_64.whl
        +python3 -m pip install https://github.com/quic/aimet/releases/download/1.33.0/aimet_tensorflow-1.33.0.cu118-cp310-cp310-manylinux_2_34_x86_64.whl
        +
         # Tensorflow 2.10 CPU only
        -python3 -m pip install https://github.com/quic/aimet/releases/download/1.31.0/aimet_tensorflow-tf_cpu_1.31.0-cp38-cp38-linux_x86_64.whl
        +python3 -m pip install https://github.com/quic/aimet/releases/download/1.33.0/aimet_tensorflow-1.33.0.cpu-cp310-cp310-manylinux_2_34_x86_64.whl

        Onnx

        -
        # ONNX 1.14 GPU
        -python3 -m pip install https://github.com/quic/aimet/releases/download/1.31.0/aimet_onnx-onnx_gpu_1.31.0-cp38-cp38-linux_x86_64.whl
        +
        # ONNX 1.14 GPU with CUDA 11.x
        +python3 -m pip install https://github.com/quic/aimet/releases/download/1.33.0/aimet_onnx-1.33.0.cu117-cp310-cp310-manylinux_2_34_x86_64.whl
         # ONNX 1.14 CPU
        -python3 -m pip install https://github.com/quic/aimet/releases/download/1.31.0/aimet_onnx-onnx_cpu_1.31.0-cp38-cp38-linux_x86_64.whl
        -

        For previous AIMET releases, browse packages at https://github.com/quic/aimet/releases. Each release includes multiple python packages of the following format:

        -
        # VARIANT in {torch_gpu, torch_cpu, tf_gpu, tf_cpu, onnx_gpu, onnx_cpu}
        -# PACKAGE_PREFIX in {aimet_torch, aimet_tensorflow, aimet_onnx}
        -<PACKAGE_PREFIX>-<VARIANT>_<VERSION>-cp38-cp38-linux_x86_64.whl
        -

        -
        +python3 -m pip install https://github.com/quic/aimet/releases/download/1.33.0/aimet_onnx-1.33.0.cpu-cp310-cp310-manylinux_2_34_x86_64.whl +

        For older versions, please browse the releases at https://github.com/quic/aimet/releases and follow the documentation corresponding to that release to select and install the appropriate package.

        + +

        System Requirements

        The AIMET package requires the following host platform setup:

          @@ -176,8 +177,8 @@

          System Requirements +

        +

        Advanced Installation Instructions

        There are two ways to setup and install AIMET:
        -
        - + + diff --git a/releases/1.33.0/torch_v2/install/install_docker.html b/releases/1.33.0/torch_v2/install/install_docker.html index 9084205..ff86cf9 100644 --- a/releases/1.33.0/torch_v2/install/install_docker.html +++ b/releases/1.33.0/torch_v2/install/install_docker.html @@ -1,8 +1,7 @@ - - + AIMET Installation in Docker — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -120,17 +119,16 @@
        -
        +

        AIMET Installation in Docker

        This page provides instructions to install the AIMET package inside a development Docker container.

        -
        +

        Set variant

        Set the <variant_string> to ONE of the following depending on your desired variant
        1. For the PyTorch 2.1 GPU variant, use torch-gpu

        2. For the PyTorch 2.1 CPU variant, use torch-cpu

        3. For the PyTorch 1.13 GPU variant, use torch-gpu-pt113

        4. -
        5. For the PyTorch 1.13 CPU variant, use torch-cpu-pt113

        6. For the TensorFlow GPU variant, use tf-gpu

        7. For the TensorFlow CPU variant, use tf-cpu

        8. For the ONNX GPU variant, use onnx-gpu

        9. @@ -141,8 +139,8 @@

          Set variant
          export AIMET_VARIANT=<variant_string>
           

        -
        -
        +
        +

        Use prebuilt docker image

        Follow these instructions to use one of the pre-built docker images:

        WORKSPACE="<absolute_path_to_workspace>"
        @@ -151,8 +149,8 @@ 

        Use prebuilt docker image +

        +

        Build docker image locally

        Follow these instructions ONLY if you want to build the docker image locally. If not, skip to the next section.

        WORKSPACE="<absolute_path_to_workspace>"
        @@ -162,8 +160,8 @@ 

        Build docker image locally +

        +

        Start docker container

        Ensure that a Docker container named $docker_container_name is not already running; otherwise, remove the existing container and then start a new container as follows:

        docker ps -a | grep ${docker_container_name} && docker kill ${docker_container_name}
        @@ -193,81 +191,70 @@ 

        Start docker container /bin/bash -w ${WORKSPACE} --hostname ${docker_container_name} ${docker_image_name}

        -
        -
        +
        +

        Install AIMET packages

        -
        +

        From PyPI

        -

        Aimet Torch GPU can install from pypi through the following method:

        -

        Go to https://pypi.org/project/aimet-torch to identify a version you wish to install

        -
        -
          -
        • For PyTorch 1.13 GPU you should use aimet-torch==1.31.1

        • -
        • For Pytorch 2.1.2 GPU you should use aimet-torch >= 1.32.0

        • +
          +
          The default AIMET Torch GPU variant may be installed from PyPI as follows:
            +
          • Go to https://pypi.org/project/aimet-torch

          • +
          • +
            Browse the Requirements section of each Release to identify the version you wish to install. Following are some tips:
              +
            • For Pytorch 2.2.2 GPU with CUDA 12.1, use aimet-torch>=1.32.2

            • +
            • For Pytorch 2.1.2 GPU with CUDA 12.1, use aimet-torch==1.32.1.post1

            • +
            • For PyTorch 1.13 GPU with CUDA 11.7, use aimet-torch==1.31.2

            • +
            +
            +
            +
          -
        -
        sudo apt-get install liblapacke -y
        -pip install aimet-torch
        +
        +
        +

        Run the following commands to install the package (prepend with “sudo” and/or package version as needed):

        +
        apt-get install liblapacke -y
        +python3 -m pip install aimet-torch
         
        -
        -
        -

        From Release Package

        -

        Alternatively, we host .whl packages for each release at https://github.com/quic/aimet/releases. Identify the release tag -of the package you wish to install, then follow the instructions below to install AIMET from the .whl file.

        -

        Set the <variant_string> to ONE of the following depending on your desired variant

        -
          -
        1. For the PyTorch 2.1 GPU variant, use “torch_gpu”

        2. -
        3. For the PyTorch 2.1 CPU variant, use “torch_cpu”

        4. -
        5. For the PyTorch 1.13 GPU variant, use “torch_gpu-pt113”

        6. -
        7. For the PyTorch 1.13 CPU variant, use “torch_cpu-pt113”

        8. -
        9. For the TensorFlow GPU variant, use “tf_gpu”

        10. -
        11. For the TensorFlow CPU variant, use “tf_cpu”

        12. -
        13. For the ONNX GPU variant, use “onnx_gpu”

        14. -
        15. For the ONNX CPU variant, use “onnx_cpu”

        16. -
        -
        export AIMET_VARIANT=<variant_string>
        -
        -

        Replace <release_tag> in the steps below with the appropriate tag:

        -
        export release_tag=<release_tag>
        +
        +

        From Release Package

        +
        +
        We also host python wheel packages for different variants which may be installed as follows:
          +
        • Go to https://github.com/quic/aimet/releases

        • +
        • Identify the release tag of the package that you wish to install

        • +
        • Identify the .whl file corresponding to the package variant that you wish to install

        • +
        • Follow the instructions below to install AIMET from the .whl file

        • +
        +
        +
        +

        Set the package details as follows:

        +
        # Set the release tag ex. "1.33.0"
        +export release_tag="<version release tag>"
        +
        +# Construct the download root URL
        +export download_url="https://github.com/quic/aimet/releases/download/${release_tag}"
        +
        +# Set the wheel file name with extension
        +# ex. "aimet_torch-1.33.0.cu118-cp310-cp310-manylinux_2_34_x86_64.whl"
        +export wheel_file_name="<wheel file name>"
         
        -

        Set the package download URL as follows:

        -
        export download_url="https://github.com/quic/aimet/releases/download/${release_tag}"
        +

        Install the selected AIMET package as specified below: +NOTE: Python dependencies will automatically get installed.

        +
        python3 -m pip install ${download_url}/${wheel_file_name}
         
        -

        Set the common suffix for the package files as follows:

        -
        export wheel_file_suffix="cp310-cp310-linux_x86_64.whl"
        -
        -

        Install the AIMET packages in the order specified below:

        -
        -
        NOTE:
          -
        1. Please pre-pend the “apt-get install” and “pip3 install” commands with “sudo -H” as appropriate.

        2. -
        3. These instructions assume that pip packages will be installed in the path: /usr/local/lib/python3.10/dist-packages. If that is not the case, please modify it accordingly.

        4. -
        5. Python dependencies will automatically get installed.

        6. -
        -
        -
        -
        # Install ONE of the following depending on the variant
        -python3 -m pip install ${download_url}/aimet_torch-${AIMET_VARIANT}_${release_tag}-${wheel_file_suffix} -f https://download.pytorch.org/whl/torch_stable.html
        -# OR
        -python3 -m pip install ${download_url}/aimet_tensorflow-${AIMET_VARIANT}_${release_tag}-${wheel_file_suffix}
        -# OR
        -python3 -m pip install ${download_url}/aimet_onnx-${AIMET_VARIANT}_${release_tag}-${wheel_file_suffix}
        -
        -
        - -
        +

        Environment setup

        Set the common environment variables as follows:

        source /usr/local/lib/python3.10/dist-packages/aimet_common/bin/envsetup.sh
         
        -
        - +
        +
        diff --git a/releases/1.33.0/torch_v2/install/install_host.html b/releases/1.33.0/torch_v2/install/install_host.html index f6bc859..d1159ea 100644 --- a/releases/1.33.0/torch_v2/install/install_host.html +++ b/releases/1.33.0/torch_v2/install/install_host.html @@ -1,8 +1,7 @@ - - + AIMET Installation and Setup — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -120,7 +119,7 @@
        -
        +

        AIMET Installation and Setup

        This page provides instructions to install the AIMET package on Ubuntu 22.04 LTS with an Nvidia GPU. Please follow the instructions in the order provided, unless specified otherwise.

        @@ -130,7 +129,7 @@
        -
        +

        Install prerequisite packages

        Install the basic pre-requisite packages as follows:

        apt-get update
        @@ -144,8 +143,8 @@ 

        Install prerequisite packages --set python3 /usr/bin/python3.10

        -
        -
        +
        +

        Install GPU packages

        NOTE:

          @@ -162,7 +161,7 @@

          Install GPU packages
        1. The instructions in the sub-sections below correspond to our tested versions above. Visit this page https://developer.nvidia.com/cuda-toolkit-archive to obtain the correct version of the CUDA toolkit for your environment.

        -
        +

        Install GPU packages for PyTorch 2.1 or TensorFlow

        NOTE:

          @@ -180,8 +179,8 @@

          Install GPU packages for PyTorch 2.1 or TensorFlow update

        -
        -
        +
        +

        Install GPU packages for PyTorch 1.13 or ONNX

        NOTE:

          @@ -199,69 +198,64 @@

          Install GPU packages for PyTorch 1.13 or ONNX update

        - - -
        + + +

        Install AIMET packages

        -
        +

        From PyPI

        -

        Aimet Torch GPU can install from pypi through the following method:

        -

        Go to https://pypi.org/project/aimet-torch to identify a version you wish to install

        -
        -
          -
        • For PyTorch 1.13 GPU you should use aimet-torch==1.31.1

        • -
        • For Pytorch 2.1.2 GPU you should use aimet-torch >= 1.32.0

        • +
          +
          The default AIMET Torch GPU variant may be installed from PyPI as follows:
            +
          • Go to https://pypi.org/project/aimet-torch

          • +
          • +
            Browse the Requirements section of each Release to identify the version you wish to install. Following are some tips:
              +
            • For Pytorch 2.2.2 GPU with CUDA 12.1, use aimet-torch>=1.32.2

            • +
            • For Pytorch 2.1.2 GPU with CUDA 12.1, use aimet-torch==1.32.1.post1

            • +
            • For PyTorch 1.13 GPU with CUDA 11.7, use aimet-torch==1.31.2

            • +
            +
            +
            +
          -
        -
        sudo apt-get install liblapacke -y
        -pip install aimet-torch
        +
        +
        +

        Run the following commands to install the package (prepend with “sudo” and/or package version as needed):

        +
        apt-get install liblapacke -y
        +python3 -m pip install aimet-torch
         
        -
        -
        -

        From Release Package

        -

        Alternatively, we host .whl packages for each release at https://github.com/quic/aimet/releases. Identify the release tag -of the package you wish to install, then follow the instructions below to install AIMET from the .whl file.

        -

        Set the <variant_string> to ONE of the following depending on your desired variant

        -
          -
        1. For the PyTorch 2.1 GPU variant, use “torch_gpu”

        2. -
        3. For the PyTorch 2.1 CPU variant, use “torch_cpu”

        4. -
        5. For the PyTorch 1.13 GPU variant, use “torch_gpu_pt113”

        6. -
        7. For the PyTorch 1.13 CPU variant, use “torch_cpu_pt113”

        8. -
        9. For the TensorFlow GPU variant, use “tf_gpu”

        10. -
        11. For the TensorFlow CPU variant, use “tf_cpu”

        12. -
        13. For the ONNX GPU variant, use “onnx_gpu”

        14. -
        15. For the ONNX CPU variant, use “onnx_cpu”

        16. -
        -
        export AIMET_VARIANT=<variant_string>
        -
        -

        Replace <release_tag> in the steps below with the appropriate tag:

        -
        export release_tag=<release_tag>
        +
        +

        From Release Package

        +
        +
        We also host python wheel packages for different variants which may be installed as follows:
          +
        • Go to https://github.com/quic/aimet/releases

        • +
        • Identify the release tag of the package that you wish to install

        • +
        • Identify the .whl file corresponding to the package variant that you wish to install

        • +
        • Follow the instructions below to install AIMET from the .whl file

        • +
        +
        +
        +

        Set the package details as follows:

        +
        # Set the release tag ex. "1.33.0"
        +export release_tag="<version release tag>"
        +
        +# Construct the download root URL
        +export download_url="https://github.com/quic/aimet/releases/download/${release_tag}"
        +
        +# Set the wheel file name with extension
        +# ex. "aimet_torch-1.33.0.cu118-cp310-cp310-manylinux_2_34_x86_64.whl"
        +export wheel_file_name="<wheel file name>"
         
        -

        Set the package download URL as follows:

        -
        export download_url="https://github.com/quic/aimet/releases/download/${release_tag}"
        +

        Install the selected AIMET package as specified below: +NOTE: Python dependencies will automatically get installed.

        +
        python3 -m pip install ${download_url}/${wheel_file_name}
         
        -

        Set the common suffix for the package files as follows:

        -

        NOTE: Set wheel_file_suffix to cp310-cp310-linux_x86_64.whl OR cp38-cp38-linux_x86_64.whl OR cp36-cp36m-linux_x86_64 OR cp37-cp37m-linux_x86_64 OR py3-none-any as appropriate depending on the actual wheel filename(s) on the https://github.com/quic/aimet/releases.

        export wheel_file_suffix="cp310-cp310-linux_x86_64.whl"

        Install the AIMET packages in the order specified below:


        NOTE: Python dependencies will automatically get installed.

        # Install ONE of the following depending on the variant
        python3 -m pip install ${download_url}/aimet_torch-${AIMET_VARIANT}_${release_tag}-${wheel_file_suffix} -f https://download.pytorch.org/whl/torch_stable.html
        # OR
        python3 -m pip install ${download_url}/aimet_tensorflow-${AIMET_VARIANT}_${release_tag}-${wheel_file_suffix}
        # OR
        python3 -m pip install ${download_url}/aimet_onnx-${AIMET_VARIANT}_${release_tag}-${wheel_file_suffix}
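
        As a sketch, with AIMET_VARIANT=torch_gpu and the wheel_file_suffix exported above, the first command expands to something like the following (illustrative only; always confirm the exact filename on the releases page):

        python3 -m pip install ${download_url}/aimet_torch-torch_gpu_${release_tag}-cp310-cp310-linux_x86_64.whl -f https://download.pytorch.org/whl/torch_stable.html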

        Install common debian packages

        Install the common debian packages as follows:

        cat /usr/local/lib/python3.10/dist-packages/aimet_common/bin/reqs_deb_common.txt | xargs apt-get --assume-yes install

        cat /usr/local/lib/python3.10/dist-packages/aimet_onnx/bin/reqs_deb_onnx_common.txt | xargs apt-get --assume-yes install
         


        Install TensorFlow GPU debian packages

        NOTE: Do this ONLY for the TensorFlow GPU package.

        cat /usr/local/lib/python3.10/dist-packages/aimet_tensorflow/bin/reqs_deb_tf_gpu.txt | xargs apt-get --assume-yes install
         

        Install torch GPU debian packages

        NOTE: Do this ONLY for the PyTorch GPU package.

        cat /usr/local/lib/python3.10/dist-packages/aimet_torch/bin/reqs_deb_torch_gpu.txt | xargs apt-get --assume-yes install
         

        Install ONNX GPU debian packages

        NOTE: Do this ONLY for the ONNX GPU package.

        cat /usr/local/lib/python3.10/dist-packages/aimet_onnx/bin/reqs_deb_onnx_gpu.txt | xargs apt-get --assume-yes install
         

        Replace Pillow with Pillow-SIMD

        Optional: Replace the Pillow package with Pillow-SIMD as follows:

        python3 -m pip uninstall -y pillow
         python3 -m pip install --no-cache-dir Pillow-SIMD==9.0.0.post1
         

        Replace onnxruntime with onnxruntime-gpu

        NOTE: Do this ONLY for the PyTorch GPU package.

        export ONNXRUNTIME_VER=$(python3 -c 'import onnxruntime; print(onnxruntime.__version__)')
        python3 -m pip uninstall -y onnxruntime
        python3 -m pip install --no-cache-dir onnxruntime-gpu==$ONNXRUNTIME_VER


        Post installation steps

        ln -s /usr/lib/x86_64-linux-gnu/libjpeg.so /usr/lib
         
        ln -s /usr/local/cuda-11.8 /usr/local/cuda


        Environment setup

        Set the common environment variables as follows:

        source /usr/local/lib/python3.10/dist-packages/aimet_common/bin/envsetup.sh
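
        One quick way to sanity-check the installation afterwards is to import the package you installed (shown here for the torch variant; substitute aimet_tensorflow or aimet_onnx as appropriate):

        python3 -c "import aimet_torch"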
         
diff --git a/releases/1.33.0/torch_v2/objects.inv b/releases/1.33.0/torch_v2/objects.inv
index 60268b9ebbf9f13438525592b6674015540f91e8..7f3a47b6a5ecef07b79aaaedd7a7bd8db2297d0c 100644
GIT binary patch (regenerated Sphinx object inventory; binary deltas omitted)

diff --git a/releases/1.33.0/torch_v2/searchindex.js b/releases/1.33.0/torch_v2/searchindex.js
index 5753753..1e1b454 100644
--- a/releases/1.33.0/torch_v2/searchindex.js
+++ b/releases/1.33.0/torch_v2/searchindex.js
(regenerated Sphinx search index; minified Search.setIndex payload omitted)
"basequantizationmixin": 24, "respons": [24, 36], "control": [24, 44, 45], "descript": [24, 37], "dict": [24, 25], "By": [24, 36, 42, 44], "index": [24, 32, 46], "respect": [24, 40], "per_channel_quant": [24, 42], "elementwis": [24, 46], "multipli": [24, 32], "qmul": 24, "some": [24, 26, 27, 28, 32, 33, 36, 37, 38, 39, 41, 43, 44], "sens": 24, "qadd": 24, "quantizedadd": 24, "befor": [24, 26, 27, 28, 29, 30, 36, 38, 41, 48, 49], "first": [24, 27, 32, 36, 38, 41, 48], "disabl": [24, 26, 33, 36, 40, 42, 44], "them": [24, 26, 27, 28, 51], "calibration_data_load": 24, "adaptiveavgpool1d": 24, "fakequantizedadaptiveavgpool1d": 24, "adaptiveavgpool2d": 24, "fakequantizedadaptiveavgpool2d": 24, "adaptiveavgpool3d": 24, "fakequantizedadaptiveavgpool3d": 24, "adaptivemaxpool1d": 24, "fakequantizedadaptivemaxpool1d": 24, "adaptivemaxpool2d": 24, "fakequantizedadaptivemaxpool2d": 24, "adaptivemaxpool3d": 24, "fakequantizedadaptivemaxpool3d": 24, "alphadropout": 24, "fakequantizedalphadropout": 24, "avgpool1d": 24, "fakequantizedavgpool1d": 24, "avgpool2d": 24, "fakequantizedavgpool2d": 24, "avgpool3d": 24, "fakequantizedavgpool3d": 24, "batchnorm1d": 24, "fakequantizedbatchnorm1d": 24, "batchnorm2d": [24, 27], "fakequantizedbatchnorm2d": 24, "batchnorm3d": 24, "fakequantizedbatchnorm3d": 24, "celu": 24, "fakequantizedcelu": 24, "channelshuffl": 24, "fakequantizedchannelshuffl": 24, "constantpad1d": 24, "fakequantizedconstantpad1d": 24, "constantpad2d": 24, "fakequantizedconstantpad2d": 24, "constantpad3d": 24, "fakequantizedconstantpad3d": 24, "conv1d": [24, 46], "fakequantizedconv1d": 24, "quantizedconv1d": 24, "fakequantizedconv2d": 24, "conv3d": 24, "fakequantizedconv3d": 24, "quantizedconv3d": 24, "convtranspose1d": [24, 46], "fakequantizedconvtranspose1d": 24, "convtranspose2d": 24, "fakequantizedconvtranspose2d": 24, "convtranspose3d": 24, "fakequantizedconvtranspose3d": 24, "crossmaplrn2d": 24, "fakequantizedcrossmaplrn2d": 24, "dropout": 24, "fakequantizeddropout": 24, "dropout2d": 24, "fakequantizeddropout2d": 24, "dropout3d": 24, "fakequantizeddropout3d": 24, "elu": 24, "fakequantizedelu": 24, "featurealphadropout": 24, "fakequantizedfeaturealphadropout": 24, "flatten": 24, "fakequantizedflatten": 24, "fold": [24, 28, 29, 30, 38, 39, 40, 46], "fakequantizedfold": 24, "fractionalmaxpool2d": 24, "fakequantizedfractionalmaxpool2d": 24, "fractionalmaxpool3d": 24, "fakequantizedfractionalmaxpool3d": 24, "gelu": 24, "fakequantizedgelu": 24, "quantizedgelu": 24, "glu": 24, "fakequantizedglu": 24, "groupnorm": 24, "fakequantizedgroupnorm": 24, "hardshrink": 24, "fakequantizedhardshrink": 24, "hardsigmoid": 24, "fakequantizedhardsigmoid": 24, "hardswish": 24, "fakequantizedhardswish": 24, "hardtanh": 24, "fakequantizedhardtanh": 24, "ident": [24, 27], "fakequantizedident": 24, "instancenorm1d": 24, "fakequantizedinstancenorm1d": 24, "instancenorm2d": 24, "fakequantizedinstancenorm2d": 24, "instancenorm3d": 24, "fakequantizedinstancenorm3d": 24, "lppool1d": 24, "fakequantizedlppool1d": 24, "lppool2d": 24, "fakequantizedlppool2d": 24, "layernorm": 24, "fakequantizedlayernorm": 24, "quantizedlayernorm": 24, "leakyrelu": 24, "fakequantizedleakyrelu": 24, "localresponsenorm": 24, "fakequantizedlocalresponsenorm": 24, "logsigmoid": 24, "fakequantizedlogsigmoid": 24, "logsoftmax": 24, "fakequantizedlogsoftmax": 24, "maxpool1d": 24, "fakequantizedmaxpool1d": 24, "maxpool2d": 24, "fakequantizedmaxpool2d": 24, "maxpool3d": 24, "fakequantizedmaxpool3d": 24, "maxunpool1d": 24, "fakequantizedmaxunpool1d": 24, 
"maxunpool2d": 24, "fakequantizedmaxunpool2d": 24, "maxunpool3d": 24, "fakequantizedmaxunpool3d": 24, "mish": 24, "fakequantizedmish": 24, "prelu": 24, "fakequantizedprelu": 24, "pixelshuffl": 24, "fakequantizedpixelshuffl": 24, "pixelunshuffl": 24, "fakequantizedpixelunshuffl": 24, "rrelu": 24, "fakequantizedrrelu": 24, "relu": [24, 26, 27, 39, 42, 51], "fakequantizedrelu": [24, 26, 27], "relu6": [24, 39], "fakequantizedrelu6": 24, "reflectionpad1d": 24, "fakequantizedreflectionpad1d": 24, "reflectionpad2d": 24, "fakequantizedreflectionpad2d": 24, "replicationpad1d": 24, "fakequantizedreplicationpad1d": 24, "replicationpad2d": 24, "fakequantizedreplicationpad2d": 24, "replicationpad3d": 24, "fakequantizedreplicationpad3d": 24, "selu": 24, "fakequantizedselu": 24, "silu": 24, "fakequantizedsilu": 24, "sigmoid": 24, "fakequantizedsigmoid": 24, "quantizedsigmoid": 24, "softmax": [24, 27], "fakequantizedsoftmax": 24, "quantizedsoftmax": [24, 27], "softmax2d": 24, "fakequantizedsoftmax2d": 24, "softmin": 24, "fakequantizedsoftmin": 24, "softplu": 24, "fakequantizedsoftplu": 24, "softshrink": 24, "fakequantizedsoftshrink": 24, "softsign": 24, "fakequantizedsoftsign": 24, "syncbatchnorm": 24, "fakequantizedsyncbatchnorm": 24, "tanh": 24, "fakequantizedtanh": 24, "tanhshrink": 24, "fakequantizedtanhshrink": 24, "threshold": [24, 29], "fakequantizedthreshold": 24, "unflatten": 24, "fakequantizedunflatten": 24, "unfold": 24, "fakequantizedunfold": 24, "upsampl": [24, 37], "fakequantizedupsampl": 24, "upsamplingbilinear2d": 24, "fakequantizedupsamplingbilinear2d": 24, "upsamplingnearest2d": 24, "fakequantizedupsamplingnearest2d": 24, "zeropad2d": 24, "fakequantizedzeropad2d": 24, "bceloss": 24, "fakequantizedbceloss": 24, "bcewithlogitsloss": 24, "fakequantizedbcewithlogitsloss": 24, "bilinear": [24, 37], "fakequantizedbilinear": 24, "ctcloss": 24, "fakequantizedctcloss": 24, "cosinesimilar": 24, "fakequantizedcosinesimilar": 24, "crossentropyloss": [24, 27], "fakequantizedcrossentropyloss": 24, "hingeembeddingloss": 24, "fakequantizedhingeembeddingloss": 24, "huberloss": 24, "fakequantizedhuberloss": 24, "kldivloss": 24, "fakequantizedkldivloss": 24, "l1loss": 24, "fakequantizedl1loss": 24, "mseloss": 24, "fakequantizedmseloss": 24, "multilabelmarginloss": 24, "fakequantizedmultilabelmarginloss": 24, "multilabelsoftmarginloss": 24, "fakequantizedmultilabelsoftmarginloss": 24, "multimarginloss": 24, "fakequantizedmultimarginloss": 24, "nllloss": 24, "fakequantizednllloss": 24, "nllloss2d": 24, "fakequantizednllloss2d": 24, "pairwisedist": 24, "fakequantizedpairwisedist": 24, "poissonnllloss": 24, "fakequantizedpoissonnllloss": 24, "smoothl1loss": 24, "fakequantizedsmoothl1loss": 24, "softmarginloss": 24, "fakequantizedsoftmarginloss": 24, "cosineembeddingloss": 24, "fakequantizedcosineembeddingloss": 24, "gaussiannllloss": 24, "fakequantizedgaussiannllloss": 24, "marginrankingloss": 24, "fakequantizedmarginrankingloss": 24, "tripletmarginloss": 24, "fakequantizedtripletmarginloss": 24, "tripletmarginwithdistanceloss": 24, "fakequantizedtripletmarginwithdistanceloss": 24, "embed": [24, 36, 43], "fakequantizedembed": 24, "embeddingbag": 24, "fakequantizedembeddingbag": 24, "gru": [24, 46], "fakequantizedgru": 24, "rnn": [24, 46], "fakequantizedrnn": 24, "grucel": 24, "fakequantizedgrucel": 24, "rnncell": 24, "fakequantizedrnncel": 24, "lstm": [24, 46], "fakequantizedlstm": 24, "lstmcell": 24, "fakequantizedlstmcel": 24, "adaptivelogsoftmaxwithloss": 24, "fakequantizedadaptivelogsoftmaxwithloss": 24, 
"aimet_op": 24, "fakequantizedcast": 24, "depthtospacedcrmod": 24, "fakequantizeddepthtospacedcrmod": 24, "onehot": 24, "fakequantizedonehot": 24, "exponenti": 24, "fakequantizedexponenti": 24, "erf": 24, "fakequantizederf": 24, "sqrt": 24, "fakequantizedsqrt": 24, "log": [24, 40], "fakequantizedlog": 24, "ab": [24, 39], "fakequantizedab": 24, "fakequantizedneg": 24, "elementwiseceil": 24, "fakequantizedelementwiseceil": 24, "elementwisefloor": 24, "fakequantizedelementwisefloor": 24, "sin": 24, "fakequantizedsin": 24, "co": 24, "fakequantizedco": 24, "asin": 24, "fakequantizedasin": 24, "atan": 24, "fakequantizedatan": 24, "fakequantizedround": 24, "logicalnot": 24, "fakequantizedlogicalnot": 24, "nonzero": 24, "fakequantizednonzero": 24, "elementwiseunarysign": 24, "fakequantizedelementwiseunarysign": 24, "rsqrt": 24, "fakequantizedrsqrt": 24, "squar": [24, 44], "fakequantizedsquar": 24, "fakequantizedmean": 24, "sum": [24, 27], "fakequantizedsum": 24, "prod": 24, "fakequantizedprod": 24, "argmin": 24, "fakequantizedargmin": 24, "argmax": [24, 27], "fakequantizedargmax": 24, "gather": 24, "fakequantizedgath": 24, "reshap": 24, "fakequantizedreshap": 24, "roialign": 24, "fakequantizedroialign": 24, "permut": 24, "fakequantizedpermut": 24, "indexselect": 24, "fakequantizedindexselect": 24, "topk": 24, "fakequantizedtopk": 24, "tile": 24, "fakequantizedtil": 24, "norm": [24, 28, 30, 38, 39, 40], "fakequantizednorm": 24, "cumsum": 24, "fakequantizedcumsum": 24, "interpol": [24, 33], "fakequantizedinterpol": 24, "normal": [24, 30, 40], "pad": [24, 27], "fakequantizedpad": 24, "fakequantizedshap": 24, "fakequantizedexpand": 24, "stridedslic": 24, "fakequantizedstridedslic": 24, "matmul": [24, 46], "fakequantizedmatmul": 24, "fakequantizedadd": 24, "fakequantizedmultipli": 24, "subtract": 24, "fakequantizedsubtract": 24, "quantizedsubtract": 24, "fakequantizeddivid": 24, "floordivid": 24, "fakequantizedfloordivid": 24, "fakequantizedgreat": 24, "fakequantizedless": 24, "greaterequ": 24, "fakequantizedgreaterequ": 24, "lessequ": 24, "fakequantizedlessequ": 24, "notequ": 24, "fakequantizednotequ": 24, "fakequantizedequ": 24, "remaind": 24, "fakequantizedremaind": 24, "fmod": 24, "fakequantizedfmod": 24, "pow": 24, "fakequantizedpow": 24, "customsilu": 24, "fakequantizedcustomsilu": 24, "fakequantizedmaximum": 24, "fakequantizedmax": 24, "fakequantizedminimum": 24, "fakequantizedmin": 24, "bmm": 24, "fakequantizedbmm": 24, "logicalor": 24, "fakequantizedlogicalor": 24, "logicaland": 24, "fakequantizedlogicaland": 24, "customgath": 24, "fakequantizedcustomgath": 24, "gathernd": 24, "fakequantizedgathernd": 24, "baddbmm": 24, "fakequantizedbaddbmm": 24, "addmm": 24, "fakequantizedaddmm": 24, "scatternd": 24, "fakequantizedscatternd": 24, "dynamicconv2d": 24, "fakequantizeddynamicconv2d": 24, "scatterel": 24, "fakequantizedscatterel": 24, "batchnorm": [24, 29, 39, 51], "fakequantizedbatchnorm": 24, "fakequantizedaimetgroupnorm": 24, "nonmaxsuppress": 24, "fakequantizednonmaxsuppress": 24, "fakequantizedsplit": 24, "concat": [24, 46], "fakequantizedconcat": 24, "fakequantizedwher": 24, "maskedfil": 24, "fakequantizedmaskedfil": 24, "allow_overwrit": [25, 26], "allow_overwit": 25, "flag": [25, 26], "get_encod": 25, "get_legacy_encod": 25, "register_quantization_paramet": 25, "set_legacy_encod": 25, "learn": [26, 30, 36, 38, 41, 44, 45, 46], "v1": [26, 45], "debug": [26, 27, 43, 45], "simpler": 26, "extend": [26, 45], "overview": 26, "fundament": 26, "advis": [26, 38, 42, 45], "subject": [26, 45], 
"understand": [26, 38, 42, 48, 49], "interact": 26, "remain": [26, 33, 38, 39, 44], "hood": 26, "build": [26, 45], "properti": 26, "shown": [26, 28, 36, 39, 40, 43], "intern": [26, 36, 38, 39, 42], "compon": [26, 45], "namespac": [26, 45], "directli": [26, 40, 44], "adaround_weight": 26, "sequenti": [26, 42, 43, 45], "mse": [26, 40, 44, 45], "seq_ms": 26, "apply_seq_ms": 26, "quantanalyz": [26, 38, 45, 46], "quant_analyz": 26, "autoqu": [26, 38, 41, 46], "auto_qu": 26, "longer": [26, 38, 41], "libpymo": 26, "statement": [26, 37], "stai": 26, "quantschem": [26, 29], "cross_layer_equ": [26, 37], "equalize_model": [26, 37], "model_prepar": [26, 27], "prepare_model": [26, 27], "wrap": 26, "quantizewrapp": 26, "quantizationsimmodelv1": 26, "all_quant_wrapp": 26, "quant_wrapp": 26, "staticgridquantwrapp": 26, "_module_to_wrap": 26, "200": 26, "contrast": 26, "definit": [26, 27, 38], "quantizationsimmodelv2": 26, "sim2": 26, "all_q_modul": 26, "qmodul": 26, "q_modul": 26, "here": [26, 27, 32, 41, 48], "reli": 26, "staticgridquant": 26, "learnedgridquant": 26, "could": [26, 27, 31, 51], "quantizationdatatyp": 26, "tensor_quant": 26, "staticgridperchannelquant": 26, "fp_quantiz": 26, "data_typ": 26, "affine_quant": 26, "howev": [26, 38, 39, 41, 42, 44], "separ": [26, 30, 40, 43, 46], "relat": [26, 40, 44], "affine_q": 26, "affine_qdq": 26, "fp_qdq": 26, "floatquantizedequant": 26, "sim1": 26, "wrap_linear": 26, "symmetri": 26, "use_symmetric_encod": 26, "is_unsigned_symmetr": 26, "use_strict_symmetr": 26, "simplifi": 26, "tfencod": 26, "copy_": 26, "_remove_input_quant": 26, "_remove_output_quant": 26, "_remove_param_quant": 26, "param_encod": 26, "temporarili": 26, "assert": [26, 27], "freez": [26, 28], "_is_encoding_frozen": 26, "freeze_encod": 26, "concept": 26, "mimick": 26, "involv": [26, 27, 38, 43, 45], "requires_grad_": 26, "prevent": [26, 31], "overwritten": 26, "ti": 26, "design": [26, 39, 45], "portabl": [26, 45], "It": [26, 27, 30, 33, 38, 39, 42, 48, 49, 51], "guidelin": [26, 27, 28, 32, 41], "learnedgridquantwrapp": 26, "encodinganalyzerforpython": 26, "affineencod": 26, "floatencod": 26, "vectorencod": 26, "tutori": 27, "simpl": [27, 38, 51], "intend": [27, 32], "meant": 27, "demonstr": 27, "art": 27, "eval": [27, 33, 36, 48], "loop": [27, 43], "evalu": [27, 29, 33, 36, 38, 40, 41, 44, 48], "clearli": 27, "what": [27, 44, 48], "happen": 27, "let": 27, "special": 27, "look": [27, 48], "torchvis": 27, "is_avail": 27, "loader": [27, 28], "cifar10_train_data": 27, "dataset": [27, 38, 39, 44], "fashionmnist": 27, "tmp": 27, "cifar10": 27, "totensor": 27, "cifar10_test_data": 27, "train_load": 27, "dataload": [27, 40], "batch_siz": 27, "shuffl": 27, "test_load": 27, "super": 27, "conv1": 27, "kernel_s": 27, "stride": 27, "bn_1": 27, "256": [27, 40], "bn_2": 27, "total": [27, 33, 44], "now": [27, 38, 45, 46, 51], "few": [27, 32, 38, 43, 44], "epoch": [27, 34, 36, 38, 41], "establish": 27, "baselin": [27, 33, 41], "send": 27, "loss_fn": 27, "adam": 27, "lr": 27, "1e": [27, 41], "batch_idx": 27, "enumer": [27, 30], "backward": 27, "zero_grad": 27, "fp_accuraci": 27, "91": 27, "70999908447266": 27, "accur": 27, "coupl": [27, 28], "care": 27, "conform": 27, "math": 27, "wherea": [27, 44], "incorrectli": 27, "ignor": 27, "complet": [27, 30, 43], "redefin": 27, "thankfulli": 27, "incompat": 27, "fulli": [27, 35], "prepared_model": 27, "fp_accuracy_prepar": 27, "2024": 27, "07": 27, "747": 27, "root": 27, "info": [27, 46], "806": 27, "modelprepar": 27, "ad": [27, 35, 38, 42, 46], "node": [27, 41, 
44], "module_relu": 27, "module_relu_1": 27, "module_softmax": 27, "graphmodul": 27, "ep": 27, "momentum": 27, "track_running_stat": 27, "12544": 27, "getattr_1": 27, "getitem": 27, "graph_modul": 27, "print_read": 27, "distinct": 27, "execut": [27, 33, 48], "adjac": [27, 42], "whenev": 27, "unnecessari": [27, 51], "good": [27, 28], "idea": 27, "batch_norm_fold": 27, "iter": [27, 28, 39], "fold_all_batch_norm": 27, "input_shap": 27, "passthrough": 27, "previous": 27, "had": 27, "impact": [27, 33, 43], "readi": [27, 43], "encount": 27, "therefor": [27, 32, 39], "theoret": 27, "practic": [27, 36], "usual": [27, 41], "500": [27, 28, 39, 40], "1000": [27, 28, 39, 40], "estim": [27, 38, 39], "default_output_bw": 27, "default_param_bw": 27, "idx": 27, "break": 27, "quantized_accuraci": 27, "1500015258789": 27, "noth": 27, "everi": [27, 33, 36, 41, 49], "construct": [27, 37], "discuss": [27, 32, 43, 44], "advanc": [27, 45], "re": [27, 38], "One": [27, 32, 36, 47], "qat": [27, 28, 30, 34, 38, 43, 44, 46], "op": [27, 38, 42, 46], "repeat": [27, 31], "post_qat_accuraci": 27, "92": 27, "05333709716797": 27, "happi": 27, "export_path": 27, "model_nam": 27, "fashion_mnist_model": 27, "save": [27, 29, 44, 49], "sent": 27, "nearest": 28, "figur": [28, 33, 43, 51], "illustr": [28, 33, 38, 47, 50], "smaller": [28, 34, 43, 47, 50], "unlabel": [28, 38, 40, 44], "far": 28, "decid": [28, 48], "whether": [28, 41], "awai": 28, "closer": 28, "fp32": [28, 34, 39, 40, 41, 43, 44], "width": [28, 43, 44, 47, 50, 51], "bc": 28, "bnf": 28, "batch": [28, 30, 38, 39, 40], "cle": [28, 38, 43, 46], "cross": [28, 29, 37, 38, 40, 49], "hbf": 28, "awar": [28, 30, 34, 38, 43, 44], "don": 28, "But": [28, 36], "benefici": [28, 40, 41], "consid": [28, 33, 38, 43], "help": [28, 33, 36, 38, 39, 40, 43, 48, 49], "Not": [28, 33], "hyper": [28, 41], "expos": 28, "stabl": 28, "mani": [28, 39, 44], "often": [28, 29, 36, 41], "approxim": [28, 32, 39, 40], "1024": [28, 37], "10000": 28, "moder": 28, "least": [28, 31], "beta": 28, "warm": 28, "period": 28, "kera": [28, 30, 34, 38, 39, 40, 42, 44, 46], "offer": 29, "suit": 29, "sequenc": [29, 30, 37, 42], "try": [29, 31, 33, 36, 38, 43], "variou": [29, 32, 36, 38, 43, 44, 46, 49], "error": [29, 38, 41, 43, 44], "prone": 29, "consum": [29, 36], "amount": [29, 42], "toler": [29, 32], "soon": 29, "reach": [29, 32], "stop": 29, "summari": 29, "autom": [29, 38], "prepar": [29, 38, 46], "check": [29, 38, 41, 43], "friendli": [29, 38, 39], "denot": 29, "select": [29, 32, 40, 44, 48, 51], "best": [29, 32, 36, 38, 44], "preprat": 29, "mainli": 29, "three": [29, 32, 49], "stage": 29, "effort": 29, "manner": 29, "fail": [29, 37, 38], "goal": 29, "small": [30, 34, 38], "preceed": 30, "pcq": [30, 40], "veri": [30, 32, 36, 40, 49, 51], "NOT": [30, 51], "scenario": [30, 36, 38, 51], "decreas": 30, "main": [30, 42, 46, 49], "issu": [30, 34, 37, 43, 46, 48, 49], "depthwis": [30, 46], "oscil": 30, "quant": 30, "flow": [30, 38, 41, 43, 44], "diagram": [30, 33, 36, 44, 47, 50], "explain": [31, 36, 39, 44, 51], "occurr": 31, "detail": [31, 33, 34, 36, 38, 43, 44, 48, 49], "ratio": [31, 32, 48], "magnitud": 31, "matrix": 31, "upstream": [31, 51], "gain": [31, 36], "presenc": 31, "connect": [31, 35, 50], "residu": 31, "sometim": [31, 36, 39, 40], "attempt": [31, 38, 39], "close": [31, 32, 44], "prior": [31, 38, 40], "random": [31, 40], "regress": 31, "document": [32, 34, 45, 46], "svd": [32, 33, 35, 36, 46], "spatial": [32, 33, 35, 36, 46], "ssvd": 32, "prune": [32, 33, 35, 36, 46, 51], "accumul": 32, "mac": [32, 
36, 47, 50], "reduct": 32, "uncompress": 32, "algorithm": [32, 33, 36, 43, 51], "overal": [32, 36, 43], "latenc": 32, "bandwidth": 32, "vari": [32, 33, 39, 49], "architectur": 32, "io": [32, 46], "At": [32, 36, 45], "half": 32, "unknown": 32, "apriori": 32, "cssvd": 32, "tri": [32, 38], "75": 32, "pick": [32, 33, 36], "2b": 32, "rel": [32, 38, 43, 49], "avoid": 32, "larg": [32, 41, 47, 50], "2a": 32, "revisit": 32, "ccp": 32, "resnet": 32, "csvd": 32, "assess": 33, "sensit": [33, 38, 40, 43, 44, 46], "applic": [33, 37], "find": [33, 38, 40, 41, 44], "sure": [33, 37], "highest": 33, "dictionari": [33, 36, 42], "column": 33, "captur": 33, "predefin": 33, "candid": [33, 36], "unmodifi": 33, "score": [33, 36, 48], "last": [33, 35, 43], "monoton": 33, "fit": 33, "strict": [33, 42, 44], "procedur": [33, 36], "curv": 33, "core": 33, "constant": [33, 38], "met": 33, "binari": 33, "solut": [33, 41, 43], "quickli": 33, "suggest": [33, 36, 39], "lesser": [33, 36], "drstical": 33, "softwar": [34, 36], "framework": [34, 38, 42, 44], "meta": [34, 38], "h5": [34, 38], "hw": 34, "ptq": [34, 38, 40, 41], "redund": 34, "dilat": 35, "modules_to_ignor": 35, "depthwiseconv2d": 35, "librari": 36, "guidebook": [36, 38], "advic": 36, "greedi": [36, 48], "phase": [36, 38], "choic": [36, 44], "nomin": 36, "And": 36, "ml": [36, 38, 39, 48, 49], "those": 36, "fc": 36, "decompos": [36, 47, 50], "term": [36, 47, 48, 49, 50], "sharp": 36, "degrad": 36, "might": [36, 40], "rate": [36, 41], "carefulli": 36, "decai": 36, "slow": 36, "someth": [36, 48], "speed": [36, 39, 46], "itself": [36, 44, 47, 50], "load": 36, "searcher": 36, "Or": 36, "strike": 36, "balanc": 36, "chosen": 36, "major": [36, 45], "sai": 36, "xiangyu": 36, "zhang": 36, "jianhua": 36, "zou": 36, "kaim": 36, "he": 36, "jian": 36, "sun": 36, "deep": 36, "classif": 36, "detect": 36, "transact": 36, "pattern": 36, "analysi": [36, 43], "intellig": 36, "vol": 36, "pp": 36, "1943": 36, "1955": 36, "oct": 36, "2016": 36, "yihui": 36, "confer": [36, 39], "vision": [36, 39], "iccv": [36, 39], "venic": 36, "2017": 36, "1398": 36, "1406": 36, "jaderberg": 36, "andrea": 36, "vedaldi": 36, "andrew": 36, "zisserman": 36, "british": 36, "jan": 36, "2014": 36, "andrei": 36, "kuzmin": 36, "marku": [36, 39], "nagel": [36, 39], "saurabh": 36, "pitr": 36, "sandeep": 36, "pendyam": 36, "tijmen": [36, 39], "blankevoort": [36, 39], "taxonomi": 36, "graph": [37, 38, 44, 48], "successfulli": 37, "potenti": [37, 40, 48, 49], "workaround": 37, "primit": 37, "around": 37, "rewrit": 37, "slice": 37, "written": [37, 38], "caus": [37, 43, 44], "align_corn": 37, "deconvolut": 37, "deeplabv3": 37, "address": [37, 43, 48], "releas": [37, 45], "hardwar": [38, 39, 44], "predict": 38, "oppos": [38, 42], "advantag": 38, "No": 38, "pipelin": [38, 41, 43, 44], "suffici": [38, 40, 41, 44], "even": 38, "fast": 38, "easi": [38, 40], "gap": 38, "insert": [38, 44], "robust": 38, "account": [38, 41, 43], "trainabl": 38, "bias": 38, "reflect": [38, 44], "integr": 38, "standalon": 38, "consecut": [38, 39], "bn": [38, 46], "deprec": 38, "prep": 38, "accord": [38, 41, 42, 44], "align": 38, "retri": 38, "continu": [38, 39, 41, 43], "warn": 38, "hand": 38, "satisfactori": [38, 43], "bring": 38, "onto": 38, "thing": 38, "item": 38, "checkpoint": 38, "pb": 38, "trial": 38, "seem": 38, "off": [38, 39, 42], "bat": 38, "becom": 39, "paper": 39, "2019": 39, "arxiv": 39, "1906": 39, "04721": 39, "surround": 39, "highlight": [39, 48, 49], "big": 39, "discrep": 39, "accept": [39, 43], "wide": 39, "varianc": 39, 
"seen": [39, 40], "significantli": 39, "quantizaion": 39, "distribut": [39, 43, 44], "did": 39, "shift": 39, "empir": 39, "analyt": [39, 48, 49], "extract": 39, "bottleneck": [39, 43], "hybrid": 39, "approach": [39, 44], "mart": 39, "van": 39, "baalen": 39, "seoul": 39, "octob": 39, "hotspot": 40, "analys": 40, "callback": [40, 44], "plot": 40, "pretrain": [40, 41, 44], "dummi": 40, "label": [40, 41], "metric": [40, 44], "rune": 40, "doc": [40, 42, 48], "situat": 40, "pinpoint": 40, "culprit": 40, "again": [40, 41, 48], "per_layer_quant_en": 40, "per_layer_quant_dis": 40, "axi": 40, "track": 40, "min_max_rang": 40, "folder": 40, "enhanc": [40, 44], "toss": 40, "displai": [40, 48, 49], "activations_pdf": 40, "weights_pdf": 40, "monitor": 40, "contribut": [40, 43], "read": 40, "per_layer_mse_loss": 40, "mitig": [41, 44], "hyperparamet": 41, "accompani": 41, "throughout": [41, 42, 45, 49], "aid": 41, "converg": 41, "schedul": 41, "placement": 42, "fuse": [42, 44], "six": 42, "overrul": 42, "turn": 42, "op_typ": 42, "empti": 42, "is_output_quant": 42, "is_quant": 42, "strict_symmetr": 42, "unsigned_symmetr": 42, "omit": 42, "altogeth": 42, "asid": 42, "govern": 42, "unsign": [42, 44], "gemm": 42, "is_input_quant": 42, "recogn": [42, 44], "keep": [42, 43], "convent": 42, "preced": 42, "supergroup": [42, 46], "made": [42, 45], "op_list": 42, "member": 42, "branch": 42, "config": [42, 46], "entri": 42, "string": 42, "model_input": 42, "whatev": 42, "earlier": 42, "model_output": 42, "diagnost": 43, "strictli": 43, "insight": [43, 48, 49], "why": 43, "underperform": 43, "tackl": 43, "chart": 43, "saniti": 43, "ofth": 43, "independ": 43, "kept": 43, "convers": 43, "toward": 43, "wise": 43, "uneven": 43, "vanilla": 43, "global": 43, "restor": 43, "rest": 43, "inner": 43, "token": 43, "bert": 43, "reveal": 43, "problemat": [43, 49], "problem": 43, "resort": 43, "revert": 43, "power": [43, 45], "ultim": 44, "copi": 44, "ingest": 44, "feed": 44, "000": 44, "yield": 44, "dequantiz": 44, "hook": 44, "intercept": 44, "four": 44, "zero": [44, 46], "vice": 44, "versa": 44, "textrm": 44, "dfrac": 44, "strong": 44, "excess": 44, "signal": 44, "satur": 44, "erro": 44, "static": 44, "alongsid": 44, "ones": 44, "just": [44, 48, 51], "non": 44, "intermedi": 44, "welcom": 45, "motiv": 45, "clean": 45, "ground": 45, "maintain": 45, "familiar": 45, "newli": 45, "flexibl": 45, "transpar": 45, "redesign": 45, "yet": 45, "mainlin": 45, "compris": 45, "dispatch": 45, "easili": 45, "move": 45, "uphold": 45, "migrat": 45, "navig": 45, "blockwis": 45, "slim": 46, "backslash": 46, "user_guid": 46, "api_doc": 46, "quantizablemultiheadattent": 46, "kyuykim": 46, "multi": 46, "mangal": 46, "geunle": 46, "bug": 46, "correctli": 46, "leaf": 46, "klhsieh": 46, "akhobar": 46, "multiheadattent": 46, "ashvkuma": 46, "mha": 46, "pdf": 46, "fp16": 46, "minor": 46, "stand": [46, 47, 50], "adaptiveround": 46, "recurr": 46, "packag": 46, "decomposit": [47, 50], "singular": [47, 50], "\ud835\udc5a": [47, 50], "\ud835\udc5b": [47, 50], "\u210e": [47, 50], "\ud835\udc64": [47, 50], "give": [47, 50], "height": [47, 50, 51], "\ud835\udc58": [47, 50], "k": 47, "rank": [47, 50], "degre": [47, 50], "progress": [48, 49], "computation": [48, 49], "task": [48, 49], "websocket": 48, "tell": 48, "listen": 48, "rather": 48, "5006": 48, "compress_model": 48, "visualizecompress": 48, "display_eval_scor": 48, "display_comp_ratio_plot": 48, "directori": 49, "lot": 49, "anoth": [50, 51], "lose": 51, "much": 51, "explicitli": 51, "pictori": 51, "volum": 51, 
"hxwx8": 51, "hxwx5": 51, "simpli": 51, "propag": 51, "That": 51, "teh": 51, "green": 51, "color": 51, "side": 51, "action": 51, "taken": 51, "pink": 51, "orang": 51}, "objects": {"aimet_torch.v2.nn": [[6, 0, 1, "", "FakeQuantizationMixin"], [7, 0, 1, "", "QuantizationMixin"]], "aimet_torch.v2.nn.FakeQuantizationMixin": [[6, 1, 1, "", "__quant_init__"], [6, 1, 1, "", "compute_encodings"], [6, 1, 1, "", "forward"], [6, 1, 1, "", "from_module"], [6, 1, 1, "", "implements"], [6, 2, 1, "", "input_quantizers"], [6, 2, 1, "", "output_quantizers"], [6, 2, 1, "", "param_quantizers"]], "aimet_torch.v2.nn.QuantizationMixin": [[7, 1, 1, "", "__quant_init__"], [7, 1, 1, "", "compute_encodings"], [7, 1, 1, "", "forward"], [7, 1, 1, "", "from_module"], [7, 1, 1, "", "get_default_kernel"], [7, 1, 1, "", "get_kernel"], [7, 1, 1, "", "implements"], [7, 2, 1, "", "input_quantizers"], [7, 2, 1, "", "output_quantizers"], [7, 2, 1, "", "param_quantizers"], [7, 1, 1, "", "set_default_kernel"], [7, 1, 1, "", "set_kernel"]], "aimet_torch.v2.nn.base": [[24, 0, 1, "", "BaseQuantizationMixin"]], "aimet_torch.v2.nn.base.BaseQuantizationMixin": [[24, 1, 1, "", "__quant_init__"], [24, 1, 1, "", "compute_encodings"], [24, 1, 1, "", "forward"], [24, 2, 1, "", "input_quantizers"], [24, 2, 1, "", "output_quantizers"], [24, 2, 1, "", "param_quantizers"]], "aimet_torch.v2.quantization": [[13, 3, 0, "-", "affine"], [15, 3, 0, "-", "float"]], "aimet_torch.v2.quantization.affine": [[8, 0, 1, "", "Quantize"], [9, 0, 1, "", "QuantizeDequantize"], [10, 4, 1, "", "dequantize"], [11, 4, 1, "", "quantize"], [12, 4, 1, "", "quantize_dequantize"]], "aimet_torch.v2.quantization.affine.Quantize": [[8, 1, 1, "", "forward"]], "aimet_torch.v2.quantization.affine.QuantizeDequantize": [[9, 1, 1, "", "forward"]], "aimet_torch.v2.quantization.affine.quantizer": [[25, 0, 1, "", "Quantize"], [25, 0, 1, "", "QuantizeDequantize"], [25, 0, 1, "", "QuantizerBase"]], "aimet_torch.v2.quantization.affine.quantizer.Quantize": [[25, 1, 1, "", "forward"]], "aimet_torch.v2.quantization.affine.quantizer.QuantizeDequantize": [[25, 1, 1, "", "forward"]], "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase": [[25, 1, 1, "", "allow_overwrite"], [25, 1, 1, "", "compute_encodings"], [25, 1, 1, "", "get_encoding"], [25, 1, 1, "", "get_legacy_encodings"], [25, 1, 1, "", "is_initialized"], [25, 1, 1, "", "register_quantization_parameter"], [25, 1, 1, "", "set_legacy_encodings"]], "aimet_torch.v2.quantization.encoding_analyzer": [[18, 0, 1, "", "EncodingAnalyzer"], [20, 0, 1, "", "MinMaxEncodingAnalyzer"], [21, 0, 1, "", "PercentileEncodingAnalyzer"], [22, 0, 1, "", "SqnrEncodingAnalyzer"]], "aimet_torch.v2.quantization.encoding_analyzer.PercentileEncodingAnalyzer": [[21, 1, 1, "", "set_percentile"]], "aimet_torch.v2.quantization.encoding_analyzer.SqnrEncodingAnalyzer": [[22, 1, 1, "", "compute_encodings_from_stats"]], "aimet_torch.v2.quantization.float": [[14, 0, 1, "", "FloatQuantizeDequantize"], [14, 0, 1, "", "QuantizeDequantize"]], "aimet_torch.v2.quantization.tensor": [[16, 0, 1, "", "DequantizedTensor"], [16, 0, 1, "", "QuantizedTensor"]], "aimet_torch.v2.quantization.tensor.DequantizedTensor": [[16, 1, 1, "", "dequantize"], [16, 1, 1, "", "quantize"], [16, 1, 1, "", "quantized_repr"]], "aimet_torch.v2.quantization.tensor.QuantizedTensor": [[16, 1, 1, "", "dequantize"], [16, 1, 1, "", "quantize"], [16, 1, 1, "", "quantized_repr"]], "aimet_torch.v2.quantsim.config_utils": [[17, 4, 1, "", "set_activation_quantizers_to_float"], [17, 4, 1, "", 
"set_blockwise_quantization_for_weights"], [17, 4, 1, "", "set_grouped_blockwise_quantization_for_weights"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:attribute", "3": "py:module", "4": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "module", "Python module"], "4": ["py", "function", "Python function"]}, "titleterms": {"aimet": [2, 3, 4, 23, 28, 29, 30, 31, 32, 33, 35, 36, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 50, 51], "instal": [2, 3, 4, 34], "quick": 2, "releas": [2, 3, 4, 34, 46], "packag": [2, 3, 4], "system": 2, "requir": [2, 40], "advanc": 2, "instruct": 2, "docker": 3, "set": 3, "variant": [3, 18], "us": [3, 28, 36, 38, 45, 48], "prebuilt": 3, "imag": 3, "build": 3, "local": 3, "start": [3, 23, 34, 48], "contain": 3, "from": [3, 4, 26], "pypi": [3, 4], "environ": [3, 4], "setup": [3, 4], "prerequisit": [4, 27], "gpu": 4, "pytorch": [4, 23, 27, 37, 38, 49], "2": [4, 27, 46], "1": [4, 27, 46], "tensorflow": [4, 38, 49], "13": [4, 46], "onnx": 4, "common": [4, 28], "debian": 4, "torch": 4, "replac": 4, "pillow": 4, "simd": 4, "onnxruntim": 4, "post": [4, 19, 38, 39], "step": 4, "fakequantizationmixin": 6, "quantizationmixin": 7, "quantiz": [8, 11, 13, 15, 16, 17, 19, 24, 25, 26, 27, 38, 39, 41, 42, 43, 44, 49], "quantizedequant": [9, 14], "dequant": 10, "quantize_dequant": 12, "affin": [13, 26], "class": [13, 16, 24], "function": 13, "floatquantizedequant": 14, "float": [15, 26, 27], "tensor": 16, "blockwis": 17, "low": 17, "power": 17, "lpbq": 17, "top": [17, 24, 25], "level": [17, 24, 25], "api": [17, 23, 24, 25, 28, 29, 30, 39, 40, 44], "export": [17, 27], "encod": [18, 24, 44], "analyz": 18, "train": [19, 27, 38, 39, 41], "minmaxencodinganalyz": 20, "percentileencodinganalyz": 21, "sqnrencodinganalyz": 22, "ai": [23, 34], "model": [23, 27, 34, 36, 37, 38], "effici": [23, 34], "toolkit": [23, 34], "document": 23, "get": [23, 34, 36], "exampl": [23, 26], "featur": [23, 26, 32, 34, 38, 43, 45], "descript": [23, 40], "modul": [24, 26], "configur": [24, 42, 44], "comput": 24, "migrat": 26, "quantsim": [26, 27, 44, 45], "v2": [26, 45], "chang": 26, "process": 26, "import": 26, "quantizationsimmodel": 26, "move": 26, "quantwrapp": 26, "staticgrid": 26, "learnedgrid": 26, "code": 26, "deprec": 26, "quickstart": 27, "guid": [27, 34], "overal": [27, 31], "flow": [27, 39], "prepar": 27, "point": 27, "batchnorm": 27, "fold": 27, "fine": [27, 36], "tune": [27, 36], "awar": [27, 41], "adaround": 28, "case": [28, 36, 38], "terminologi": 28, "autoqu": 29, "overview": [29, 30, 33, 34, 36, 39, 40, 41, 42, 44, 45, 48, 49, 51], "workflow": [29, 30, 38, 41, 44], "bn": 30, "re": 30, "estim": 30, "channel": 31, "prune": 31, "procedur": 31, "select": [31, 33, 36], "winnow": [31, 51], "weight": [31, 50], "reconstruct": 31, "compress": [32, 33, 36, 48], "guidebook": [32, 43], "greedi": 33, "ratio": [33, 36], "how": [33, 42, 48, 51], "work": [33, 51], "per": [33, 36], "layer": [33, 36, 39], "explor": 33, "user": [34, 39], "inform": 34, "toc": 34, "tree": 34, "known": 35, "issu": 35, "option": 36, "techniqu": [36, 39], "better": 36, "result": 36, "rank": 36, "round": 36, "faq": [36, 39], "refer": [36, 39], "guidelin": [37, 38], "debug": 38, "analysi": [38, 40], "tool": [38, 48], "cross": 39, "equal": 39, "quantanalyz": 40, "detail": 40, "qat": 41, "mode": 41, "recommend": 41, "simul": [42, 44], "file": 42, "structur": 42, "individu": 42, "section": 42, "nois": 44, 
"determin": 44, "paramet": 44, "scheme": 44, "op": 44, "frequent": 44, "ask": 44, "question": 44, "new": 45, "note": 46, "22": 46, "0": 46, "21": 46, "20": 46, "19": 46, "py37": 46, "18": 46, "17": 46, "16": 46, "14": 46, "spatial": 47, "svd": [47, 50], "visual": [48, 49], "design": 48, "bokeh": 48, "server": 48, "session": 48}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "nbsphinx": 4, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"AIMET Installation": [[2, "aimet-installation"]], "Quick Install": [[2, "quick-install"]], "Release Packages": [[2, "release-packages"]], "System Requirements": [[2, "system-requirements"]], "Advanced Installation Instructions": [[2, "advanced-installation-instructions"]], "AIMET Installation in Docker": [[3, "aimet-installation-in-docker"]], "Set variant": [[3, "set-variant"]], "Use prebuilt docker image": [[3, "use-prebuilt-docker-image"]], "Build docker image locally": [[3, "build-docker-image-locally"]], "Start docker container": [[3, "start-docker-container"]], "Install AIMET packages": [[3, "install-aimet-packages"], [4, "install-aimet-packages"]], "From PyPI": [[3, "from-pypi"], [4, "from-pypi"]], "From Release Package": [[3, "from-release-package"], [4, "from-release-package"]], "Environment setup": [[3, "environment-setup"], [4, "environment-setup"]], "AIMET Installation and Setup": [[4, "aimet-installation-and-setup"]], "Install prerequisite packages": [[4, "install-prerequisite-packages"]], "Install GPU packages": [[4, "install-gpu-packages"]], "Install GPU packages for PyTorch 2.1 or TensorFlow": [[4, "install-gpu-packages-for-pytorch-2-1-or-tensorflow"]], "Install GPU packages for PyTorch 1.13 or ONNX": [[4, "install-gpu-packages-for-pytorch-1-13-or-onnx"]], "Install common debian packages": [[4, "install-common-debian-packages"]], "Install tensorflow GPU debian packages": [[4, "install-tensorflow-gpu-debian-packages"]], "Install torch GPU debian packages": [[4, "install-torch-gpu-debian-packages"]], "Install ONNX GPU debian packages": [[4, "install-onnx-gpu-debian-packages"]], "Replace Pillow with Pillow-SIMD": [[4, "replace-pillow-with-pillow-simd"]], "Replace onnxruntime with onnxruntime-gpu": [[4, "replace-onnxruntime-with-onnxruntime-gpu"]], "Post installation steps": [[4, "post-installation-steps"]], "FakeQuantizationMixin": [[6, "fakequantizationmixin"]], "QuantizationMixin": [[7, "quantizationmixin"]], "Quantize": [[8, "quantize"]], "QuantizeDequantize": [[9, "quantizedequantize"], [14, "quantizedequantize"]], "dequantize": [[10, "dequantize"]], "quantize": [[11, "quantize"]], "quantize_dequantize": [[12, "quantize-dequantize"]], "quantization.affine": [[13, "module-aimet_torch.v2.quantization.affine"]], "Classes": [[13, "classes"], [16, "classes"]], "Functions": [[13, "functions"]], "FloatQuantizeDequantize": [[14, "floatquantizedequantize"]], "quantization.float": [[15, "module-aimet_torch.v2.quantization.float"]], "quantization.tensor": [[16, "quantization-tensor"]], "Blockwise Quantization": [[17, "blockwise-quantization"]], "Low Power Blockwise Quantization (LPBQ)": [[17, "low-power-blockwise-quantization-lpbq"]], "Top Level API": [[17, "top-level-api"]], "Export": [[17, "export"]], "Encoding Analyzers": [[18, "encoding-analyzers"]], "Variants": [[18, 
"variants"]], "Post-Training Quantization": [[19, "post-training-quantization"], [38, "post-training-quantization"]], "MinMaxEncodingAnalyzer": [[20, "minmaxencodinganalyzer"]], "PercentileEncodingAnalyzer": [[21, "percentileencodinganalyzer"]], "SqnrEncodingAnalyzer": [[22, "sqnrencodinganalyzer"]], "AIMET: AI Model Efficiency Toolkit Documentation": [[23, "aimet-ai-model-efficiency-toolkit-documentation"]], "Getting Started": [[23, "getting-started"], [34, "getting-started"]], "Examples": [[23, null]], "Feature Descriptions": [[23, null]], "AIMET PyTorch API": [[23, null]], "Quantized Modules": [[24, "quantized-modules"]], "Top-level API": [[24, "top-level-api"], [25, "top-level-api"]], "Configuration": [[24, "configuration"]], "Computing Encodings": [[24, "computing-encodings"]], "Quantized Module Classes": [[24, "quantized-module-classes"]], "Quantizers": [[25, "quantizers"]], "Migrate to QuantSim v2": [[26, "migrate-to-quantsim-v2"]], "Changes in QuantSim v2": [[26, "changes-in-quantsim-v2"]], "Migration Process": [[26, "migration-process"]], "Imports": [[26, "imports"]], "QuantizationSimModel": [[26, "quantizationsimmodel"]], "Moving from QuantWrapper to Quantized Modules": [[26, "moving-from-quantwrapper-to-quantized-modules"]], "Moving from StaticGrid and LearnedGrid Quantizer to Affine and Float Quantizer": [[26, "moving-from-staticgrid-and-learnedgrid-quantizer-to-affine-and-float-quantizer"]], "Code Examples": [[26, "code-examples"]], "Deprecated Features": [[26, "deprecated-features"]], "Quickstart Guide": [[27, "quickstart-guide"]], "Overall flow": [[27, "overall-flow"]], "PyTorch prerequisites": [[27, "pytorch-prerequisites"]], "Prepare the floating point model for quantization": [[27, "prepare-the-floating-point-model-for-quantization"]], "1) Model preparation": [[27, "model-preparation"]], "2) BatchNorm fold": [[27, "batchnorm-fold"]], "Quantize the model": [[27, "quantize-the-model"]], "Fine-tune the model with quantization aware training": [[27, "fine-tune-the-model-with-quantization-aware-training"]], "Export the quantsim model": [[27, "export-the-quantsim-model"]], "AIMET AdaRound": [[28, "aimet-adaround"]], "AdaRound Use Cases": [[28, "adaround-use-cases"]], "Common terminology": [[28, "common-terminology"]], "Use Cases": [[28, "use-cases"], [38, "use-cases"]], "AdaRound API": [[28, "adaround-api"]], "AIMET AutoQuant": [[29, "aimet-autoquant"]], "Overview": [[29, "overview"], [30, "overview"], [33, "overview"], [34, "overview"], [36, "overview"], [39, "overview"], [40, "overview"], [41, "overview"], [42, "overview"], [44, "overview"], [45, "overview"], [48, "overview"], [49, "overview"], [51, "overview"]], "Workflow": [[29, "workflow"], [30, "workflow"]], "AutoQuant API": [[29, "autoquant-api"]], "AIMET BN Re-estimation": [[30, "aimet-bn-re-estimation"]], "BN Re-estimation API": [[30, "bn-re-estimation-api"]], "AIMET Channel Pruning": [[31, "aimet-channel-pruning"]], "Overall Procedure": [[31, "overall-procedure"]], "Channel Selection": [[31, "channel-selection"]], "Winnowing": [[31, "winnowing"]], "Weight Reconstruction": [[31, "weight-reconstruction"]], "AIMET Compression Features Guidebook": [[32, "aimet-compression-features-guidebook"]], "AIMET Greedy Compression Ratio Selection": [[33, "aimet-greedy-compression-ratio-selection"]], "How it works": [[33, "how-it-works"]], "Per-layer Exploration": [[33, "per-layer-exploration"]], "Compression Ratio Selection": [[33, "compression-ratio-selection"]], "AI Model Efficiency Toolkit User Guide": [[34, 
"ai-model-efficiency-toolkit-user-guide"]], "Features": [[34, "features"]], "Release Information": [[34, "release-information"]], "Installation Guide": [[34, "installation-guide"]], "toc tree": [[34, "toc-tree"]], "AIMET Known Issues": [[35, "aimet-known-issues"]], "AIMET Model Compression": [[36, "aimet-model-compression"]], "Use Case": [[36, "use-case"]], "Compression ratio selection": [[36, "compression-ratio-selection"]], "Model Compression": [[36, "model-compression"]], "Optional techniques to get better compression results": [[36, "optional-techniques-to-get-better-compression-results"]], "Rank Rounding": [[36, "rank-rounding"]], "Per-layer Fine-tuning": [[36, "per-layer-fine-tuning"]], "FAQs": [[36, "faqs"], [39, "faqs"]], "References": [[36, "references"], [39, "references"]], "Model Guidelines for PyTorch": [[37, "model-guidelines-for-pytorch"]], "AIMET Model Quantization": [[38, "aimet-model-quantization"]], "AIMET Quantization Features": [[38, "aimet-quantization-features"]], "Debugging/Analysis Tools": [[38, "debugging-analysis-tools"]], "AIMET Quantization Workflow": [[38, "aimet-quantization-workflow"]], "PyTorch": [[38, "pytorch"], [49, "pytorch"]], "Tensorflow": [[38, "tensorflow"]], "Debugging Guidelines": [[38, "debugging-guidelines"]], "AIMET Post-Training Quantization Techniques": [[39, "aimet-post-training-quantization-techniques"]], "User Flow": [[39, "user-flow"]], "Cross-Layer Equalization API": [[39, "cross-layer-equalization-api"]], "AIMET QuantAnalyzer": [[40, "aimet-quantanalyzer"]], "Requirements": [[40, "requirements"]], "Detailed Analysis Descriptions": [[40, "detailed-analysis-descriptions"]], "QuantAnalyzer API": [[40, "quantanalyzer-api"]], "AIMET Quantization Aware Training": [[41, "aimet-quantization-aware-training"]], "QAT workflow": [[41, "qat-workflow"]], "QAT modes": [[41, "qat-modes"]], "Recommendations for Quantization-Aware Training": [[41, "recommendations-for-quantization-aware-training"]], "Quantization Simulation Configuration": [[42, "quantization-simulation-configuration"]], "Configuration File Structure": [[42, "configuration-file-structure"]], "How to configure individual Configuration File Sections": [[42, "how-to-configure-individual-configuration-file-sections"]], "AIMET Quantization Features Guidebook": [[43, "aimet-quantization-features-guidebook"]], "AIMET Quantization Simulation": [[44, "aimet-quantization-simulation"]], "QuantSim Workflow": [[44, "quantsim-workflow"]], "Simulating Quantization Noise": [[44, "simulating-quantization-noise"]], "Determining Quantization Parameters (Encodings)": [[44, "determining-quantization-parameters-encodings"]], "Quantization Schemes": [[44, "quantization-schemes"]], "Configuring Quantization Simulation Ops": [[44, "configuring-quantization-simulation-ops"]], "Quantization Simulation APIs": [[44, "quantization-simulation-apis"]], "Frequently Asked Questions": [[44, "frequently-asked-questions"]], "QuantSim v2": [[45, "quantsim-v2"]], "Using QuantSim v2": [[45, "using-quantsim-v2"]], "New Features": [[45, "new-features"]], "AIMET Release Notes": [[46, "aimet-release-notes"]], "1.22.2": [[46, "id1"]], "1.22.1": [[46, "id2"]], "1.22.0": [[46, "id3"]], "1.21.0": [[46, "id4"]], "1.20.0": [[46, "id5"]], "1.19.1.py37": [[46, "py37"]], "1.19.1": [[46, "id6"]], "1.18.0.py37": [[46, "id7"]], "1.18.0": [[46, "id8"]], "1.17.0.py37": [[46, "id9"]], "1.17.0": [[46, "id10"]], "1.16.2.py37": [[46, "id11"]], "1.16.2": [[46, "id12"]], "1.16.1.py37": [[46, "id13"]], "1.16.1": [[46, "id14"]], "1.16.0": [[46, 
"id15"]], "1.14.0": [[46, "id16"]], "1.13.0": [[46, "id17"]], "AIMET Spatial SVD": [[47, "aimet-spatial-svd"]], "AIMET Visualization": [[48, "aimet-visualization"]], "Design": [[48, "design"]], "Compression": [[48, "compression"]], "Starting a Bokeh Server Session:": [[48, "starting-a-bokeh-server-session"]], "How to use the tool": [[48, "how-to-use-the-tool"]], "AIMET Visualization for Quantization": [[49, "aimet-visualization-for-quantization"]], "Quantization": [[49, "quantization"]], "TensorFlow": [[49, "tensorflow"]], "AIMET Weight SVD": [[50, "aimet-weight-svd"]], "AIMET Winnowing": [[51, "aimet-winnowing"]], "Winnowing Overview": [[51, "winnowing-overview"]], "How Winnowing Works": [[51, "how-winnowing-works"]]}, "indexentries": {"fakequantizationmixin (class in aimet_torch.v2.nn)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin"]], "__quant_init__() (aimet_torch.v2.nn.fakequantizationmixin method)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.__quant_init__"]], "compute_encodings() (aimet_torch.v2.nn.fakequantizationmixin method)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.compute_encodings"]], "forward() (aimet_torch.v2.nn.fakequantizationmixin method)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.forward"]], "from_module() (aimet_torch.v2.nn.fakequantizationmixin class method)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.from_module"]], "implements() (aimet_torch.v2.nn.fakequantizationmixin class method)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.implements"]], "input_quantizers (aimet_torch.v2.nn.fakequantizationmixin attribute)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.input_quantizers"]], "output_quantizers (aimet_torch.v2.nn.fakequantizationmixin attribute)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.output_quantizers"]], "param_quantizers (aimet_torch.v2.nn.fakequantizationmixin attribute)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.param_quantizers"]], "quantizationmixin (class in aimet_torch.v2.nn)": [[7, "aimet_torch.v2.nn.QuantizationMixin"]], "__quant_init__() (aimet_torch.v2.nn.quantizationmixin method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.__quant_init__"]], "compute_encodings() (aimet_torch.v2.nn.quantizationmixin method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.compute_encodings"]], "forward() (aimet_torch.v2.nn.quantizationmixin method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.forward"]], "from_module() (aimet_torch.v2.nn.quantizationmixin class method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.from_module"]], "get_default_kernel() (aimet_torch.v2.nn.quantizationmixin class method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.get_default_kernel"]], "get_kernel() (aimet_torch.v2.nn.quantizationmixin method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.get_kernel"]], "implements() (aimet_torch.v2.nn.quantizationmixin class method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.implements"]], "input_quantizers (aimet_torch.v2.nn.quantizationmixin attribute)": [[7, "aimet_torch.v2.nn.QuantizationMixin.input_quantizers"]], "output_quantizers (aimet_torch.v2.nn.quantizationmixin attribute)": [[7, "aimet_torch.v2.nn.QuantizationMixin.output_quantizers"]], "param_quantizers (aimet_torch.v2.nn.quantizationmixin attribute)": [[7, "aimet_torch.v2.nn.QuantizationMixin.param_quantizers"]], "set_default_kernel() (aimet_torch.v2.nn.quantizationmixin class method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.set_default_kernel"]], "set_kernel() (aimet_torch.v2.nn.quantizationmixin method)": [[7, 
"aimet_torch.v2.nn.QuantizationMixin.set_kernel"]], "quantize (class in aimet_torch.v2.quantization.affine)": [[8, "aimet_torch.v2.quantization.affine.Quantize"]], "forward() (aimet_torch.v2.quantization.affine.quantize method)": [[8, "aimet_torch.v2.quantization.affine.Quantize.forward"]], "quantizedequantize (class in aimet_torch.v2.quantization.affine)": [[9, "aimet_torch.v2.quantization.affine.QuantizeDequantize"]], "forward() (aimet_torch.v2.quantization.affine.quantizedequantize method)": [[9, "aimet_torch.v2.quantization.affine.QuantizeDequantize.forward"]], "dequantize() (in module aimet_torch.v2.quantization.affine)": [[10, "aimet_torch.v2.quantization.affine.dequantize"]], "quantize() (in module aimet_torch.v2.quantization.affine)": [[11, "aimet_torch.v2.quantization.affine.quantize"]], "quantize_dequantize() (in module aimet_torch.v2.quantization.affine)": [[12, "aimet_torch.v2.quantization.affine.quantize_dequantize"]], "aimet_torch.v2.quantization.affine": [[13, "module-aimet_torch.v2.quantization.affine"]], "module": [[13, "module-aimet_torch.v2.quantization.affine"], [15, "module-aimet_torch.v2.quantization.float"]], "floatquantizedequantize (class in aimet_torch.v2.quantization.float)": [[14, "aimet_torch.v2.quantization.float.FloatQuantizeDequantize"]], "quantizedequantize (class in aimet_torch.v2.quantization.float)": [[14, "aimet_torch.v2.quantization.float.QuantizeDequantize"]], "aimet_torch.v2.quantization.float": [[15, "module-aimet_torch.v2.quantization.float"]], "dequantizedtensor (class in aimet_torch.v2.quantization.tensor)": [[16, "aimet_torch.v2.quantization.tensor.DequantizedTensor"]], "quantizedtensor (class in aimet_torch.v2.quantization.tensor)": [[16, "aimet_torch.v2.quantization.tensor.QuantizedTensor"]], "dequantize() (aimet_torch.v2.quantization.tensor.dequantizedtensor method)": [[16, "aimet_torch.v2.quantization.tensor.DequantizedTensor.dequantize"]], "dequantize() (aimet_torch.v2.quantization.tensor.quantizedtensor method)": [[16, "aimet_torch.v2.quantization.tensor.QuantizedTensor.dequantize"]], "quantize() (aimet_torch.v2.quantization.tensor.dequantizedtensor method)": [[16, "aimet_torch.v2.quantization.tensor.DequantizedTensor.quantize"]], "quantize() (aimet_torch.v2.quantization.tensor.quantizedtensor method)": [[16, "aimet_torch.v2.quantization.tensor.QuantizedTensor.quantize"]], "quantized_repr() (aimet_torch.v2.quantization.tensor.dequantizedtensor method)": [[16, "aimet_torch.v2.quantization.tensor.DequantizedTensor.quantized_repr"]], "quantized_repr() (aimet_torch.v2.quantization.tensor.quantizedtensor method)": [[16, "aimet_torch.v2.quantization.tensor.QuantizedTensor.quantized_repr"]], "set_activation_quantizers_to_float() (in module aimet_torch.v2.quantsim.config_utils)": [[17, "aimet_torch.v2.quantsim.config_utils.set_activation_quantizers_to_float"]], "set_blockwise_quantization_for_weights() (in module aimet_torch.v2.quantsim.config_utils)": [[17, "aimet_torch.v2.quantsim.config_utils.set_blockwise_quantization_for_weights"]], "set_grouped_blockwise_quantization_for_weights() (in module aimet_torch.v2.quantsim.config_utils)": [[17, "aimet_torch.v2.quantsim.config_utils.set_grouped_blockwise_quantization_for_weights"]], "encodinganalyzer (class in aimet_torch.v2.quantization.encoding_analyzer)": [[18, "aimet_torch.v2.quantization.encoding_analyzer.EncodingAnalyzer"]], "minmaxencodinganalyzer (class in aimet_torch.v2.quantization.encoding_analyzer)": [[20, "aimet_torch.v2.quantization.encoding_analyzer.MinMaxEncodingAnalyzer"]], 
"percentileencodinganalyzer (class in aimet_torch.v2.quantization.encoding_analyzer)": [[21, "aimet_torch.v2.quantization.encoding_analyzer.PercentileEncodingAnalyzer"]], "set_percentile() (aimet_torch.v2.quantization.encoding_analyzer.percentileencodinganalyzer method)": [[21, "aimet_torch.v2.quantization.encoding_analyzer.PercentileEncodingAnalyzer.set_percentile"]], "sqnrencodinganalyzer (class in aimet_torch.v2.quantization.encoding_analyzer)": [[22, "aimet_torch.v2.quantization.encoding_analyzer.SqnrEncodingAnalyzer"]], "compute_encodings_from_stats() (aimet_torch.v2.quantization.encoding_analyzer.sqnrencodinganalyzer method)": [[22, "aimet_torch.v2.quantization.encoding_analyzer.SqnrEncodingAnalyzer.compute_encodings_from_stats"]], "basequantizationmixin (class in aimet_torch.v2.nn.base)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin"]], "__quant_init__() (aimet_torch.v2.nn.base.basequantizationmixin method)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin.__quant_init__"]], "compute_encodings() (aimet_torch.v2.nn.base.basequantizationmixin method)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin.compute_encodings"]], "forward() (aimet_torch.v2.nn.base.basequantizationmixin method)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin.forward"]], "input_quantizers (aimet_torch.v2.nn.base.basequantizationmixin attribute)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin.input_quantizers"]], "output_quantizers (aimet_torch.v2.nn.base.basequantizationmixin attribute)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin.output_quantizers"]], "param_quantizers (aimet_torch.v2.nn.base.basequantizationmixin attribute)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin.param_quantizers"]], "quantize (class in aimet_torch.v2.quantization.affine.quantizer)": [[25, "aimet_torch.v2.quantization.affine.quantizer.Quantize"]], "quantizedequantize (class in aimet_torch.v2.quantization.affine.quantizer)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizeDequantize"]], "quantizerbase (class in aimet_torch.v2.quantization.affine.quantizer)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase"]], "allow_overwrite() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.allow_overwrite"]], "compute_encodings() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.compute_encodings"]], "forward() (aimet_torch.v2.quantization.affine.quantizer.quantize method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.Quantize.forward"]], "forward() (aimet_torch.v2.quantization.affine.quantizer.quantizedequantize method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizeDequantize.forward"]], "get_encoding() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.get_encoding"]], "get_legacy_encodings() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.get_legacy_encodings"]], "is_initialized() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.is_initialized"]], "register_quantization_parameter() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, 
"aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.register_quantization_parameter"]], "set_legacy_encodings() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.set_legacy_encodings"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["_templates/autosummary/class", "_templates/autosummary/function", "install/index", "install/install_docker", "install/install_host", "toplevelhidden", "torch_docs/api/nn.fake_quantization_mixin", "torch_docs/api/nn.quantization_mixin", "torch_docs/api/quantization/affine/generated/aimet_torch.v2.quantization.affine.Quantize", "torch_docs/api/quantization/affine/generated/aimet_torch.v2.quantization.affine.QuantizeDequantize", "torch_docs/api/quantization/affine/generated/aimet_torch.v2.quantization.affine.dequantize", "torch_docs/api/quantization/affine/generated/aimet_torch.v2.quantization.affine.quantize_", "torch_docs/api/quantization/affine/generated/aimet_torch.v2.quantization.affine.quantize_dequantize", "torch_docs/api/quantization/affine/index", "torch_docs/api/quantization/float/FloatQuantizeDequantize", "torch_docs/api/quantization/float/index", "torch_docs/api/quantization/tensor", "torch_docs/blockwise_quantization", "torch_docs/encoding_analyzer", "torch_docs/examples/ptq", "torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.MinMaxEncodingAnalyzer", "torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.PercentileEncodingAnalyzer", "torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.SqnrEncodingAnalyzer", "torch_docs/index", "torch_docs/quantized_modules", "torch_docs/quantizer", "torch_docs/tutorials/migration_guide", "torch_docs/tutorials/quickstart_guide", "user_guide/adaround", "user_guide/auto_quant", "user_guide/bn_reestimation", "user_guide/channel_pruning", "user_guide/compression_feature_guidebook", "user_guide/greedy_compression_ratio_selection", "user_guide/index", "user_guide/known_issues", "user_guide/model_compression", "user_guide/model_guidelines", "user_guide/model_quantization", "user_guide/post_training_quant_techniques", "user_guide/quant_analyzer", "user_guide/quantization_aware_training", "user_guide/quantization_configuration", "user_guide/quantization_feature_guidebook", "user_guide/quantization_sim", "user_guide/quantsim_2.0_overview", "user_guide/release_notes", "user_guide/spatial_svd", "user_guide/visualization_compression", "user_guide/visualization_quant", "user_guide/weight_svd", "user_guide/winnowing"], "filenames": ["_templates/autosummary/class.rst", "_templates/autosummary/function.rst", "install/index.rst", "install/install_docker.rst", "install/install_host.rst", "toplevelhidden.rst", "torch_docs/api/nn.fake_quantization_mixin.rst", "torch_docs/api/nn.quantization_mixin.rst", "torch_docs/api/quantization/affine/generated/aimet_torch.v2.quantization.affine.Quantize.rst", "torch_docs/api/quantization/affine/generated/aimet_torch.v2.quantization.affine.QuantizeDequantize.rst", "torch_docs/api/quantization/affine/generated/aimet_torch.v2.quantization.affine.dequantize.rst", "torch_docs/api/quantization/affine/generated/aimet_torch.v2.quantization.affine.quantize_.rst", "torch_docs/api/quantization/affine/generated/aimet_torch.v2.quantization.affine.quantize_dequantize.rst", "torch_docs/api/quantization/affine/index.rst", "torch_docs/api/quantization/float/FloatQuantizeDequantize.rst", "torch_docs/api/quantization/float/index.rst", 
"torch_docs/api/quantization/tensor.rst", "torch_docs/blockwise_quantization.rst", "torch_docs/encoding_analyzer.rst", "torch_docs/examples/ptq.rst", "torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.MinMaxEncodingAnalyzer.rst", "torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.PercentileEncodingAnalyzer.rst", "torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.SqnrEncodingAnalyzer.rst", "torch_docs/index.rst", "torch_docs/quantized_modules.rst", "torch_docs/quantizer.rst", "torch_docs/tutorials/migration_guide.rst", "torch_docs/tutorials/quickstart_guide.rst", "user_guide/adaround.rst", "user_guide/auto_quant.rst", "user_guide/bn_reestimation.rst", "user_guide/channel_pruning.rst", "user_guide/compression_feature_guidebook.rst", "user_guide/greedy_compression_ratio_selection.rst", "user_guide/index.rst", "user_guide/known_issues.rst", "user_guide/model_compression.rst", "user_guide/model_guidelines.rst", "user_guide/model_quantization.rst", "user_guide/post_training_quant_techniques.rst", "user_guide/quant_analyzer.rst", "user_guide/quantization_aware_training.rst", "user_guide/quantization_configuration.rst", "user_guide/quantization_feature_guidebook.rst", "user_guide/quantization_sim.rst", "user_guide/quantsim_2.0_overview.rst", "user_guide/release_notes.rst", "user_guide/spatial_svd.rst", "user_guide/visualization_compression.rst", "user_guide/visualization_quant.rst", "user_guide/weight_svd.rst", "user_guide/winnowing.rst"], "titles": ["<no title>", "<no title>", "AIMET Installation", "AIMET Installation in Docker", "AIMET Installation and Setup", "<no title>", "FakeQuantizationMixin", "QuantizationMixin", "Quantize", "QuantizeDequantize", "dequantize", "quantize", "quantize_dequantize", "quantization.affine", "FloatQuantizeDequantize", "quantization.float", "quantization.tensor", "Blockwise Quantization", "Encoding Analyzers", "Post-Training Quantization", "MinMaxEncodingAnalyzer", "PercentileEncodingAnalyzer", "SqnrEncodingAnalyzer", "AIMET: AI Model Efficiency Toolkit Documentation", "Quantized Modules", "Quantizers", "Migrate to QuantSim v2", "Quickstart Guide", "AIMET AdaRound", "AIMET AutoQuant", "AIMET BN Re-estimation", "AIMET Channel Pruning", "AIMET Compression Features Guidebook", "AIMET Greedy Compression Ratio Selection", "AI Model Efficiency Toolkit User Guide", "AIMET Known Issues", "AIMET Model Compression", "Model Guidelines for PyTorch", "AIMET Model Quantization", "AIMET Post-Training Quantization Techniques", "AIMET QuantAnalyzer", "AIMET Quantization Aware Training", "Quantization Simulation Configuration", "AIMET Quantization Features Guidebook", "AIMET Quantization Simulation", "QuantSim v2", "AIMET Release Notes", "AIMET Spatial SVD", "AIMET Visualization", "AIMET Visualization for Quantization", "AIMET Weight SVD", "AIMET Winnowing"], "terms": {"name": [0, 1, 3, 4, 6, 7, 24, 25, 39, 44, 46, 48], "escap": [0, 1], "underlin": [0, 1], "qualcomm": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], "innov": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], "center": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], "inc": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], "ai": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], "model": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 35, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], "effici": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], "toolkit": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], "aimet_common": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], "quantsim_config": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], "default_config": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], "json": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], "1": [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51], "33": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], "0": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51], "The": [2, 3, 4, 6, 7, 11, 12, 14, 16, 17, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47, 48, 49, 50, 51], "pytorch": [2, 3, 6, 7, 24, 30, 34, 40, 42, 44, 46], "gpu": [2, 3, 38, 46], "pypi": 2, "ar": [2, 3, 4, 6, 7, 8, 9, 14, 17, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 48, 49, 51], "avail": [2, 7, 27, 37, 40, 42, 43, 45], "environ": 2, "meet": [2, 29, 32, 33], "follow": [2, 3, 4, 6, 7, 17, 26, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 40, 41, 42, 44, 45, 47, 50, 51], "64": [2, 8, 17, 22, 25, 28], "bit": [2, 14, 17, 27, 28, 30, 38, 43, 44, 46], "intel": 2, "x86": 2, "compat": [2, 17, 27], "processor": 2, "linux": [2, 4], "ubuntu": [2, 4], "22": [2, 4, 27], "04": [2, 4], "lt": [2, 4], "python": [2, 3, 4, 26, 45], "3": [2, 11, 12, 16, 17, 22, 26, 27, 32, 38, 41, 43, 51], "10": [2, 3, 4, 6, 7, 8, 9, 11, 16, 17, 24, 25, 27, 33, 36, 41], "20": [2, 6, 7, 28, 41], "8": [2, 4, 6, 7, 8, 9, 11, 12, 14, 16, 17, 24, 25, 26, 27, 38, 51], "cuda": [2, 3, 4, 27], "12": [2, 3, 
4, 11, 17], "torch": [2, 3, 6, 7, 8, 9, 11, 12, 14, 16, 17, 23, 24, 25, 26, 27, 37, 46], "2": [2, 3, 9, 11, 12, 14, 16, 17, 25, 26, 28, 38, 43, 44], "pip": [2, 3, 4, 23], "apt": [2, 3, 4, 23], "get": [2, 3, 4, 28, 31, 38, 49], "liblapack": [2, 3, 4, 23], "python3": [2, 3, 4, 23], "m": [2, 3, 4, 23], "For": [2, 3, 4, 6, 7, 17, 23, 24, 26, 27, 28, 31, 32, 33, 34, 35, 36, 38, 40, 42, 44, 48, 51], "other": [2, 17, 26, 33, 35, 36, 38, 40, 43, 44, 46], "variant": [2, 4, 41, 44], "latest": [2, 3], "version": [2, 3, 4, 6, 7, 17, 24, 26, 27, 34, 45], "from": [2, 6, 7, 8, 9, 14, 16, 17, 21, 24, 25, 27, 28, 31, 32, 33, 37, 38, 39, 40, 41, 42, 43, 44, 45, 48, 51], "whl": [2, 3, 4], "file": [2, 3, 4, 17, 27, 38, 40, 41, 44, 46, 49], "host": [2, 3, 4, 46, 48], "http": [2, 3, 4, 32, 39, 46, 48], "github": [2, 3, 4, 32, 46], "com": [2, 3, 4, 46], "quic": [2, 3, 4, 32, 46], "11": [2, 3, 4, 11, 16], "x": [2, 14, 16, 24, 27, 32, 37, 40], "download": [2, 3, 4, 27], "aimet_torch": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 37, 45], "cu118": [2, 3, 4], "cp310": [2, 3, 4], "manylinux_2_34_x86_64": [2, 3, 4], "cpu": [2, 3, 27, 38, 46], "onli": [2, 3, 4, 11, 12, 16, 17, 24, 27, 30, 35, 38, 40, 41, 42, 46, 51], "13": [2, 3, 11], "cu117": 2, "tensorflow": [2, 3, 30, 34, 35, 42, 44, 46], "aimet_tensorflow": [2, 4], "onnx": [2, 3, 23, 34, 37, 38, 42], "14": [2, 11, 27], "aimet_onnx": [2, 4], "older": 2, "pleas": [2, 4, 23, 26, 27, 28, 31, 34, 36, 40, 44, 45], "brows": [2, 3, 4], "document": [2, 32, 34, 45, 46], "correspond": [2, 3, 4, 17, 24, 31, 33, 38, 40, 51], "select": [2, 3, 4, 29, 32, 40, 44, 48, 51], "appropri": [2, 4, 6, 7, 17, 24, 32, 33, 36, 43], "platform": [2, 38], "setup": [2, 26], "bash": [2, 3], "command": [2, 3, 4, 48], "shell": 2, "nvidia": [2, 3, 4], "card": 2, "comput": [2, 4, 6, 7, 14, 17, 21, 22, 27, 28, 36, 37, 38, 39, 40, 44, 48, 51], "capabl": [2, 24, 48, 49], "5": [2, 8, 9, 11, 12, 14, 17, 24, 25, 26, 32, 41, 43], "later": [2, 27], "docker": 2, "To": [2, 24, 26, 27, 30, 33, 36, 37, 40, 42, 43, 44, 45, 48, 49], "us": [2, 4, 6, 7, 8, 9, 16, 17, 22, 23, 24, 25, 26, 27, 30, 31, 32, 33, 34, 37, 39, 40, 41, 42, 43, 44, 46, 49], "acceler": [2, 23, 34, 36], "train": [2, 23, 28, 29, 30, 34, 36, 43, 44, 46], "modul": [2, 6, 7, 17, 23, 27, 28, 38, 45, 46, 51], "an": [2, 6, 7, 16, 17, 23, 24, 25, 26, 27, 28, 29, 31, 33, 34, 36, 37, 38, 40, 41, 42, 43, 44, 45, 49, 51], "enabl": [2, 3, 17, 23, 26, 30, 34, 38, 40, 42, 44, 45, 46], "minimum": [2, 11, 12, 24], "driver": [2, 4], "455": 2, "i": [2, 3, 4, 6, 7, 8, 9, 11, 12, 14, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51], "alwai": [2, 33], "recommend": [2, 17, 26, 28, 30, 32, 38, 43], "especi": [2, 38, 41, 43], "newer": 2, "both": [2, 11, 12, 17, 23, 24, 26, 38, 39, 41, 42, 43, 44, 45, 47, 51], "cudnn": 2, "more": [2, 17, 23, 24, 26, 27, 31, 32, 33, 34, 36, 38, 39, 40, 41, 42, 43, 44, 45, 48, 49], "interfac": 2, "support": [2, 17, 31, 32, 34, 35, 36, 37, 38, 39, 42, 43, 44, 45, 46, 47, 50, 51], "There": [2, 26, 28, 37, 39, 41, 48, 49], "two": [2, 17, 24, 26, 27, 33, 34, 36, 38, 39, 40, 41, 44, 47, 48, 49, 50], "wai": [2, 17, 26, 27, 33], "On": [2, 7], "your": [2, 3, 4, 23, 26, 37, 45], "machin": [2, 3, 36], "our": [2, 4, 27, 33, 43, 44], "pre": [2, 3, 4, 34, 39], "built": [2, 3], "develop": [2, 3, 4, 7, 17, 24, 45], "imag": [2, 28, 40], "click": 2, "link": 2, "contain": [2, 6, 7, 16, 24, 27, 38, 40, 41, 42, 44], "thi": [3, 4, 6, 7, 8, 9, 11, 12, 14, 16, 
17, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 51], "page": [3, 4, 32, 44, 46], "provid": [3, 4, 7, 14, 17, 23, 24, 26, 27, 28, 32, 33, 36, 38, 39, 40, 42, 43, 44, 45, 48, 49, 51], "instruct": [3, 4, 23], "insid": [3, 6, 7, 24, 27], "variant_str": 3, "ONE": 3, "depend": [3, 4, 16, 26, 32, 33, 38, 42, 46], "desir": [3, 17, 27, 32, 36, 38, 43], "pt113": 3, "tf": [3, 40, 44, 46], "export": [3, 4, 23, 26, 30, 34, 36, 37, 38, 41, 44, 45, 46], "aimet_vari": 3, "one": [3, 17, 24, 27, 31, 36, 41, 42, 46, 47, 50], "workspac": 3, "absolute_path_to_workspac": 3, "docker_image_nam": 3, "artifact": [3, 27], "codelinaro": 3, "org": [3, 4, 39], "dev": [3, 4], "docker_container_nam": 3, "any_nam": 3, "note": [3, 4, 17, 26, 27, 31, 32, 33, 34, 36, 37, 38, 40], "feel": 3, "free": [3, 38, 39, 41], "modifi": [3, 4, 38, 44, 46, 51], "need": [3, 4, 17, 26, 27, 29, 32, 36, 38, 39, 40, 41, 42, 44, 46, 48, 49], "you": [3, 4, 26, 33, 37, 47, 50], "want": 3, "If": [3, 4, 6, 7, 8, 9, 11, 12, 14, 17, 22, 24, 25, 26, 27, 29, 37, 38, 39, 40, 42, 43, 48, 49, 51], "skip": [3, 31], "next": [3, 27, 43], "section": [3, 4, 17, 28, 30, 31, 36, 38, 44], "any_tag": 3, "t": [3, 28], "f": [3, 27], "jenkin": 3, "dockerfil": 3, "ensur": [3, 24, 38, 43], "alreadi": [3, 33, 43], "run": [3, 4, 8, 9, 17, 24, 25, 26, 30, 34, 36, 38, 39, 40, 44, 46, 48], "otherwis": [3, 4, 8, 9, 11, 12, 17, 25, 43], "remov": [3, 26, 27, 31, 34, 44, 51], "exist": [3, 6, 7, 17, 38, 44], "new": [3, 8, 9, 17, 23, 25, 26, 27, 38, 42, 46], "p": 3, "grep": 3, "kill": 3, "rm": 3, "u": [3, 43], "id": [3, 48], "user": [3, 17, 23, 24, 26, 28, 29, 32, 36, 38, 40, 41, 42, 43, 44, 45, 46, 48, 49], "g": [3, 27, 30, 32, 34, 43, 51], "v": [3, 17, 33], "etc": [3, 4, 32, 38], "passwd": 3, "ro": 3, "group": [3, 17, 42, 44], "home": 3, "mnt": 3, "entrypoint": 3, "bin": [3, 4, 22], "w": [3, 51], "hostnam": 3, "abov": [3, 4, 17, 23, 29, 30, 33, 34, 36, 37, 39, 43, 44, 51], "base": [3, 6, 7, 8, 9, 11, 12, 14, 17, 22, 24, 25, 26, 31, 32, 38], "filesystem": 3, "add": [3, 7, 24, 27, 42, 44, 46, 48, 49, 51], "all": [3, 6, 7, 17, 24, 26, 27, 31, 33, 36, 39, 40, 42, 43, 45], "order": [3, 4, 6, 17, 27, 30, 31, 32, 38, 41, 44, 49], "access": [3, 26, 38], "replac": [3, 17, 24, 26, 27, 39, 44], "port": [3, 26, 48], "forward": [3, 6, 7, 8, 9, 24, 25, 27, 37, 40, 43, 46], "done": [3, 8, 9, 25, 31, 36, 42, 44, 51], "visual": [3, 36, 38, 39, 40, 43, 46, 47, 50], "api": [3, 7, 26, 27, 28, 29, 34, 37, 38, 40, 42, 45, 46, 48], "can": [3, 6, 8, 9, 16, 17, 23, 24, 25, 26, 27, 29, 30, 32, 33, 34, 36, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50], "achiev": [3, 17, 28, 32, 33, 47, 50], "port_id": 3, "ani": [3, 7, 17, 26, 27, 28, 29, 42, 46], "number": [3, 6, 7, 11, 12, 14, 17, 22, 24, 28, 33, 34, 36, 41, 44, 46, 48, 51], "default": [3, 4, 7, 8, 9, 11, 12, 24, 25, 28, 33, 36, 42, 44, 46, 48], "mai": [3, 4, 6, 7, 16, 17, 24, 28, 32, 36, 38, 39, 40, 42, 43, 44], "go": [3, 4, 27, 45, 48], "project": [3, 4], "requir": [3, 4, 17, 27, 28, 30, 32, 36, 38, 39, 42, 44], "each": [3, 4, 6, 7, 17, 24, 25, 27, 31, 32, 33, 38, 39, 40, 41, 42, 43, 44, 49, 51], "identifi": [3, 4, 17, 40, 43, 46, 51], "wish": [3, 4], "some": [3, 4, 24, 26, 27, 28, 32, 33, 36, 37, 38, 39, 41, 43, 44], "tip": [3, 4], "32": [3, 4, 8, 25, 43], "post1": [3, 4], "7": [3, 4, 11, 12, 14, 27, 51], "31": [3, 4], "prepend": [3, 4], "sudo": [3, 4], "y": [3, 4, 27, 40], "we": [3, 4, 17, 24, 26, 27, 33, 36, 38, 39, 42, 43, 44, 45, 49], "also": [3, 4, 17, 31, 32, 33, 38, 40, 42, 43, 44, 46, 48, 
49, 51], "wheel": [3, 4], "differ": [3, 4, 17, 26, 31, 33, 36, 38, 39, 41, 42, 43, 44, 45], "which": [3, 4, 7, 8, 9, 11, 12, 16, 17, 22, 23, 24, 25, 27, 28, 29, 30, 32, 33, 36, 38, 39, 40, 42, 44, 45, 46, 47, 48, 49, 50], "tag": [3, 4, 46], "below": [3, 4, 8, 9, 11, 12, 17, 24, 25, 26, 27, 29, 30, 38, 39, 42, 43, 44, 51], "detail": [3, 4, 31, 33, 34, 36, 38, 43, 44, 48, 49], "ex": [3, 4, 17], "release_tag": [3, 4], "construct": [3, 4, 27, 37], "root": [3, 4, 27], "url": [3, 4, 48], "download_url": [3, 4], "extens": [3, 4, 24], "wheel_file_nam": [3, 4], "specifi": [3, 4, 8, 9, 11, 12, 14, 17, 25, 27, 29, 36, 42, 44, 49], "automat": [3, 4, 17, 32, 36, 38, 40, 46], "common": [3, 17, 26, 43, 49], "variabl": [3, 4, 8, 9, 25], "sourc": [3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 16, 17, 18, 20, 21, 22, 24, 25, 26, 43], "usr": [3, 4], "lib": [3, 4], "dist": [3, 4], "envsetup": [3, 4], "sh": [3, 4], "unless": [4, 7, 51], "pend": 4, "pip3": 4, "h": [4, 50, 51], "These": [4, 24, 26, 27, 29, 30, 31, 32, 37, 38, 39, 40, 43, 44], "assum": [4, 17], "path": 4, "local": [4, 48], "case": [4, 11, 12, 17, 24, 26, 27, 33, 39, 41, 42], "accordingli": 4, "basic": [4, 23, 27, 45], "requisit": 4, "updat": [4, 25, 26, 38, 39, 41, 44, 46], "upgrad": 4, "ye": [4, 36], "wget": 4, "gnupg2": 4, "have": [4, 7, 17, 26, 27, 33, 36, 38, 39, 40, 43, 44, 45], "multipl": [4, 17, 24, 34, 36, 38, 46], "set": [4, 6, 7, 17, 21, 24, 25, 26, 28, 32, 33, 34, 36, 37, 39, 40, 41, 42, 43, 44, 45, 51], "altern": [4, 17, 36], "do": [4, 27, 36, 40, 44], "were": [4, 32, 38, 42, 45, 51], "test": 4, "sub": [4, 31, 36, 44, 51], "visit": [4, 23, 34], "archiv": 4, "obtain": [4, 31, 32, 40, 44, 45], "correct": [4, 27, 28, 30, 38, 39, 43], "exact": [4, 24, 30], "up": [4, 17, 36, 41, 42, 44, 45, 51], "date": 4, "repo": 4, "ubuntu2204": 4, "x86_64": 4, "pin": 4, "mv": 4, "prefer": [4, 36], "d": [4, 8, 9, 11, 12, 25], "repositori": 4, "600": 4, "local_instal": 4, "local_11": 4, "520": 4, "61": 4, "05": [4, 11, 12, 27], "1_amd64": 4, "deb": 4, "kei": 4, "adv": 4, "fetch": 4, "3bf863cc": 4, "pub": 4, "dpkg": 4, "cp": [4, 32], "var": 4, "keyr": 4, "gpg": 4, "share": [4, 24], "echo": 4, "list": [4, 11, 12, 17, 22, 24, 25, 26, 33, 35, 37, 42], "515": 4, "65": [4, 32], "01": [4, 11, 12, 28], "cat": 4, "reqs_deb_common": 4, "txt": 4, "xarg": 4, "reqs_deb_torch_common": 4, "reqs_deb_onnx_common": 4, "reqs_deb_tf_gpu": 4, "reqs_deb_torch_gpu": 4, "reqs_deb_onnx_gpu": 4, "option": [4, 7, 8, 9, 11, 12, 17, 22, 23, 25, 27, 28, 40, 42, 44, 45, 48], "uninstal": 4, "cach": 4, "dir": 4, "9": [4, 11, 12, 16, 43], "onnxruntime_v": 4, "c": [4, 32], "import": [4, 7, 8, 9, 11, 12, 14, 16, 17, 23, 24, 25, 27, 30, 31, 43], "print": [4, 6, 7, 11, 12, 24, 26, 27, 38, 40], "__version__": 4, "ln": 4, "": [4, 6, 7, 17, 23, 24, 25, 27, 32, 35, 36, 38, 39, 40, 41, 43, 44, 48, 49, 51], "gnu": 4, "libjpeg": 4, "so": [4, 24, 26, 37, 40, 48], "chose": 4, "between": [4, 17, 24, 26, 39, 40, 42, 44, 45], "class": [6, 7, 8, 9, 14, 17, 18, 20, 21, 22, 25, 26, 27], "v2": [6, 7, 8, 9, 10, 11, 12, 14, 16, 17, 18, 20, 21, 22, 23, 24, 25, 27], "nn": [6, 7, 8, 9, 17, 23, 24, 25, 26, 27, 37, 45, 46], "arg": [6, 7, 11, 12, 16, 17, 24], "kwarg": [6, 7, 11, 12, 16, 24], "mixin": [6, 7, 24], "implement": [6, 7, 24, 26, 37, 43, 45], "fake": [6, 7, 9, 12, 14, 24, 25, 27, 45], "quantiz": [6, 7, 9, 10, 12, 14, 18, 20, 21, 22, 23, 28, 29, 30, 32, 34, 36, 40, 45, 46, 48], "top": [6, 7, 31, 48], "regular": [6, 7, 24, 28, 38, 44], "specif": [6, 17, 27, 28, 29, 30, 32, 34, 36, 37, 38, 39, 42, 46], "input": [6, 7, 8, 
9, 11, 12, 14, 17, 24, 25, 26, 27, 31, 36, 40, 42, 44, 47, 48, 50, 51], "output": [6, 7, 8, 9, 11, 17, 24, 25, 26, 27, 31, 36, 39, 40, 42, 44, 46, 47, 50, 51], "paramet": [6, 7, 8, 9, 11, 12, 14, 16, 17, 21, 22, 24, 25, 26, 27, 28, 30, 31, 36, 37, 38, 39, 40, 41, 42, 45, 49], "tensor": [6, 7, 8, 9, 10, 11, 12, 14, 17, 22, 24, 25, 27, 28, 31, 37, 38, 40, 42, 43, 44, 46, 47, 50], "its": [6, 7, 16, 23, 24, 27, 34, 38, 40, 44, 51], "held": [6, 27], "quantizerbas": [6, 7, 24, 25], "object": [6, 7, 16, 17, 22, 24, 25, 27, 30, 38, 41, 44], "dure": [6, 24, 27, 28, 34, 36, 38, 41, 42, 44, 48, 49], "method": [6, 7, 17, 24, 26, 27, 33, 36, 38, 43, 44], "inherit": [6, 24], "layer": [6, 7, 17, 24, 27, 28, 29, 30, 31, 32, 35, 37, 38, 39, 40, 42, 43, 44, 46, 47, 48, 49, 50, 51], "oper": [6, 7, 24, 26, 27, 37, 38, 39, 42, 43], "none": [6, 7, 8, 9, 10, 11, 12, 14, 17, 24, 25, 26, 27, 48], "behav": [6, 7, 24, 43], "exactli": [6, 7, 24, 44], "same": [6, 7, 16, 17, 24, 25, 26, 30, 39, 42, 44, 45, 49], "parent": [6, 7], "A": [6, 17, 22, 24, 32, 38, 40, 41, 42, 43, 44], "initi": [6, 7, 8, 9, 14, 24, 25, 28, 41, 43, 44], "scratch": 6, "syntax": 6, "form": 6, "from_modul": [6, 7], "input_quant": [6, 7, 24, 26, 27], "modulelist": [6, 7, 24, 26, 27], "appli": [6, 7, 8, 9, 11, 12, 17, 24, 25, 27, 28, 29, 30, 33, 36, 38, 39, 41, 42, 43, 44, 45, 46, 48, 49], "type": [6, 7, 8, 9, 16, 17, 22, 24, 25, 26, 38, 40, 42, 44, 48], "output_quant": [6, 7, 24, 26, 27], "param_quant": [6, 7, 17, 24, 26, 27], "moduledict": [6, 7, 24, 26, 27], "map": [6, 7, 11, 12, 16, 17, 24, 40, 42], "associ": [6, 7, 24, 38], "exampl": [6, 7, 8, 9, 11, 12, 14, 16, 17, 24, 25, 27, 28, 32, 33, 34, 38, 40, 42, 44, 46, 51], "qlinear": [6, 7, 24, 26], "fakequantizedlinear": [6, 24], "in_featur": [6, 7, 24, 26, 27], "out_featur": [6, 7, 24, 26, 27], "bia": [6, 7, 14, 26, 27, 28, 31, 38, 39, 42, 43, 46], "fals": [6, 7, 8, 9, 11, 12, 16, 17, 24, 25, 26, 27, 37, 42], "weight": [6, 7, 17, 22, 24, 26, 27, 28, 30, 32, 36, 38, 39, 40, 41, 42, 43, 44, 49], "linear": [6, 7, 17, 24, 26, 27, 30, 31], "true": [6, 7, 8, 9, 14, 16, 17, 22, 24, 25, 26, 27, 37, 42], "abstract": [6, 7, 24, 25], "should": [6, 7, 17, 24, 26, 27, 32, 36, 42, 48, 51], "perform": [6, 7, 8, 9, 17, 24, 25, 27, 29, 30, 31, 32, 33, 36, 38, 39, 40, 41, 43, 45], "logic": [6, 7, 46], "param": [6, 17, 25, 42], "call": [6, 7, 14, 16, 17, 24, 27, 30, 36, 38, 40, 42, 44, 46, 47, 50], "pass": [6, 7, 17, 23, 24, 26, 27, 34, 37, 38, 39, 40, 41, 43, 44, 46, 48], "__quant_init__": [6, 7, 24], "invok": [6, 7, 24, 36, 38, 48, 49], "right": [6, 7, 8, 9, 11, 12, 14, 24, 25, 38, 51], "after": [6, 7, 24, 27, 28, 29, 30, 32, 36, 38, 41, 43, 48, 49], "__init__": [6, 7, 24, 27], "structur": [6, 7, 24, 36], "size": [6, 7, 8, 9, 11, 12, 17, 24, 25, 28, 36, 37, 47, 50], "initializd": [6, 7, 24], "custom": [6, 7, 24, 43, 44, 45], "overridden": [6, 7, 24], "length": [6, 7, 17, 22, 24], "given": [6, 7, 17, 24, 29, 31, 33, 34, 36, 39, 47, 48, 50], "compute_encod": [6, 7, 8, 9, 14, 16, 23, 24, 25, 26, 27, 45], "enter": [6, 7, 24, 29], "context": [6, 7, 24, 27], "observ": [6, 7, 18, 21, 24, 25, 27, 33, 36, 38, 39, 40, 41, 44], "encod": [6, 7, 8, 9, 16, 17, 20, 21, 22, 23, 25, 26, 27, 28, 30, 38, 40, 41, 45, 46], "upon": [6, 7, 24, 27], "exit": [6, 7, 24, 27], "quantizedlinear": [6, 7, 17, 24, 26, 27], "symmetr": [6, 7, 8, 9, 16, 17, 22, 24, 25, 26, 27, 42, 44], "randn": [6, 7, 8, 9, 16, 24, 25], "16": [6, 7, 8, 14, 17, 24, 25, 28], "is_initi": [6, 7, 8, 9, 14, 24, 25], "classmethod": [6, 7], "creat": [6, 7, 23, 24, 27, 28, 
30, 36, 37, 38, 41, 44], "instanc": [6, 7, 48], "result": [6, 7, 16, 17, 22, 28, 29, 31, 32, 34, 39, 40, 41, 42, 44], "attribut": [6, 7, 24, 26, 40], "origin": [6, 7, 24, 26, 27, 31, 32, 36, 38, 39, 40, 41, 44, 48], "assign": [6, 7, 8, 9, 24, 25], "float": [6, 7, 14, 16, 17, 23, 24, 38, 40, 43, 44, 45, 49], "point": [6, 7, 16, 17, 23, 24, 26, 34, 36, 38, 40, 43, 44, 45, 49], "return": [6, 7, 8, 9, 16, 17, 22, 23, 25, 27, 29, 33, 34, 40, 44], "quantized_linear": [6, 7], "module_cl": [6, 7], "decor": [6, 7], "regist": [6, 7, 24, 25], "defin": [6, 17, 24, 27, 37, 38, 40, 42, 44], "featur": [7, 17, 24, 28, 29, 30, 36, 39, 40, 44, 46, 48, 49], "under": [7, 17, 24, 26, 40, 42, 48, 49], "heavi": [7, 17, 24, 48, 49], "chang": [7, 17, 24, 27, 28, 36, 40, 41, 42, 44, 45, 49, 51], "occur": [7, 17, 24], "without": [7, 14, 16, 17, 24, 29, 38, 41, 44, 51], "notic": [7, 17, 24, 36], "futur": [7, 17, 24, 45], "verion": 7, "function": [7, 11, 12, 16, 17, 24, 26, 27, 28, 33, 36, 37, 38, 40, 44, 46, 48, 49], "behavior": [7, 24, 26, 27, 34, 45], "fakequantizationmixin": [7, 23, 24], "abil": [7, 46], "kernel": [7, 17, 24, 31, 45, 47, 50], "place": [7, 17, 41, 42], "ha": [7, 16, 17, 26, 27, 32, 33, 36, 39, 41, 44, 48, 51], "been": [7, 16, 17, 38, 41, 44, 45, 51], "within": [7, 16, 24, 32, 40, 44], "fall": [7, 33, 42], "back": [7, 16, 27, 42], "equival": [7, 11, 12, 14, 17, 27], "e": [7, 27, 30, 32, 34, 41, 43, 51], "get_kernel": 7, "doe": [7, 24, 26, 27, 33, 35, 38, 43], "retriev": 7, "well": [7, 16, 17, 24, 32, 36, 38, 39, 40, 44, 47], "dequant": [7, 9, 12, 16, 23, 24, 25, 44], "set_kernel": 7, "signatur": [7, 11, 12], "must": [7, 17, 24, 30, 34, 35, 40, 42, 51], "match": [7, 17, 31, 36, 40, 42, 43, 44, 51], "In": [7, 17, 24, 26, 27, 28, 29, 32, 33, 36, 38, 39, 41, 42, 44, 45, 49, 51], "gener": [7, 8, 9, 11, 12, 17, 25, 27, 36, 38, 40, 41, 42, 44], "quantizedtensor": [7, 8, 16, 25], "take": [7, 17, 27, 34, 36, 38, 39, 41, 42, 43, 51], "addit": [7, 17, 29, 38, 41, 42, 46], "keyword": 7, "argument": [7, 14, 17], "output_encod": 7, "onc": [7, 30, 31, 36, 40, 41, 44], "callabl": [7, 17], "underli": [7, 43], "q": [7, 8, 9, 11, 12, 14, 16, 24, 25, 26, 44], "def": [7, 26, 27], "int_multipli": 7, "b": [7, 8, 9, 11, 12, 25], "enc": 7, "affin": [7, 8, 9, 10, 11, 12, 16, 17, 23, 24, 25, 27, 45], "rais": 7, "notimplementederror": 7, "q_output": 7, "quantized_repr": [7, 16], "offset": [7, 8, 9, 10, 11, 12, 17, 22, 25, 38, 40, 41, 44], "dq_output": 7, "scale": [7, 8, 9, 10, 11, 12, 14, 16, 17, 25, 30, 38, 39, 40, 41, 44], "qmult": 7, "quantizedmultipli": [7, 24], "set_default_kernel": 7, "quantized_forward": 7, "cl": [7, 46], "get_default_kernel": 7, "current": [7, 31, 34, 35, 36, 37, 42, 47, 50], "shape": [8, 9, 14, 16, 17, 20, 21, 22, 24, 25, 26, 27, 40], "bitwidth": [8, 9, 11, 12, 14, 16, 17, 24, 25, 26, 27, 30, 38, 43, 44], "encoding_analyz": [8, 9, 14, 18, 20, 21, 22, 25], "block_siz": [8, 9, 10, 11, 12, 17, 25], "precis": [8, 9, 11, 12, 14, 23, 25, 38], "out": [8, 9, 11, 12, 14, 25, 29, 32, 36, 40], "clamp": [8, 9, 11, 12, 14, 25, 44], "left": [8, 9, 11, 12, 14, 25, 33, 51], "lceil": [8, 9, 11, 12, 14, 25], "frac": [8, 9, 11, 12, 14, 25], "rfloor": [8, 9, 11, 12, 14, 25], "qmin": [8, 9, 11, 12, 25, 44], "qmax": [8, 9, 11, 12, 25, 44], "where": [8, 9, 11, 12, 14, 24, 25, 27, 30, 33, 40, 41, 47, 50, 51], "deriv": [8, 9, 11, 12, 24, 25], "learnabl": [8, 9, 25], "theta_": [8, 9, 25], "min": [8, 9, 20, 22, 24, 25, 26, 27, 40, 44], "max": [8, 9, 14, 20, 22, 24, 25, 26, 27, 36, 39, 40, 44], "block": [8, 9, 11, 12, 17, 25, 
26, 45], "begin": [8, 9, 11, 12, 25, 41, 42], "pmatrix": [8, 9, 11, 12, 25], "b_0": [8, 9, 11, 12, 25], "b_1": [8, 9, 11, 12, 17, 25], "cdot": [8, 9, 11, 12, 25], "b_": [8, 9, 11, 12, 25], "end": [8, 9, 11, 12, 25, 27, 36], "equat": [8, 9, 11, 12, 17, 25, 44], "further": [8, 9, 11, 12, 16, 17, 25, 26, 27, 31, 34, 36, 38, 42], "out_": [8, 9, 11, 12, 25], "j_0": [8, 9, 11, 12, 25], "j_": [8, 9, 11, 12, 25], "input_": [8, 9, 11, 12, 25], "scale_": [8, 9, 11, 12, 25], "i_0": [8, 9, 11, 12, 25], "i_": [8, 9, 11, 12, 25], "offset_": [8, 9, 11, 12, 25], "text": [8, 9, 11, 12, 25], "quad": [8, 9, 11, 12, 25, 44], "forall_": [8, 9, 11, 12, 25], "leq": [8, 9, 11, 12, 25], "i_d": [8, 9, 11, 12, 25], "lfloor": [8, 9, 11, 12, 14, 25], "j_d": [8, 9, 11, 12, 25], "b_d": [8, 9, 11, 12, 25], "tupl": [8, 9, 11, 12, 17, 22, 25], "int": [8, 9, 11, 12, 14, 17, 22, 25, 26], "bool": [8, 9, 11, 12, 17, 22, 25], "asymmetr": [8, 9, 22, 25, 42, 44], "encodinganalyz": [8, 9, 14, 18, 25], "analyz": [8, 9, 20, 21, 22, 23, 24, 25, 29, 31, 36, 37, 40, 44, 45, 48, 49], "calibr": [8, 9, 17, 20, 21, 22, 23, 24, 25, 27, 38, 40, 41, 43, 44], "absolut": [8, 9, 25], "cannot": [8, 9, 25], "until": [8, 9, 25, 29], "properli": [8, 9, 25, 27], "statist": [8, 9, 14, 24, 25, 27, 30, 38, 40, 49], "manual": [8, 9, 25, 26, 29, 36], "valu": [8, 9, 11, 12, 14, 16, 17, 21, 22, 25, 27, 28, 33, 36, 38, 39, 40, 41, 44, 47, 49, 50], "see": [8, 9, 24, 25, 26, 27, 31, 33, 34, 36, 38, 42, 43, 44, 47, 48, 49, 50], "_": [8, 9, 12, 23, 24, 25, 27], "129": [8, 25, 37], "255": [8, 16, 25], "122": [8, 25], "192": [8, 25], "106": [8, 25], "94": [8, 25], "145": [8, 25], "181": [8, 25], "144": [8, 25], "194": [8, 25], "74": [8, 25], "86": [8, 25], "150": [8, 25], "103": [8, 25], "37": [8, 25], "111": [8, 25], "237": [8, 25], "218": [8, 25], "49": [8, 25], "155": [8, 25], "179": [8, 25], "66": [8, 25, 32], "89": [8, 25], "110": [8, 25], "17": [8, 22, 25], "36": [8, 25], "83": [8, 25], "grad_fn": [8, 9, 16, 25], "aliasbackward0": [8, 9, 16, 25], "ones_lik": [8, 9, 25], "187": [8, 25], "186": [8, 25], "131": [8, 25], "203": [8, 25], "80": [8, 25], "143": [8, 25], "152": [8, 25], "226": [8, 25], "55": [8, 25], "172": [8, 25], "207": [8, 25], "146": [8, 25], "216": [8, 25], "238": [8, 25], "141": [8, 25], "178": [8, 25], "188": [8, 25], "63": [8, 25], "59": [8, 25], "19": [8, 25], "162": [8, 25], "30": [8, 25], "109": [8, 25], "overlin": [9, 12, 25], "qdq": [9, 14, 25], "dequantizedtensor": [9, 16, 25], "2771": [9, 25], "3038": [9, 25], "0819": [9, 25], "9700": [9, 25], "9487": [9, 25], "1307": [9, 25], "7894": [9, 25], "1709": [9, 25], "2212": [9, 25], "7741": [9, 25], "0295": [9, 25], "2265": [9, 25], "0564": [9, 25], "6177": [9, 25], "0386": [9, 25], "0176": [9, 25], "6054": [9, 25], "8836": [9, 25], "1232": [9, 25], "8229": [9, 25], "5540": [9, 25], "3992": [9, 25], "2363": [9, 25], "2546": [9, 25], "0036": [9, 25], "2355": [9, 25], "1741": [9, 25], "6079": [9, 25], "6247": [9, 25], "0115": [9, 25], "2458": [9, 25], "9157": [9, 25], "4694": [9, 25], "0639": [9, 25], "2568": [9, 25], "0680": [9, 25], "6695": [9, 25], "7932": [9, 25], "1889": [9, 25], "0158": [9, 25], "5695": [9, 25], "5220": [9, 25], "1977": [9, 25], "4475": [9, 25], "0424": [9, 25], "1128": [9, 25], "8796": [9, 25], "1060": [9, 25], "5897": [9, 25], "6196": [9, 25], "9961": [9, 25], "0549": [9, 25], "6431": [9, 25], "0039": [9, 25], "8706": [9, 25], "4706": [9, 25], "2353": [9, 25], "8078": [9, 25], "3451": [9, 25], "1176": [9, 25], "4549": [9, 25], "0471": [9, 25], "5255": [9, 25], 
"4157": [9, 25], "0784": [9, 25], "5333": [9, 12, 25], "1647": [9, 25], "2118": [9, 25], "2196": [9, 25], "9176": [9, 25], "9490": [9, 25], "7765": [9, 25], "4784": [9, 25], "6039": [9, 25], "3137": [9, 25], "3216": [9, 25], "8000": [9, 12, 25], "4392": [9, 25], "4863": [9, 25], "overload": [11, 12], "sign": [11, 12, 26, 44], "rceil": [11, 12], "posit": [11, 12], "integ": [11, 12, 17, 28, 38, 40], "rang": [11, 12, 21, 22, 27, 28, 30, 33, 38, 39, 40, 41, 43, 44, 46, 49], "over": [11, 12, 22, 24, 28, 33, 36, 49], "neg": [11, 12, 17, 24], "num_step": [11, 12, 22], "num": [11, 12], "_step": [11, 12], "step": [11, 12, 23, 27, 28, 29, 30, 31, 32, 33, 36, 38, 39, 41, 43, 44], "maximum": [11, 12, 14, 22, 24], "arang": [11, 12], "start": [11, 12, 27, 28, 33, 36, 42, 44], "0000e": [11, 12], "5000e": [11, 12], "02": [11, 12], "1921e": [11, 12], "08": [11, 12], "4": [11, 12, 16, 17, 26, 27, 30, 33, 38, 51], "6": [11, 12, 17, 41], "00": [11, 12], "0500e": [11, 12], "1000e": [11, 12], "1500e": [11, 12], "2000e": [11, 12], "2500e": [11, 12], "15": [11, 12, 36, 41], "0000": [12, 16], "0667": 12, "1333": 12, "2000": [12, 16], "2667": 12, "3333": 12, "4000": [12, 16], "4667": 12, "6000": [12, 16], "6667": 12, "7333": 12, "8667": 12, "9333": 12, "exponent_bit": [14, 17], "mantissa_bit": [14, 17], "dtype": [14, 16, 17, 26], "simul": [14, 17, 23, 24, 27, 34, 38, 41, 45, 46], "cast": [14, 24], "expon": [14, 17], "mantissa": [14, 17], "x_c": 14, "log_2": 14, "ieee": [14, 36, 39], "standard": [14, 24], "represent": [14, 16], "_max": 14, "mutual": [14, 17], "exclus": [14, 17], "repres": [14, 16, 24, 25, 27, 33, 38, 39, 40, 41, 44], "determin": [14, 17, 24, 27, 29, 32, 36, 38, 39, 40], "dynam": [14, 39, 44, 46, 49], "finer": [14, 17, 45], "8998": 14, "0947": 14, "0891": 14, "1727": 14, "unlik": 14, "affinequant": [14, 26], "floatquant": [14, 26], "is_bfloat16": 14, "8984": 14, "0859": 14, "1729": 14, "minmaxencodinganalyz": [14, 23, 26], "float16": [14, 17, 26], "is_float16": 14, "8994": 14, "0889": 14, "alia": 14, "hold": [16, 17, 24, 42], "store": [16, 17], "along": [16, 17, 27, 41, 44], "encodingbas": [16, 25], "inform": [16, 26, 38, 40], "necessari": [16, 17, 27, 48], "real": 16, "self": [16, 22, 27], "produc": [16, 17, 22, 33, 40, 45, 48], "57": 16, "312": 16, "153": 16, "205": 16, "set_rang": 16, "128": [16, 17, 27], "127": 16, "x_q": 16, "26": 16, "23": 16, "x_dq": 16, "3000": 16, "equal": [16, 17, 22, 24, 28, 29, 32, 33, 37, 38, 39, 40, 49], "data": [16, 17, 23, 26, 27, 28, 30, 35, 38, 39, 40, 41, 43, 44], "rtype": 16, "abl": [16, 27, 28, 48, 49], "carri": 16, "gradient": 16, "thu": 16, "autograd": 16, "allow": [16, 17, 24, 29, 34, 36, 38, 40, 41, 42, 43, 44, 45, 46, 48], "backpropag": 16, "requires_grad": 16, "38": [16, 36], "28": 16, "40": 16, "int8": [16, 41, 44, 49], "subsequ": [16, 37, 39, 41, 42], "about": [16, 27, 45], "wa": [16, 31, 36, 42], "With": [16, 45], "convert": [16, 27, 29, 38, 49], "loss": [16, 23, 27, 28, 34, 38, 40, 44], "39": [16, 27], "51": 16, "521": 16, "41": 16, "quant_dequ": 16, "quantizedequant": [16, 17, 23, 24, 25, 26, 27], "x_qdq": 16, "52": 16, "68": 16, "97": 16, "uint8": 16, "when": [17, 21, 23, 24, 27, 28, 34, 36, 38, 39, 40, 41, 42, 43, 44, 45, 48, 49, 51], "known": [17, 33, 34], "like": [17, 23, 26, 27, 34, 36, 38, 40, 41, 42, 45, 48], "grid": [17, 45], "counterpart": [17, 24], "process": [17, 22, 23, 27, 29, 34, 36, 38, 39, 44, 45], "particular": [17, 38, 42], "choos": [17, 31, 32, 36], "come": [17, 41, 44], "cover": [17, 30, 42, 44], "whole": [17, 44], "split": [17, 22, 
24], "describ": [17, 26, 38, 39, 43, 44], "sever": [17, 24, 32], "pro": 17, "con": 17, "per": [17, 22, 24, 30, 38, 39, 40, 42, 43, 44, 46], "entir": [17, 27, 33, 36], "collect": [17, 31, 40], "singl": [17, 28, 39], "benefit": [17, 28], "includ": [17, 26, 30, 36, 38, 40, 42, 44, 46], "less": [17, 22, 24, 31, 33], "storag": 17, "space": 17, "drawback": 17, "outlier": [17, 40, 44], "affect": [17, 30, 42, 51], "channel": [17, 24, 30, 32, 33, 35, 36, 39, 40, 42, 43, 44, 46, 47, 49, 50, 51], "individu": [17, 30, 31, 32, 33, 36, 38, 40, 43], "typic": [17, 27, 32, 38, 40, 41, 42, 44, 48], "dimens": [17, 24, 36, 43, 47, 50], "compar": [17, 27, 40, 41, 49], "would": [17, 26, 32, 36, 42, 46, 48], "influenc": 17, "resid": [17, 46], "chunk": 17, "across": [17, 39, 40], "improv": [17, 26, 27, 32, 38, 41, 43, 49], "granular": [17, 36, 43, 44, 45, 49], "found": [17, 26, 41, 44, 45], "isol": 17, "optim": [17, 27, 28, 29, 34, 36, 38, 41, 44, 45, 46, 48], "cost": [17, 33, 36, 41], "increas": [17, 33, 39, 42], "favor": 17, "possibl": [17, 27, 40, 42, 43], "similarli": [17, 43], "lead": [17, 28, 30, 39, 43, 44], "better": [17, 28, 29, 38, 39, 41], "accuraci": [17, 23, 27, 28, 29, 32, 33, 34, 36, 38, 39, 40, 41, 43, 44, 46, 49, 51], "activ": [17, 24, 27, 38, 40, 41, 42, 43, 44], "runtim": [17, 23, 27, 32, 34, 36, 38, 40, 42, 44, 46], "part": [17, 36, 38, 39, 40], "basi": [17, 33, 36], "instanti": [17, 27, 41, 48], "relationship": 17, "actual": [17, 32, 38], "being": [17, 26], "rule": [17, 42], "most": [17, 27, 42], "long": 17, "b_2": 17, "b_n": 17, "s_1": 17, "s_2": 17, "s_n": 17, "satisfi": [17, 27, 29], "n": [17, 27, 46], "word": 17, "evenli": 17, "divid": [17, 24, 41], "valid": [17, 29, 38, 46], "sinc": [17, 30, 32, 33, 44], "divis": 17, "permit": 17, "essenti": [17, 23], "invalid": 17, "combin": [17, 29, 32, 36, 38, 39], "though": [17, 42], "3d": 17, "final": [17, 31, 32, 33, 41, 43, 48], "infer": [17, 23, 27, 30, 32, 34, 39, 41, 44, 46], "while": [17, 24, 28, 33, 37, 38, 41, 43, 44, 45, 48], "arbitrari": 17, "experiment": [17, 26, 36, 42, 45], "purpos": [17, 42], "restrict": [17, 37], "constraint": 17, "still": [17, 38, 43], "themselv": [17, 41], "code": [17, 27, 28, 45], "show": [17, 23, 27, 34, 39, 43], "how": [17, 24, 26, 27, 36, 39, 40, 43, 44, 45], "configur": [17, 27, 32, 35, 46], "convolut": [17, 27, 30, 32, 36, 43], "sim": [17, 23, 26, 27, 41, 44], "conv_1": 17, "refer": [17, 26, 28, 34, 38, 40, 41, 42, 44, 45], "quantizedconv2d": [17, 24, 26, 27], "work": [17, 30, 36, 37, 39, 42], "too": 17, "linear_1": 17, "scheme": [17, 29, 30, 33, 36, 40], "lower": [17, 26, 33, 38, 43], "adjust": [17, 30, 31, 32, 38, 39, 43], "thei": [17, 42, 45, 48], "lie": 17, "higher": [17, 22, 30, 33, 41, 43], "leverag": 17, "than": [17, 26, 27, 35, 41, 48], "due": [17, 38, 39], "fact": 17, "expans": [17, 36], "factor": [17, 22, 32, 36, 39], "fashion": 17, "groupedblockquantizedequant": 17, "introduc": [17, 38, 42, 44], "decompressed_bw": 17, "expand": [17, 24], "greater": [17, 24], "block_group": 17, "togeth": [17, 36], "As": [17, 29, 31, 32, 33, 36, 38, 39, 40, 44, 47, 50], "except": 17, "make": [17, 24, 33, 36, 37, 38, 44, 45], "easier": [17, 26, 45], "quantsim": [17, 23, 38, 41, 42, 46], "config_util": 17, "set_blockwise_quantization_for_weight": 17, "quantizationsimmodel": [17, 23, 27, 28, 30], "consist": [17, 26, 29, 44, 51], "either": [17, 34, 44], "whose": [17, 26, 39, 42, 45, 51], "union": 17, "arrai": 17, "in_channel": [17, 27], "out_channel": [17, 27], "conv": [17, 35, 42, 46, 47, 50, 51], "input_channel": 17, 
"conv2d": [17, 24, 26, 27, 31, 36, 46, 51], "conv2": [17, 27], "linear1": 17, "dim": [17, 27], "lambda": 17, "isinst": 17, "util": [17, 26, 27, 30, 38], "certain": [17, 36, 37, 38, 42], "Of": 17, "signific": [17, 43], "second": [17, 24, 42], "subset": [17, 28, 30, 40, 51], "switch": 17, "docstr": 17, "instead": [17, 38, 39], "4d": 17, "2d": 17, "handl": 17, "time": [17, 27, 29, 36, 37, 41, 48], "mention": 17, "assist": [17, 48, 49], "transform": [17, 27, 46], "set_activation_quantizers_to_float": 17, "set_grouped_blockwise_quantization_for_weight": 17, "decompress": 17, "bw": 17, "experi": [17, 36, 45], "similar": [17, 39, 41, 44], "addition": 17, "effect": [17, 24, 27, 30, 38, 40, 42, 44], "larger": [17, 47, 50], "reduc": [17, 24, 31, 36, 39, 43, 46, 51], "write": 17, "snippet": 17, "format": [17, 25, 29, 35], "encoding_vers": 17, "exported_model": 17, "dummy_input": [17, 27], "present": [17, 26, 27, 36, 39], "techniqu": [20, 21, 22, 23, 27, 28, 29, 31, 32, 34, 38, 40, 41, 43, 44, 45, 46, 47, 50], "num_bin": [21, 22], "2048": [21, 22], "percentil": 21, "100": [21, 26, 27], "set_percentil": 21, "clip": [21, 22, 42, 44], "largest": 21, "smallest": 21, "50": [21, 32], "indic": [21, 24, 32, 51], "asymmetric_delta_candid": 22, "symmetric_delta_candid": 22, "101": 22, "offset_candid": 22, "21": 22, "max_parallel": 22, "gamma": 22, "sqnr": [22, 44], "calcul": [22, 24, 33, 39, 40, 44], "histogram": [22, 38, 40, 44, 46], "delta": [22, 44], "search": [22, 33, 41, 42], "mode": [22, 25, 37, 38, 42], "paral": 22, "memori": [22, 32, 36, 47, 50, 51], "usag": [22, 23, 32, 36, 43], "faster": [22, 28, 34, 41], "nois": [22, 27, 38, 39, 40, 41, 42], "compute_encodings_from_stat": 22, "stat": 22, "is_symmetr": [22, 42], "lowest": 22, "expect": [22, 27, 36, 38, 40], "_histogram": 22, "els": [22, 27, 39], "tool": [23, 27, 36, 39, 49, 51], "compress": [23, 31, 34, 46, 47, 49, 50, 51], "deploi": [23, 44], "edg": [23, 34], "devic": [23, 27, 44], "fix": [23, 34, 38, 43, 44, 46], "post": [23, 27, 28, 29, 34, 36, 41, 44, 46], "fine": [23, 32, 34, 38, 41, 44], "tune": [23, 32, 34, 38, 41, 44], "minim": [23, 34, 36, 38, 44], "incur": [23, 34, 40], "pictur": [23, 31, 34], "high": [23, 26, 28, 30, 32, 33, 34, 39, 43, 45, 46, 49], "level": [23, 26, 30, 32, 33, 34, 38, 43, 45, 48], "view": [23, 27, 34, 37, 48], "workflow": [23, 27, 32, 34], "low": [23, 28, 30, 36, 38, 39, 43, 45], "recov": [23, 34, 43, 44], "lost": [23, 34], "via": [23, 32, 34, 44], "torchscript": 23, "target": [23, 30, 32, 33, 34, 36, 38, 43, 44, 46], "neural": [23, 27, 29, 32, 34, 36, 38, 41, 43, 44, 50], "sdk": [23, 27, 34], "instal": [23, 46], "sample_input": [23, 27], "sampl": [23, 24, 27, 31, 38, 39, 40, 41, 44], "data_load": [23, 27], "sample_output": 23, "out_dir": 23, "quantized_model": 23, "quickstart": 23, "guid": [23, 26, 32, 39, 43, 45, 46], "depth": [23, 32, 43], "adapt": [23, 27, 28, 38, 40, 46], "round": [23, 24, 28, 38, 40, 44], "adaround": [23, 26, 29, 38, 43, 45, 46], "sqnrencodinganalyz": [23, 26], "percentileencodinganalyz": [23, 26], "quantizationmixin": [23, 24], "quantize_dequant": 23, "product": [23, 34], "technologi": [23, 34], "subsidiari": [23, 34], "network": [24, 27, 29, 32, 33, 36, 38, 41, 43, 44, 48, 50], "aimet": [24, 26, 27, 34, 37, 42, 45], "serv": [24, 48], "drop": [24, 29, 32, 36, 39, 40, 41, 43, 44], "nativ": 24, "state": [24, 27, 36], "superset": 24, "mean": [24, 27, 31, 42, 44], "coverag": 24, "limit": [24, 26, 35], "tabl": [24, 26, 33, 37, 48], "full": [24, 50], "basequantizationmixin": 24, "respons": [24, 36], 
"control": [24, 44, 45], "descript": [24, 37], "dict": [24, 25], "By": [24, 36, 42, 44], "index": [24, 32, 46], "respect": [24, 40], "per_channel_quant": [24, 42], "elementwis": [24, 46], "multipli": [24, 32], "qmul": 24, "sens": 24, "qadd": 24, "quantizedadd": 24, "befor": [24, 26, 27, 28, 29, 30, 36, 38, 41, 48, 49], "first": [24, 27, 32, 36, 38, 41, 48], "disabl": [24, 26, 33, 36, 40, 42, 44], "through": [24, 26, 27, 39, 40, 44, 45, 48, 49], "them": [24, 26, 27, 28, 51], "calibration_data_load": 24, "adaptiveavgpool1d": 24, "fakequantizedadaptiveavgpool1d": 24, "adaptiveavgpool2d": 24, "fakequantizedadaptiveavgpool2d": 24, "adaptiveavgpool3d": 24, "fakequantizedadaptiveavgpool3d": 24, "adaptivemaxpool1d": 24, "fakequantizedadaptivemaxpool1d": 24, "adaptivemaxpool2d": 24, "fakequantizedadaptivemaxpool2d": 24, "adaptivemaxpool3d": 24, "fakequantizedadaptivemaxpool3d": 24, "alphadropout": 24, "fakequantizedalphadropout": 24, "avgpool1d": 24, "fakequantizedavgpool1d": 24, "avgpool2d": 24, "fakequantizedavgpool2d": 24, "avgpool3d": 24, "fakequantizedavgpool3d": 24, "batchnorm1d": 24, "fakequantizedbatchnorm1d": 24, "batchnorm2d": [24, 27], "fakequantizedbatchnorm2d": 24, "batchnorm3d": 24, "fakequantizedbatchnorm3d": 24, "celu": 24, "fakequantizedcelu": 24, "channelshuffl": 24, "fakequantizedchannelshuffl": 24, "constantpad1d": 24, "fakequantizedconstantpad1d": 24, "constantpad2d": 24, "fakequantizedconstantpad2d": 24, "constantpad3d": 24, "fakequantizedconstantpad3d": 24, "conv1d": [24, 46], "fakequantizedconv1d": 24, "quantizedconv1d": 24, "fakequantizedconv2d": 24, "conv3d": 24, "fakequantizedconv3d": 24, "quantizedconv3d": 24, "convtranspose1d": [24, 46], "fakequantizedconvtranspose1d": 24, "convtranspose2d": 24, "fakequantizedconvtranspose2d": 24, "convtranspose3d": 24, "fakequantizedconvtranspose3d": 24, "crossmaplrn2d": 24, "fakequantizedcrossmaplrn2d": 24, "dropout": 24, "fakequantizeddropout": 24, "dropout2d": 24, "fakequantizeddropout2d": 24, "dropout3d": 24, "fakequantizeddropout3d": 24, "elu": 24, "fakequantizedelu": 24, "featurealphadropout": 24, "fakequantizedfeaturealphadropout": 24, "flatten": 24, "fakequantizedflatten": 24, "fold": [24, 28, 29, 30, 38, 39, 40, 46], "fakequantizedfold": 24, "fractionalmaxpool2d": 24, "fakequantizedfractionalmaxpool2d": 24, "fractionalmaxpool3d": 24, "fakequantizedfractionalmaxpool3d": 24, "gelu": 24, "fakequantizedgelu": 24, "quantizedgelu": 24, "glu": 24, "fakequantizedglu": 24, "groupnorm": 24, "fakequantizedgroupnorm": 24, "hardshrink": 24, "fakequantizedhardshrink": 24, "hardsigmoid": 24, "fakequantizedhardsigmoid": 24, "hardswish": 24, "fakequantizedhardswish": 24, "hardtanh": 24, "fakequantizedhardtanh": 24, "ident": [24, 27], "fakequantizedident": 24, "instancenorm1d": 24, "fakequantizedinstancenorm1d": 24, "instancenorm2d": 24, "fakequantizedinstancenorm2d": 24, "instancenorm3d": 24, "fakequantizedinstancenorm3d": 24, "lppool1d": 24, "fakequantizedlppool1d": 24, "lppool2d": 24, "fakequantizedlppool2d": 24, "layernorm": 24, "fakequantizedlayernorm": 24, "quantizedlayernorm": 24, "leakyrelu": 24, "fakequantizedleakyrelu": 24, "localresponsenorm": 24, "fakequantizedlocalresponsenorm": 24, "logsigmoid": 24, "fakequantizedlogsigmoid": 24, "logsoftmax": 24, "fakequantizedlogsoftmax": 24, "maxpool1d": 24, "fakequantizedmaxpool1d": 24, "maxpool2d": 24, "fakequantizedmaxpool2d": 24, "maxpool3d": 24, "fakequantizedmaxpool3d": 24, "maxunpool1d": 24, "fakequantizedmaxunpool1d": 24, "maxunpool2d": 24, "fakequantizedmaxunpool2d": 24, "maxunpool3d": 
24, "fakequantizedmaxunpool3d": 24, "mish": 24, "fakequantizedmish": 24, "prelu": 24, "fakequantizedprelu": 24, "pixelshuffl": 24, "fakequantizedpixelshuffl": 24, "pixelunshuffl": 24, "fakequantizedpixelunshuffl": 24, "rrelu": 24, "fakequantizedrrelu": 24, "relu": [24, 26, 27, 39, 42, 51], "fakequantizedrelu": [24, 26, 27], "relu6": [24, 39], "fakequantizedrelu6": 24, "reflectionpad1d": 24, "fakequantizedreflectionpad1d": 24, "reflectionpad2d": 24, "fakequantizedreflectionpad2d": 24, "replicationpad1d": 24, "fakequantizedreplicationpad1d": 24, "replicationpad2d": 24, "fakequantizedreplicationpad2d": 24, "replicationpad3d": 24, "fakequantizedreplicationpad3d": 24, "selu": 24, "fakequantizedselu": 24, "silu": 24, "fakequantizedsilu": 24, "sigmoid": 24, "fakequantizedsigmoid": 24, "quantizedsigmoid": 24, "softmax": [24, 27], "fakequantizedsoftmax": 24, "quantizedsoftmax": [24, 27], "softmax2d": 24, "fakequantizedsoftmax2d": 24, "softmin": 24, "fakequantizedsoftmin": 24, "softplu": 24, "fakequantizedsoftplu": 24, "softshrink": 24, "fakequantizedsoftshrink": 24, "softsign": 24, "fakequantizedsoftsign": 24, "syncbatchnorm": 24, "fakequantizedsyncbatchnorm": 24, "tanh": 24, "fakequantizedtanh": 24, "tanhshrink": 24, "fakequantizedtanhshrink": 24, "threshold": [24, 29], "fakequantizedthreshold": 24, "unflatten": 24, "fakequantizedunflatten": 24, "unfold": 24, "fakequantizedunfold": 24, "upsampl": [24, 37], "fakequantizedupsampl": 24, "upsamplingbilinear2d": 24, "fakequantizedupsamplingbilinear2d": 24, "upsamplingnearest2d": 24, "fakequantizedupsamplingnearest2d": 24, "zeropad2d": 24, "fakequantizedzeropad2d": 24, "bceloss": 24, "fakequantizedbceloss": 24, "bcewithlogitsloss": 24, "fakequantizedbcewithlogitsloss": 24, "bilinear": [24, 37], "fakequantizedbilinear": 24, "ctcloss": 24, "fakequantizedctcloss": 24, "cosinesimilar": 24, "fakequantizedcosinesimilar": 24, "crossentropyloss": [24, 27], "fakequantizedcrossentropyloss": 24, "hingeembeddingloss": 24, "fakequantizedhingeembeddingloss": 24, "huberloss": 24, "fakequantizedhuberloss": 24, "kldivloss": 24, "fakequantizedkldivloss": 24, "l1loss": 24, "fakequantizedl1loss": 24, "mseloss": 24, "fakequantizedmseloss": 24, "multilabelmarginloss": 24, "fakequantizedmultilabelmarginloss": 24, "multilabelsoftmarginloss": 24, "fakequantizedmultilabelsoftmarginloss": 24, "multimarginloss": 24, "fakequantizedmultimarginloss": 24, "nllloss": 24, "fakequantizednllloss": 24, "nllloss2d": 24, "fakequantizednllloss2d": 24, "pairwisedist": 24, "fakequantizedpairwisedist": 24, "poissonnllloss": 24, "fakequantizedpoissonnllloss": 24, "smoothl1loss": 24, "fakequantizedsmoothl1loss": 24, "softmarginloss": 24, "fakequantizedsoftmarginloss": 24, "cosineembeddingloss": 24, "fakequantizedcosineembeddingloss": 24, "gaussiannllloss": 24, "fakequantizedgaussiannllloss": 24, "marginrankingloss": 24, "fakequantizedmarginrankingloss": 24, "tripletmarginloss": 24, "fakequantizedtripletmarginloss": 24, "tripletmarginwithdistanceloss": 24, "fakequantizedtripletmarginwithdistanceloss": 24, "embed": [24, 36, 43], "fakequantizedembed": 24, "embeddingbag": 24, "fakequantizedembeddingbag": 24, "gru": [24, 46], "fakequantizedgru": 24, "rnn": [24, 46], "fakequantizedrnn": 24, "grucel": 24, "fakequantizedgrucel": 24, "rnncell": 24, "fakequantizedrnncel": 24, "lstm": [24, 46], "fakequantizedlstm": 24, "lstmcell": 24, "fakequantizedlstmcel": 24, "adaptivelogsoftmaxwithloss": 24, "fakequantizedadaptivelogsoftmaxwithloss": 24, "aimet_op": 24, "fakequantizedcast": 24, "depthtospacedcrmod": 24, 
"fakequantizeddepthtospacedcrmod": 24, "onehot": 24, "fakequantizedonehot": 24, "exponenti": 24, "fakequantizedexponenti": 24, "erf": 24, "fakequantizederf": 24, "sqrt": 24, "fakequantizedsqrt": 24, "log": [24, 40], "fakequantizedlog": 24, "ab": [24, 39], "fakequantizedab": 24, "fakequantizedneg": 24, "elementwiseceil": 24, "fakequantizedelementwiseceil": 24, "elementwisefloor": 24, "fakequantizedelementwisefloor": 24, "sin": 24, "fakequantizedsin": 24, "co": 24, "fakequantizedco": 24, "asin": 24, "fakequantizedasin": 24, "atan": 24, "fakequantizedatan": 24, "fakequantizedround": 24, "logicalnot": 24, "fakequantizedlogicalnot": 24, "nonzero": 24, "fakequantizednonzero": 24, "elementwiseunarysign": 24, "fakequantizedelementwiseunarysign": 24, "rsqrt": 24, "fakequantizedrsqrt": 24, "squar": [24, 44], "fakequantizedsquar": 24, "fakequantizedmean": 24, "sum": [24, 27], "fakequantizedsum": 24, "prod": 24, "fakequantizedprod": 24, "argmin": 24, "fakequantizedargmin": 24, "argmax": [24, 27], "fakequantizedargmax": 24, "gather": 24, "fakequantizedgath": 24, "reshap": 24, "fakequantizedreshap": 24, "roialign": 24, "fakequantizedroialign": 24, "permut": 24, "fakequantizedpermut": 24, "indexselect": 24, "fakequantizedindexselect": 24, "topk": 24, "fakequantizedtopk": 24, "tile": 24, "fakequantizedtil": 24, "norm": [24, 28, 30, 38, 39, 40], "fakequantizednorm": 24, "cumsum": 24, "fakequantizedcumsum": 24, "interpol": [24, 33], "fakequantizedinterpol": 24, "normal": [24, 30, 40], "pad": [24, 27], "fakequantizedpad": 24, "fakequantizedshap": 24, "fakequantizedexpand": 24, "stridedslic": 24, "fakequantizedstridedslic": 24, "matmul": [24, 46], "fakequantizedmatmul": 24, "fakequantizedadd": 24, "fakequantizedmultipli": 24, "subtract": 24, "fakequantizedsubtract": 24, "quantizedsubtract": 24, "fakequantizeddivid": 24, "floordivid": 24, "fakequantizedfloordivid": 24, "fakequantizedgreat": 24, "fakequantizedless": 24, "greaterequ": 24, "fakequantizedgreaterequ": 24, "lessequ": 24, "fakequantizedlessequ": 24, "notequ": 24, "fakequantizednotequ": 24, "fakequantizedequ": 24, "remaind": 24, "fakequantizedremaind": 24, "fmod": 24, "fakequantizedfmod": 24, "pow": 24, "fakequantizedpow": 24, "customsilu": 24, "fakequantizedcustomsilu": 24, "fakequantizedmaximum": 24, "fakequantizedmax": 24, "fakequantizedminimum": 24, "fakequantizedmin": 24, "bmm": 24, "fakequantizedbmm": 24, "logicalor": 24, "fakequantizedlogicalor": 24, "logicaland": 24, "fakequantizedlogicaland": 24, "customgath": 24, "fakequantizedcustomgath": 24, "gathernd": 24, "fakequantizedgathernd": 24, "baddbmm": 24, "fakequantizedbaddbmm": 24, "addmm": 24, "fakequantizedaddmm": 24, "scatternd": 24, "fakequantizedscatternd": 24, "dynamicconv2d": 24, "fakequantizeddynamicconv2d": 24, "scatterel": 24, "fakequantizedscatterel": 24, "batchnorm": [24, 29, 39, 51], "fakequantizedbatchnorm": 24, "fakequantizedaimetgroupnorm": 24, "nonmaxsuppress": 24, "fakequantizednonmaxsuppress": 24, "fakequantizedsplit": 24, "concat": [24, 46], "fakequantizedconcat": 24, "fakequantizedwher": 24, "maskedfil": 24, "fakequantizedmaskedfil": 24, "allow_overwrit": [25, 26], "allow_overwit": 25, "flag": [25, 26], "get_encod": 25, "get_legacy_encod": 25, "register_quantization_paramet": 25, "set_legacy_encod": 25, "learn": [26, 30, 36, 38, 41, 44, 45, 46], "v1": [26, 45], "debug": [26, 27, 43, 45], "simpler": 26, "extend": [26, 45], "overview": 26, "fundament": 26, "advis": [26, 38, 42, 45], "subject": [26, 45], "understand": [26, 38, 42, 48, 49], "interact": 26, "remain": [26, 33, 
38, 39, 44], "hood": 26, "build": [26, 45], "properti": 26, "shown": [26, 28, 36, 39, 40, 43], "intern": [26, 36, 38, 39, 42], "compon": [26, 45], "namespac": [26, 45], "directli": [26, 40, 44], "adaround_weight": 26, "sequenti": [26, 42, 43, 45], "mse": [26, 40, 44, 45], "seq_ms": 26, "apply_seq_ms": 26, "quantanalyz": [26, 38, 45, 46], "quant_analyz": 26, "autoqu": [26, 38, 41, 46], "auto_qu": 26, "longer": [26, 38, 41], "libpymo": 26, "statement": [26, 37], "stai": 26, "quantschem": [26, 29], "cross_layer_equ": [26, 37], "equalize_model": [26, 37], "model_prepar": [26, 27], "prepare_model": [26, 27], "wrap": 26, "quantizewrapp": 26, "quantizationsimmodelv1": 26, "all_quant_wrapp": 26, "quant_wrapp": 26, "staticgridquantwrapp": 26, "_module_to_wrap": 26, "200": 26, "contrast": 26, "definit": [26, 27, 38], "quantizationsimmodelv2": 26, "sim2": 26, "all_q_modul": 26, "qmodul": 26, "q_modul": 26, "here": [26, 27, 32, 41, 48], "reli": 26, "staticgridquant": 26, "learnedgridquant": 26, "could": [26, 27, 31, 51], "quantizationdatatyp": 26, "tensor_quant": 26, "staticgridperchannelquant": 26, "fp_quantiz": 26, "data_typ": 26, "affine_quant": 26, "howev": [26, 38, 39, 41, 42, 44], "separ": [26, 30, 40, 43, 46], "relat": [26, 40, 44], "affine_q": 26, "affine_qdq": 26, "fp_qdq": 26, "floatquantizedequant": 26, "sim1": 26, "wrap_linear": 26, "symmetri": 26, "use_symmetric_encod": 26, "is_unsigned_symmetr": 26, "use_strict_symmetr": 26, "simplifi": 26, "tfencod": 26, "copy_": 26, "OR": 26, "_remove_input_quant": 26, "_remove_output_quant": 26, "_remove_param_quant": 26, "param_encod": 26, "temporarili": 26, "assert": [26, 27], "freez": [26, 28], "_is_encoding_frozen": 26, "freeze_encod": 26, "concept": 26, "mimick": 26, "involv": [26, 27, 38, 43, 45], "requires_grad_": 26, "prevent": [26, 31], "overwritten": 26, "ti": 26, "design": [26, 39, 45], "portabl": [26, 45], "It": [26, 27, 30, 33, 38, 39, 42, 48, 49, 51], "guidelin": [26, 27, 28, 32, 41], "learnedgridquantwrapp": 26, "encodinganalyzerforpython": 26, "affineencod": 26, "floatencod": 26, "vectorencod": 26, "tutori": 27, "simpl": [27, 38, 51], "intend": [27, 32], "meant": 27, "demonstr": 27, "art": 27, "eval": [27, 33, 36, 48], "loop": [27, 43], "evalu": [27, 29, 33, 36, 38, 40, 41, 44, 48], "clearli": 27, "what": [27, 44, 48], "happen": 27, "let": 27, "special": 27, "look": [27, 48], "torchvis": 27, "is_avail": 27, "loader": [27, 28], "cifar10_train_data": 27, "dataset": [27, 38, 39, 44], "fashionmnist": 27, "tmp": 27, "cifar10": 27, "totensor": 27, "cifar10_test_data": 27, "train_load": 27, "dataload": [27, 40], "batch_siz": 27, "shuffl": 27, "test_load": 27, "super": 27, "conv1": 27, "kernel_s": 27, "stride": 27, "bn_1": 27, "256": [27, 40], "bn_2": 27, "total": [27, 33, 44], "now": [27, 38, 45, 46, 51], "few": [27, 32, 38, 43, 44], "epoch": [27, 34, 36, 38, 41], "establish": 27, "baselin": [27, 33, 41], "send": 27, "loss_fn": 27, "adam": 27, "lr": 27, "1e": [27, 41], "batch_idx": 27, "enumer": [27, 30], "backward": 27, "zero_grad": 27, "fp_accuraci": 27, "91": 27, "70999908447266": 27, "accur": 27, "coupl": [27, 28], "care": 27, "conform": 27, "math": 27, "wherea": [27, 44], "incorrectli": 27, "ignor": 27, "previou": [27, 32, 33, 43], "complet": [27, 30, 43], "redefin": 27, "thankfulli": 27, "incompat": 27, "fulli": [27, 35], "prepared_model": 27, "fp_accuracy_prepar": 27, "2024": 27, "07": 27, "747": 27, "info": [27, 46], "806": 27, "modelprepar": 27, "ad": [27, 35, 38, 42, 46], "node": [27, 41, 44], "module_relu": 27, "module_relu_1": 27, 
"module_softmax": 27, "graphmodul": 27, "ep": 27, "momentum": 27, "track_running_stat": 27, "12544": 27, "getattr_1": 27, "getitem": 27, "graph_modul": 27, "print_read": 27, "distinct": 27, "execut": [27, 33, 48], "adjac": [27, 42], "whenev": 27, "unnecessari": [27, 51], "good": [27, 28], "idea": 27, "batch_norm_fold": 27, "iter": [27, 28, 39], "fold_all_batch_norm": 27, "input_shap": 27, "passthrough": 27, "previous": 27, "had": 27, "impact": [27, 33, 43], "readi": [27, 43], "encount": 27, "therefor": [27, 32, 39], "theoret": 27, "practic": [27, 36], "usual": [27, 41], "500": [27, 28, 39, 40], "1000": [27, 28, 39, 40], "estim": [27, 38, 39], "default_output_bw": 27, "default_param_bw": 27, "idx": 27, "break": 27, "quantized_accuraci": 27, "1500015258789": 27, "noth": 27, "everi": [27, 33, 36, 41, 49], "discuss": [27, 32, 43, 44], "advanc": [27, 45], "re": [27, 38], "One": [27, 32, 36, 47], "qat": [27, 28, 30, 34, 38, 43, 44, 46], "op": [27, 38, 42, 46], "repeat": [27, 31], "post_qat_accuraci": 27, "92": 27, "05333709716797": 27, "happi": 27, "export_path": 27, "model_nam": 27, "fashion_mnist_model": 27, "save": [27, 29, 44, 49], "sent": 27, "nearest": 28, "figur": [28, 33, 43, 51], "illustr": [28, 33, 38, 47, 50], "smaller": [28, 34, 43, 47, 50], "unlabel": [28, 38, 40, 44], "far": 28, "decid": [28, 48], "whether": [28, 41], "awai": 28, "closer": 28, "fp32": [28, 34, 39, 40, 41, 43, 44], "width": [28, 43, 44, 47, 50, 51], "bc": 28, "bnf": 28, "batch": [28, 30, 38, 39, 40], "cle": [28, 38, 43, 46], "cross": [28, 29, 37, 38, 39, 40, 49], "hbf": 28, "awar": [28, 30, 34, 38, 43, 44], "don": 28, "But": [28, 36], "benefici": [28, 40, 41], "consid": [28, 33, 38, 43], "help": [28, 33, 36, 38, 39, 40, 43, 48, 49], "Not": [28, 33], "hyper": [28, 41], "expos": 28, "stabl": 28, "mani": [28, 39, 44], "often": [28, 29, 36, 41], "approxim": [28, 32, 39, 40], "1024": [28, 37], "10000": 28, "moder": 28, "least": [28, 31], "beta": 28, "warm": 28, "period": 28, "offer": 29, "suit": 29, "sequenc": [29, 30, 37, 42], "try": [29, 31, 33, 36, 38, 43], "variou": [29, 32, 36, 38, 43, 44, 46, 49], "error": [29, 38, 41, 43, 44], "prone": 29, "consum": [29, 36], "amount": [29, 42], "toler": [29, 32], "soon": 29, "reach": [29, 32], "stop": 29, "summari": 29, "autom": [29, 38], "prepar": [29, 38, 46], "check": [29, 38, 41, 43], "friendli": [29, 38, 39], "denot": 29, "best": [29, 32, 36, 38, 44], "preprat": 29, "mainli": 29, "three": [29, 32, 49], "stage": 29, "effort": 29, "manner": 29, "fail": [29, 37, 38], "goal": 29, "small": [30, 34, 38], "preceed": 30, "pcq": [30, 40], "veri": [30, 32, 36, 40, 49, 51], "NOT": [30, 51], "scenario": [30, 36, 38, 51], "decreas": 30, "main": [30, 42, 46, 49], "issu": [30, 34, 37, 43, 46, 48, 49], "depthwis": [30, 46], "oscil": 30, "quant": 30, "flow": [30, 38, 41, 43, 44], "diagram": [30, 33, 36, 44, 47, 50], "kera": [30, 34, 38, 42, 44, 46], "explain": [31, 36, 39, 44, 51], "occurr": 31, "ratio": [31, 32, 48], "magnitud": 31, "matrix": 31, "upstream": [31, 51], "gain": [31, 36], "presenc": 31, "connect": [31, 35, 50], "residu": 31, "sometim": [31, 36, 39, 40], "attempt": [31, 38, 39], "close": [31, 32, 44], "prior": [31, 38, 40], "random": [31, 40], "regress": 31, "svd": [32, 33, 35, 36, 46], "spatial": [32, 33, 35, 36, 46], "ssvd": 32, "prune": [32, 33, 35, 36, 46, 51], "accumul": 32, "mac": [32, 36, 47, 50], "reduct": 32, "uncompress": 32, "algorithm": [32, 33, 36, 43, 51], "overal": [32, 36, 43], "latenc": 32, "bandwidth": 32, "vari": [32, 33, 39, 49], "architectur": 32, "io": 
[32, 46], "html": [32, 40, 46, 49], "At": [32, 36, 45], "half": 32, "unknown": 32, "apriori": 32, "cssvd": 32, "tri": [32, 38], "75": 32, "pick": [32, 33, 36], "2b": 32, "rel": [32, 38, 43, 49], "avoid": 32, "larg": [32, 41, 47, 50], "2a": 32, "revisit": 32, "ccp": 32, "resnet": 32, "csvd": 32, "assess": 33, "sensit": [33, 38, 40, 43, 44, 46], "applic": [33, 37], "find": [33, 38, 40, 41, 44], "sure": [33, 37], "highest": 33, "dictionari": [33, 36, 42], "column": 33, "captur": 33, "predefin": 33, "candid": [33, 36], "unmodifi": 33, "score": [33, 36, 48], "last": [33, 35, 43], "monoton": 33, "fit": 33, "strict": [33, 42, 44], "procedur": [33, 36], "curv": 33, "core": 33, "constant": [33, 38], "met": 33, "binari": 33, "solut": [33, 41, 43], "quickli": 33, "suggest": [33, 36, 39], "lesser": [33, 36], "drstical": 33, "softwar": [34, 36], "framework": [34, 38, 42, 44], "meta": [34, 38], "h5": [34, 38], "hw": 34, "ptq": [34, 38, 40, 41], "redund": 34, "dilat": 35, "modules_to_ignor": 35, "depthwiseconv2d": 35, "librari": 36, "guidebook": [36, 38], "advic": 36, "greedi": [36, 48], "phase": [36, 38], "choic": [36, 44], "nomin": 36, "And": 36, "ml": [36, 38, 39, 48, 49], "those": 36, "fc": 36, "decompos": [36, 47, 50], "term": [36, 47, 48, 49, 50], "sharp": 36, "degrad": 36, "might": [36, 40], "rate": [36, 41], "carefulli": 36, "decai": 36, "slow": 36, "someth": [36, 48], "speed": [36, 39, 46], "itself": [36, 44, 47, 50], "load": 36, "searcher": 36, "Or": 36, "strike": 36, "balanc": 36, "chosen": 36, "major": [36, 45], "sai": 36, "xiangyu": 36, "zhang": 36, "jianhua": 36, "zou": 36, "kaim": 36, "he": 36, "jian": 36, "sun": 36, "deep": 36, "classif": 36, "detect": 36, "transact": 36, "pattern": 36, "analysi": [36, 43], "intellig": 36, "vol": 36, "pp": 36, "1943": 36, "1955": 36, "oct": 36, "2016": 36, "yihui": 36, "confer": [36, 39], "vision": [36, 39], "iccv": [36, 39], "venic": 36, "2017": 36, "1398": 36, "1406": 36, "jaderberg": 36, "andrea": 36, "vedaldi": 36, "andrew": 36, "zisserman": 36, "british": 36, "jan": 36, "2014": 36, "andrei": 36, "kuzmin": 36, "marku": [36, 39], "nagel": [36, 39], "saurabh": 36, "pitr": 36, "sandeep": 36, "pendyam": 36, "tijmen": [36, 39], "blankevoort": [36, 39], "taxonomi": 36, "graph": [37, 38, 44, 48], "successfulli": 37, "potenti": [37, 40, 48, 49], "workaround": 37, "primit": 37, "around": 37, "rewrit": 37, "slice": 37, "written": [37, 38], "caus": [37, 43, 44], "align_corn": 37, "deconvolut": 37, "deeplabv3": 37, "address": [37, 43, 48], "releas": [37, 45], "hardwar": [38, 39, 44], "predict": 38, "oppos": [38, 42], "advantag": 38, "No": 38, "pipelin": [38, 41, 43, 44], "suffici": [38, 40, 41, 44], "even": 38, "fast": 38, "easi": [38, 40], "gap": 38, "insert": [38, 44], "robust": 38, "account": [38, 41, 43], "trainabl": 38, "bias": 38, "reflect": [38, 44], "integr": 38, "standalon": 38, "consecut": [38, 39], "bn": [38, 46], "deprec": 38, "prep": 38, "accord": [38, 41, 42, 44], "align": 38, "retri": 38, "continu": [38, 39, 41, 43], "warn": 38, "hand": 38, "satisfactori": [38, 43], "bring": 38, "onto": 38, "thing": 38, "item": 38, "checkpoint": 38, "pb": 38, "trial": 38, "seem": 38, "off": [38, 39, 42], "bat": 38, "becom": 39, "paper": 39, "2019": 39, "arxiv": 39, "1906": 39, "04721": 39, "surround": 39, "highlight": [39, 48, 49], "big": 39, "discrep": 39, "accept": [39, 43], "wide": 39, "varianc": 39, "seen": [39, 40], "significantli": 39, "quantizaion": 39, "distribut": [39, 43, 44], "did": 39, "shift": 39, "empir": 39, "analyt": [39, 48, 49], "extract": 39, 
"bottleneck": [39, 43], "hybrid": 39, "approach": [39, 44], "mart": 39, "van": 39, "baalen": 39, "seoul": 39, "octob": 39, "hotspot": 40, "analys": 40, "callback": [40, 44], "plot": 40, "pretrain": [40, 41, 44], "dummi": 40, "label": [40, 41], "metric": [40, 44], "rune": 40, "doc": [40, 42, 48], "situat": 40, "pinpoint": 40, "culprit": 40, "again": [40, 41, 48], "per_layer_quant_en": 40, "per_layer_quant_dis": 40, "axi": 40, "track": 40, "min_max_rang": 40, "folder": 40, "enhanc": [40, 44], "toss": 40, "displai": [40, 48, 49], "activations_pdf": 40, "weights_pdf": 40, "monitor": 40, "contribut": [40, 43], "read": 40, "per_layer_mse_loss": 40, "mitig": [41, 44], "hyperparamet": 41, "accompani": 41, "throughout": [41, 42, 45, 49], "aid": 41, "converg": 41, "schedul": 41, "placement": 42, "fuse": [42, 44], "six": 42, "overrul": 42, "turn": 42, "op_typ": 42, "empti": 42, "is_output_quant": 42, "is_quant": 42, "strict_symmetr": 42, "unsigned_symmetr": 42, "omit": 42, "altogeth": 42, "asid": 42, "govern": 42, "unsign": [42, 44], "gemm": 42, "is_input_quant": 42, "recogn": [42, 44], "keep": [42, 43], "convent": 42, "preced": 42, "supergroup": [42, 46], "made": [42, 45], "op_list": 42, "member": 42, "branch": 42, "config": [42, 46], "entri": 42, "string": 42, "model_input": 42, "whatev": 42, "earlier": 42, "model_output": 42, "diagnost": 43, "strictli": 43, "insight": [43, 48, 49], "why": 43, "underperform": 43, "tackl": 43, "chart": 43, "saniti": 43, "ofth": 43, "independ": 43, "kept": 43, "convers": 43, "toward": 43, "wise": 43, "uneven": 43, "vanilla": 43, "global": 43, "restor": 43, "rest": 43, "inner": 43, "token": 43, "bert": 43, "reveal": 43, "problemat": [43, 49], "problem": 43, "resort": 43, "revert": 43, "power": [43, 45], "ultim": 44, "copi": 44, "ingest": 44, "feed": 44, "000": 44, "yield": 44, "dequantiz": 44, "hook": 44, "intercept": 44, "four": 44, "zero": [44, 46], "vice": 44, "versa": 44, "textrm": 44, "dfrac": 44, "strong": 44, "excess": 44, "signal": 44, "satur": 44, "erro": 44, "static": 44, "alongsid": 44, "ones": 44, "just": [44, 48, 51], "non": 44, "intermedi": 44, "welcom": 45, "motiv": 45, "clean": 45, "ground": 45, "maintain": 45, "familiar": 45, "newli": 45, "flexibl": 45, "transpar": 45, "redesign": 45, "yet": 45, "mainlin": 45, "compris": 45, "dispatch": 45, "easili": 45, "move": 45, "uphold": 45, "migrat": 45, "navig": 45, "blockwis": 45, "slim": 46, "backslash": 46, "user_guid": 46, "api_doc": 46, "quantizablemultiheadattent": 46, "kyuykim": 46, "multi": 46, "mangal": 46, "geunle": 46, "bug": 46, "correctli": 46, "leaf": 46, "klhsieh": 46, "akhobar": 46, "multiheadattent": 46, "ashvkuma": 46, "mha": 46, "pdf": 46, "fp16": 46, "minor": 46, "stand": [46, 47, 50], "adaptiveround": 46, "recurr": 46, "packag": 46, "decomposit": [47, 50], "singular": [47, 50], "\ud835\udc5a": [47, 50], "\ud835\udc5b": [47, 50], "\u210e": [47, 50], "\ud835\udc64": [47, 50], "give": [47, 50], "height": [47, 50, 51], "\ud835\udc58": [47, 50], "k": 47, "rank": [47, 50], "degre": [47, 50], "progress": [48, 49], "computation": [48, 49], "task": [48, 49], "websocket": 48, "tell": 48, "listen": 48, "rather": 48, "5006": 48, "compress_model": 48, "visualizecompress": 48, "display_eval_scor": 48, "display_comp_ratio_plot": 48, "directori": 49, "lot": 49, "anoth": [50, 51], "lose": 51, "much": 51, "explicitli": 51, "pictori": 51, "volum": 51, "hxwx8": 51, "hxwx5": 51, "simpli": 51, "propag": 51, "That": 51, "teh": 51, "green": 51, "color": 51, "side": 51, "action": 51, "taken": 51, "pink": 51, 
"orang": 51}, "objects": {"aimet_torch.v2.nn": [[6, 0, 1, "", "FakeQuantizationMixin"], [7, 0, 1, "", "QuantizationMixin"]], "aimet_torch.v2.nn.FakeQuantizationMixin": [[6, 1, 1, "", "__quant_init__"], [6, 1, 1, "", "compute_encodings"], [6, 1, 1, "", "forward"], [6, 1, 1, "", "from_module"], [6, 1, 1, "", "implements"], [6, 2, 1, "", "input_quantizers"], [6, 2, 1, "", "output_quantizers"], [6, 2, 1, "", "param_quantizers"]], "aimet_torch.v2.nn.QuantizationMixin": [[7, 1, 1, "", "__quant_init__"], [7, 1, 1, "", "compute_encodings"], [7, 1, 1, "", "forward"], [7, 1, 1, "", "from_module"], [7, 1, 1, "", "get_default_kernel"], [7, 1, 1, "", "get_kernel"], [7, 1, 1, "", "implements"], [7, 2, 1, "", "input_quantizers"], [7, 2, 1, "", "output_quantizers"], [7, 2, 1, "", "param_quantizers"], [7, 1, 1, "", "set_default_kernel"], [7, 1, 1, "", "set_kernel"]], "aimet_torch.v2.nn.base": [[24, 0, 1, "", "BaseQuantizationMixin"]], "aimet_torch.v2.nn.base.BaseQuantizationMixin": [[24, 1, 1, "", "__quant_init__"], [24, 1, 1, "", "compute_encodings"], [24, 1, 1, "", "forward"], [24, 2, 1, "", "input_quantizers"], [24, 2, 1, "", "output_quantizers"], [24, 2, 1, "", "param_quantizers"]], "aimet_torch.v2.quantization": [[13, 3, 0, "-", "affine"], [15, 3, 0, "-", "float"]], "aimet_torch.v2.quantization.affine": [[8, 0, 1, "", "Quantize"], [9, 0, 1, "", "QuantizeDequantize"], [10, 4, 1, "", "dequantize"], [11, 4, 1, "", "quantize"], [12, 4, 1, "", "quantize_dequantize"]], "aimet_torch.v2.quantization.affine.Quantize": [[8, 1, 1, "", "forward"]], "aimet_torch.v2.quantization.affine.QuantizeDequantize": [[9, 1, 1, "", "forward"]], "aimet_torch.v2.quantization.affine.quantizer": [[25, 0, 1, "", "Quantize"], [25, 0, 1, "", "QuantizeDequantize"], [25, 0, 1, "", "QuantizerBase"]], "aimet_torch.v2.quantization.affine.quantizer.Quantize": [[25, 1, 1, "", "forward"]], "aimet_torch.v2.quantization.affine.quantizer.QuantizeDequantize": [[25, 1, 1, "", "forward"]], "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase": [[25, 1, 1, "", "allow_overwrite"], [25, 1, 1, "", "compute_encodings"], [25, 1, 1, "", "get_encoding"], [25, 1, 1, "", "get_legacy_encodings"], [25, 1, 1, "", "is_initialized"], [25, 1, 1, "", "register_quantization_parameter"], [25, 1, 1, "", "set_legacy_encodings"]], "aimet_torch.v2.quantization.encoding_analyzer": [[18, 0, 1, "", "EncodingAnalyzer"], [20, 0, 1, "", "MinMaxEncodingAnalyzer"], [21, 0, 1, "", "PercentileEncodingAnalyzer"], [22, 0, 1, "", "SqnrEncodingAnalyzer"]], "aimet_torch.v2.quantization.encoding_analyzer.PercentileEncodingAnalyzer": [[21, 1, 1, "", "set_percentile"]], "aimet_torch.v2.quantization.encoding_analyzer.SqnrEncodingAnalyzer": [[22, 1, 1, "", "compute_encodings_from_stats"]], "aimet_torch.v2.quantization.float": [[14, 0, 1, "", "FloatQuantizeDequantize"], [14, 0, 1, "", "QuantizeDequantize"]], "aimet_torch.v2.quantization.tensor": [[16, 0, 1, "", "DequantizedTensor"], [16, 0, 1, "", "QuantizedTensor"]], "aimet_torch.v2.quantization.tensor.DequantizedTensor": [[16, 1, 1, "", "dequantize"], [16, 1, 1, "", "quantize"], [16, 1, 1, "", "quantized_repr"]], "aimet_torch.v2.quantization.tensor.QuantizedTensor": [[16, 1, 1, "", "dequantize"], [16, 1, 1, "", "quantize"], [16, 1, 1, "", "quantized_repr"]], "aimet_torch.v2.quantsim.config_utils": [[17, 4, 1, "", "set_activation_quantizers_to_float"], [17, 4, 1, "", "set_blockwise_quantization_for_weights"], [17, 4, 1, "", "set_grouped_blockwise_quantization_for_weights"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": 
"py:attribute", "3": "py:module", "4": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "module", "Python module"], "4": ["py", "function", "Python function"]}, "titleterms": {"aimet": [2, 3, 4, 23, 28, 29, 30, 31, 32, 33, 35, 36, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 50, 51], "instal": [2, 3, 4, 34], "quick": 2, "releas": [2, 3, 4, 34, 46], "packag": [2, 3, 4], "system": 2, "requir": [2, 40], "advanc": 2, "instruct": 2, "docker": 3, "set": 3, "variant": [3, 18], "us": [3, 28, 36, 38, 45, 48], "prebuilt": 3, "imag": 3, "build": 3, "local": 3, "start": [3, 23, 34, 48], "contain": 3, "from": [3, 4, 26], "pypi": [3, 4], "environ": [3, 4], "setup": [3, 4], "prerequisit": [4, 27], "gpu": 4, "pytorch": [4, 23, 27, 37, 38, 49], "2": [4, 27, 46], "1": [4, 27, 46], "tensorflow": [4, 38, 49], "13": [4, 46], "onnx": 4, "common": [4, 28], "debian": 4, "torch": 4, "replac": 4, "pillow": 4, "simd": 4, "onnxruntim": 4, "post": [4, 19, 38, 39], "step": 4, "fakequantizationmixin": 6, "quantizationmixin": 7, "quantiz": [8, 11, 13, 15, 16, 17, 19, 24, 25, 26, 27, 38, 39, 41, 42, 43, 44, 49], "quantizedequant": [9, 14], "dequant": 10, "quantize_dequant": 12, "affin": [13, 26], "class": [13, 16, 24], "function": 13, "floatquantizedequant": 14, "float": [15, 26, 27], "tensor": 16, "blockwis": 17, "low": 17, "power": 17, "lpbq": 17, "top": [17, 24, 25], "level": [17, 24, 25], "api": [17, 23, 24, 25], "export": [17, 27], "encod": [18, 24, 44], "analyz": 18, "train": [19, 27, 38, 39, 41], "minmaxencodinganalyz": 20, "percentileencodinganalyz": 21, "sqnrencodinganalyz": 22, "ai": [23, 34], "model": [23, 27, 34, 36, 37, 38], "effici": [23, 34], "toolkit": [23, 34], "document": 23, "get": [23, 34, 36], "exampl": [23, 26], "featur": [23, 26, 32, 34, 38, 43, 45], "descript": [23, 40], "modul": [24, 26], "configur": [24, 42, 44], "comput": 24, "migrat": 26, "quantsim": [26, 27, 44, 45], "v2": [26, 45], "chang": 26, "process": 26, "import": 26, "quantizationsimmodel": 26, "move": 26, "quantwrapp": 26, "staticgrid": 26, "learnedgrid": 26, "code": 26, "deprec": 26, "quickstart": 27, "guid": [27, 34], "overal": [27, 31], "flow": [27, 39], "prepar": 27, "point": 27, "batchnorm": 27, "fold": 27, "fine": [27, 36], "tune": [27, 36], "awar": [27, 41], "adaround": 28, "case": [28, 36, 38], "terminologi": 28, "autoqu": 29, "overview": [29, 30, 33, 34, 36, 39, 40, 41, 42, 44, 45, 48, 49, 51], "workflow": [29, 30, 38, 41, 44], "bn": 30, "re": 30, "estim": 30, "channel": 31, "prune": 31, "procedur": 31, "select": [31, 33, 36], "winnow": [31, 51], "weight": [31, 50], "reconstruct": 31, "compress": [32, 33, 36, 48], "guidebook": [32, 43], "greedi": 33, "ratio": [33, 36], "how": [33, 42, 48, 51], "work": [33, 51], "per": [33, 36], "layer": [33, 36], "explor": 33, "user": [34, 39], "inform": 34, "toc": 34, "tree": 34, "known": 35, "issu": 35, "option": 36, "techniqu": [36, 39], "better": 36, "result": 36, "rank": 36, "round": 36, "faq": [36, 39], "refer": [36, 39], "guidelin": [37, 38], "debug": 38, "analysi": [38, 40], "tool": [38, 48], "quantanalyz": 40, "detail": 40, "qat": 41, "mode": 41, "recommend": 41, "simul": [42, 44], "file": 42, "structur": 42, "individu": 42, "section": 42, "nois": 44, "determin": 44, "paramet": 44, "scheme": 44, "op": 44, "frequent": 44, "ask": 44, "question": 44, "new": 45, "note": 46, "22": 46, "0": 46, "21": 46, "20": 46, "19": 46, "py37": 46, "18": 46, "17": 46, "16": 46, "14": 46, 
"spatial": 47, "svd": [47, 50], "visual": [48, 49], "design": 48, "bokeh": 48, "server": 48, "session": 48}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "nbsphinx": 4, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"AIMET Installation": [[2, "aimet-installation"]], "Quick Install": [[2, "quick-install"]], "Release Packages": [[2, "release-packages"]], "System Requirements": [[2, "system-requirements"]], "Advanced Installation Instructions": [[2, "advanced-installation-instructions"]], "AIMET Installation in Docker": [[3, "aimet-installation-in-docker"]], "Set variant": [[3, "set-variant"]], "Use prebuilt docker image": [[3, "use-prebuilt-docker-image"]], "Build docker image locally": [[3, "build-docker-image-locally"]], "Start docker container": [[3, "start-docker-container"]], "Install AIMET packages": [[3, "install-aimet-packages"], [4, "install-aimet-packages"]], "From PyPI": [[3, "from-pypi"], [4, "from-pypi"]], "From Release Package": [[3, "from-release-package"], [4, "from-release-package"]], "Environment setup": [[3, "environment-setup"], [4, "environment-setup"]], "AIMET Installation and Setup": [[4, "aimet-installation-and-setup"]], "Install prerequisite packages": [[4, "install-prerequisite-packages"]], "Install GPU packages": [[4, "install-gpu-packages"]], "Install GPU packages for PyTorch 2.1 or TensorFlow": [[4, "install-gpu-packages-for-pytorch-2-1-or-tensorflow"]], "Install GPU packages for PyTorch 1.13 or ONNX": [[4, "install-gpu-packages-for-pytorch-1-13-or-onnx"]], "Install common debian packages": [[4, "install-common-debian-packages"]], "Install tensorflow GPU debian packages": [[4, "install-tensorflow-gpu-debian-packages"]], "Install torch GPU debian packages": [[4, "install-torch-gpu-debian-packages"]], "Install ONNX GPU debian packages": [[4, "install-onnx-gpu-debian-packages"]], "Replace Pillow with Pillow-SIMD": [[4, "replace-pillow-with-pillow-simd"]], "Replace onnxruntime with onnxruntime-gpu": [[4, "replace-onnxruntime-with-onnxruntime-gpu"]], "Post installation steps": [[4, "post-installation-steps"]], "FakeQuantizationMixin": [[6, "fakequantizationmixin"]], "QuantizationMixin": [[7, "quantizationmixin"]], "Quantize": [[8, "quantize"]], "QuantizeDequantize": [[9, "quantizedequantize"], [14, "quantizedequantize"]], "dequantize": [[10, "dequantize"]], "quantize": [[11, "quantize"]], "quantize_dequantize": [[12, "quantize-dequantize"]], "quantization.affine": [[13, "module-aimet_torch.v2.quantization.affine"]], "Classes": [[13, "classes"], [16, "classes"]], "Functions": [[13, "functions"]], "FloatQuantizeDequantize": [[14, "floatquantizedequantize"]], "quantization.float": [[15, "module-aimet_torch.v2.quantization.float"]], "quantization.tensor": [[16, "quantization-tensor"]], "Blockwise Quantization": [[17, "blockwise-quantization"]], "Low Power Blockwise Quantization (LPBQ)": [[17, "low-power-blockwise-quantization-lpbq"]], "Top Level API": [[17, "top-level-api"]], "Export": [[17, "export"]], "Encoding Analyzers": [[18, "encoding-analyzers"]], "Variants": [[18, "variants"]], "Post-Training Quantization": [[19, "post-training-quantization"], [38, "post-training-quantization"]], "MinMaxEncodingAnalyzer": [[20, "minmaxencodinganalyzer"]], "PercentileEncodingAnalyzer": [[21, 
"percentileencodinganalyzer"]], "SqnrEncodingAnalyzer": [[22, "sqnrencodinganalyzer"]], "AIMET: AI Model Efficiency Toolkit Documentation": [[23, "aimet-ai-model-efficiency-toolkit-documentation"]], "Getting Started": [[23, "getting-started"], [34, "getting-started"]], "Examples": [[23, null]], "Feature Descriptions": [[23, null]], "AIMET PyTorch API": [[23, null]], "Quantized Modules": [[24, "quantized-modules"]], "Top-level API": [[24, "top-level-api"], [25, "top-level-api"]], "Configuration": [[24, "configuration"]], "Computing Encodings": [[24, "computing-encodings"]], "Quantized Module Classes": [[24, "quantized-module-classes"]], "Quantizers": [[25, "quantizers"]], "Migrate to QuantSim v2": [[26, "migrate-to-quantsim-v2"]], "Changes in QuantSim v2": [[26, "changes-in-quantsim-v2"]], "Migration Process": [[26, "migration-process"]], "Imports": [[26, "imports"]], "QuantizationSimModel": [[26, "quantizationsimmodel"]], "Moving from QuantWrapper to Quantized Modules": [[26, "moving-from-quantwrapper-to-quantized-modules"]], "Moving from StaticGrid and LearnedGrid Quantizer to Affine and Float Quantizer": [[26, "moving-from-staticgrid-and-learnedgrid-quantizer-to-affine-and-float-quantizer"]], "Code Examples": [[26, "code-examples"]], "Deprecated Features": [[26, "deprecated-features"]], "Quickstart Guide": [[27, "quickstart-guide"]], "Overall flow": [[27, "overall-flow"]], "PyTorch prerequisites": [[27, "pytorch-prerequisites"]], "Prepare the floating point model for quantization": [[27, "prepare-the-floating-point-model-for-quantization"]], "1) Model preparation": [[27, "model-preparation"]], "2) BatchNorm fold": [[27, "batchnorm-fold"]], "Quantize the model": [[27, "quantize-the-model"]], "Fine-tune the model with quantization aware training": [[27, "fine-tune-the-model-with-quantization-aware-training"]], "Export the quantsim model": [[27, "export-the-quantsim-model"]], "AIMET AdaRound": [[28, "aimet-adaround"]], "AdaRound Use Cases": [[28, "adaround-use-cases"]], "Common terminology": [[28, "common-terminology"]], "Use Cases": [[28, "use-cases"], [38, "use-cases"]], "AIMET AutoQuant": [[29, "aimet-autoquant"]], "Overview": [[29, "overview"], [30, "overview"], [33, "overview"], [34, "overview"], [36, "overview"], [39, "overview"], [40, "overview"], [41, "overview"], [42, "overview"], [44, "overview"], [45, "overview"], [48, "overview"], [49, "overview"], [51, "overview"]], "Workflow": [[29, "workflow"], [30, "workflow"]], "AIMET BN Re-estimation": [[30, "aimet-bn-re-estimation"]], "AIMET Channel Pruning": [[31, "aimet-channel-pruning"]], "Overall Procedure": [[31, "overall-procedure"]], "Channel Selection": [[31, "channel-selection"]], "Winnowing": [[31, "winnowing"]], "Weight Reconstruction": [[31, "weight-reconstruction"]], "AIMET Compression Features Guidebook": [[32, "aimet-compression-features-guidebook"]], "AIMET Greedy Compression Ratio Selection": [[33, "aimet-greedy-compression-ratio-selection"]], "How it works": [[33, "how-it-works"]], "Per-layer Exploration": [[33, "per-layer-exploration"]], "Compression Ratio Selection": [[33, "compression-ratio-selection"]], "AI Model Efficiency Toolkit User Guide": [[34, "ai-model-efficiency-toolkit-user-guide"]], "Features": [[34, "features"]], "Release Information": [[34, "release-information"]], "Installation Guide": [[34, "installation-guide"]], "toc tree": [[34, "toc-tree"]], "AIMET Known Issues": [[35, "aimet-known-issues"]], "AIMET Model Compression": [[36, "aimet-model-compression"]], "Use Case": [[36, "use-case"]], "Compression 
ratio selection": [[36, "compression-ratio-selection"]], "Model Compression": [[36, "model-compression"]], "Optional techniques to get better compression results": [[36, "optional-techniques-to-get-better-compression-results"]], "Rank Rounding": [[36, "rank-rounding"]], "Per-layer Fine-tuning": [[36, "per-layer-fine-tuning"]], "FAQs": [[36, "faqs"], [39, "faqs"]], "References": [[36, "references"], [39, "references"]], "Model Guidelines for PyTorch": [[37, "model-guidelines-for-pytorch"]], "AIMET Model Quantization": [[38, "aimet-model-quantization"]], "AIMET Quantization Features": [[38, "aimet-quantization-features"]], "Debugging/Analysis Tools": [[38, "debugging-analysis-tools"]], "AIMET Quantization Workflow": [[38, "aimet-quantization-workflow"]], "PyTorch": [[38, "pytorch"], [49, "pytorch"]], "Tensorflow": [[38, "tensorflow"]], "Debugging Guidelines": [[38, "debugging-guidelines"]], "AIMET Post-Training Quantization Techniques": [[39, "aimet-post-training-quantization-techniques"]], "User Flow": [[39, "user-flow"]], "AIMET QuantAnalyzer": [[40, "aimet-quantanalyzer"]], "Requirements": [[40, "requirements"]], "Detailed Analysis Descriptions": [[40, "detailed-analysis-descriptions"]], "AIMET Quantization Aware Training": [[41, "aimet-quantization-aware-training"]], "QAT workflow": [[41, "qat-workflow"]], "QAT modes": [[41, "qat-modes"]], "Recommendations for Quantization-Aware Training": [[41, "recommendations-for-quantization-aware-training"]], "Quantization Simulation Configuration": [[42, "quantization-simulation-configuration"]], "Configuration File Structure": [[42, "configuration-file-structure"]], "How to configure individual Configuration File Sections": [[42, "how-to-configure-individual-configuration-file-sections"]], "AIMET Quantization Features Guidebook": [[43, "aimet-quantization-features-guidebook"]], "AIMET Quantization Simulation": [[44, "aimet-quantization-simulation"]], "QuantSim Workflow": [[44, "quantsim-workflow"]], "Simulating Quantization Noise": [[44, "simulating-quantization-noise"]], "Determining Quantization Parameters (Encodings)": [[44, "determining-quantization-parameters-encodings"]], "Quantization Schemes": [[44, "quantization-schemes"]], "Configuring Quantization Simulation Ops": [[44, "configuring-quantization-simulation-ops"]], "Frequently Asked Questions": [[44, "frequently-asked-questions"]], "QuantSim v2": [[45, "quantsim-v2"]], "Using QuantSim v2": [[45, "using-quantsim-v2"]], "New Features": [[45, "new-features"]], "AIMET Release Notes": [[46, "aimet-release-notes"]], "1.22.2": [[46, "id1"]], "1.22.1": [[46, "id2"]], "1.22.0": [[46, "id3"]], "1.21.0": [[46, "id4"]], "1.20.0": [[46, "id5"]], "1.19.1.py37": [[46, "py37"]], "1.19.1": [[46, "id6"]], "1.18.0.py37": [[46, "id7"]], "1.18.0": [[46, "id8"]], "1.17.0.py37": [[46, "id9"]], "1.17.0": [[46, "id10"]], "1.16.2.py37": [[46, "id11"]], "1.16.2": [[46, "id12"]], "1.16.1.py37": [[46, "id13"]], "1.16.1": [[46, "id14"]], "1.16.0": [[46, "id15"]], "1.14.0": [[46, "id16"]], "1.13.0": [[46, "id17"]], "AIMET Spatial SVD": [[47, "aimet-spatial-svd"]], "AIMET Visualization": [[48, "aimet-visualization"]], "Design": [[48, "design"]], "Compression": [[48, "compression"]], "Starting a Bokeh Server Session:": [[48, "starting-a-bokeh-server-session"]], "How to use the tool": [[48, "how-to-use-the-tool"]], "AIMET Visualization for Quantization": [[49, "aimet-visualization-for-quantization"]], "Quantization": [[49, "quantization"]], "TensorFlow": [[49, "tensorflow"]], "AIMET Weight SVD": [[50, 
"aimet-weight-svd"]], "AIMET Winnowing": [[51, "aimet-winnowing"]], "Winnowing Overview": [[51, "winnowing-overview"]], "How Winnowing Works": [[51, "how-winnowing-works"]]}, "indexentries": {"fakequantizationmixin (class in aimet_torch.v2.nn)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin"]], "__quant_init__() (aimet_torch.v2.nn.fakequantizationmixin method)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.__quant_init__"]], "compute_encodings() (aimet_torch.v2.nn.fakequantizationmixin method)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.compute_encodings"]], "forward() (aimet_torch.v2.nn.fakequantizationmixin method)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.forward"]], "from_module() (aimet_torch.v2.nn.fakequantizationmixin class method)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.from_module"]], "implements() (aimet_torch.v2.nn.fakequantizationmixin class method)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.implements"]], "input_quantizers (aimet_torch.v2.nn.fakequantizationmixin attribute)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.input_quantizers"]], "output_quantizers (aimet_torch.v2.nn.fakequantizationmixin attribute)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.output_quantizers"]], "param_quantizers (aimet_torch.v2.nn.fakequantizationmixin attribute)": [[6, "aimet_torch.v2.nn.FakeQuantizationMixin.param_quantizers"]], "quantizationmixin (class in aimet_torch.v2.nn)": [[7, "aimet_torch.v2.nn.QuantizationMixin"]], "__quant_init__() (aimet_torch.v2.nn.quantizationmixin method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.__quant_init__"]], "compute_encodings() (aimet_torch.v2.nn.quantizationmixin method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.compute_encodings"]], "forward() (aimet_torch.v2.nn.quantizationmixin method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.forward"]], "from_module() (aimet_torch.v2.nn.quantizationmixin class method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.from_module"]], "get_default_kernel() (aimet_torch.v2.nn.quantizationmixin class method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.get_default_kernel"]], "get_kernel() (aimet_torch.v2.nn.quantizationmixin method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.get_kernel"]], "implements() (aimet_torch.v2.nn.quantizationmixin class method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.implements"]], "input_quantizers (aimet_torch.v2.nn.quantizationmixin attribute)": [[7, "aimet_torch.v2.nn.QuantizationMixin.input_quantizers"]], "output_quantizers (aimet_torch.v2.nn.quantizationmixin attribute)": [[7, "aimet_torch.v2.nn.QuantizationMixin.output_quantizers"]], "param_quantizers (aimet_torch.v2.nn.quantizationmixin attribute)": [[7, "aimet_torch.v2.nn.QuantizationMixin.param_quantizers"]], "set_default_kernel() (aimet_torch.v2.nn.quantizationmixin class method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.set_default_kernel"]], "set_kernel() (aimet_torch.v2.nn.quantizationmixin method)": [[7, "aimet_torch.v2.nn.QuantizationMixin.set_kernel"]], "quantize (class in aimet_torch.v2.quantization.affine)": [[8, "aimet_torch.v2.quantization.affine.Quantize"]], "forward() (aimet_torch.v2.quantization.affine.quantize method)": [[8, "aimet_torch.v2.quantization.affine.Quantize.forward"]], "quantizedequantize (class in aimet_torch.v2.quantization.affine)": [[9, "aimet_torch.v2.quantization.affine.QuantizeDequantize"]], "forward() (aimet_torch.v2.quantization.affine.quantizedequantize method)": [[9, "aimet_torch.v2.quantization.affine.QuantizeDequantize.forward"]], "dequantize() (in module 
aimet_torch.v2.quantization.affine)": [[10, "aimet_torch.v2.quantization.affine.dequantize"]], "quantize() (in module aimet_torch.v2.quantization.affine)": [[11, "aimet_torch.v2.quantization.affine.quantize"]], "quantize_dequantize() (in module aimet_torch.v2.quantization.affine)": [[12, "aimet_torch.v2.quantization.affine.quantize_dequantize"]], "aimet_torch.v2.quantization.affine": [[13, "module-aimet_torch.v2.quantization.affine"]], "module": [[13, "module-aimet_torch.v2.quantization.affine"], [15, "module-aimet_torch.v2.quantization.float"]], "floatquantizedequantize (class in aimet_torch.v2.quantization.float)": [[14, "aimet_torch.v2.quantization.float.FloatQuantizeDequantize"]], "quantizedequantize (class in aimet_torch.v2.quantization.float)": [[14, "aimet_torch.v2.quantization.float.QuantizeDequantize"]], "aimet_torch.v2.quantization.float": [[15, "module-aimet_torch.v2.quantization.float"]], "dequantizedtensor (class in aimet_torch.v2.quantization.tensor)": [[16, "aimet_torch.v2.quantization.tensor.DequantizedTensor"]], "quantizedtensor (class in aimet_torch.v2.quantization.tensor)": [[16, "aimet_torch.v2.quantization.tensor.QuantizedTensor"]], "dequantize() (aimet_torch.v2.quantization.tensor.dequantizedtensor method)": [[16, "aimet_torch.v2.quantization.tensor.DequantizedTensor.dequantize"]], "dequantize() (aimet_torch.v2.quantization.tensor.quantizedtensor method)": [[16, "aimet_torch.v2.quantization.tensor.QuantizedTensor.dequantize"]], "quantize() (aimet_torch.v2.quantization.tensor.dequantizedtensor method)": [[16, "aimet_torch.v2.quantization.tensor.DequantizedTensor.quantize"]], "quantize() (aimet_torch.v2.quantization.tensor.quantizedtensor method)": [[16, "aimet_torch.v2.quantization.tensor.QuantizedTensor.quantize"]], "quantized_repr() (aimet_torch.v2.quantization.tensor.dequantizedtensor method)": [[16, "aimet_torch.v2.quantization.tensor.DequantizedTensor.quantized_repr"]], "quantized_repr() (aimet_torch.v2.quantization.tensor.quantizedtensor method)": [[16, "aimet_torch.v2.quantization.tensor.QuantizedTensor.quantized_repr"]], "set_activation_quantizers_to_float() (in module aimet_torch.v2.quantsim.config_utils)": [[17, "aimet_torch.v2.quantsim.config_utils.set_activation_quantizers_to_float"]], "set_blockwise_quantization_for_weights() (in module aimet_torch.v2.quantsim.config_utils)": [[17, "aimet_torch.v2.quantsim.config_utils.set_blockwise_quantization_for_weights"]], "set_grouped_blockwise_quantization_for_weights() (in module aimet_torch.v2.quantsim.config_utils)": [[17, "aimet_torch.v2.quantsim.config_utils.set_grouped_blockwise_quantization_for_weights"]], "encodinganalyzer (class in aimet_torch.v2.quantization.encoding_analyzer)": [[18, "aimet_torch.v2.quantization.encoding_analyzer.EncodingAnalyzer"]], "minmaxencodinganalyzer (class in aimet_torch.v2.quantization.encoding_analyzer)": [[20, "aimet_torch.v2.quantization.encoding_analyzer.MinMaxEncodingAnalyzer"]], "percentileencodinganalyzer (class in aimet_torch.v2.quantization.encoding_analyzer)": [[21, "aimet_torch.v2.quantization.encoding_analyzer.PercentileEncodingAnalyzer"]], "set_percentile() (aimet_torch.v2.quantization.encoding_analyzer.percentileencodinganalyzer method)": [[21, "aimet_torch.v2.quantization.encoding_analyzer.PercentileEncodingAnalyzer.set_percentile"]], "sqnrencodinganalyzer (class in aimet_torch.v2.quantization.encoding_analyzer)": [[22, "aimet_torch.v2.quantization.encoding_analyzer.SqnrEncodingAnalyzer"]], "compute_encodings_from_stats() 
(aimet_torch.v2.quantization.encoding_analyzer.sqnrencodinganalyzer method)": [[22, "aimet_torch.v2.quantization.encoding_analyzer.SqnrEncodingAnalyzer.compute_encodings_from_stats"]], "basequantizationmixin (class in aimet_torch.v2.nn.base)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin"]], "__quant_init__() (aimet_torch.v2.nn.base.basequantizationmixin method)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin.__quant_init__"]], "compute_encodings() (aimet_torch.v2.nn.base.basequantizationmixin method)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin.compute_encodings"]], "forward() (aimet_torch.v2.nn.base.basequantizationmixin method)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin.forward"]], "input_quantizers (aimet_torch.v2.nn.base.basequantizationmixin attribute)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin.input_quantizers"]], "output_quantizers (aimet_torch.v2.nn.base.basequantizationmixin attribute)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin.output_quantizers"]], "param_quantizers (aimet_torch.v2.nn.base.basequantizationmixin attribute)": [[24, "aimet_torch.v2.nn.base.BaseQuantizationMixin.param_quantizers"]], "quantize (class in aimet_torch.v2.quantization.affine.quantizer)": [[25, "aimet_torch.v2.quantization.affine.quantizer.Quantize"]], "quantizedequantize (class in aimet_torch.v2.quantization.affine.quantizer)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizeDequantize"]], "quantizerbase (class in aimet_torch.v2.quantization.affine.quantizer)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase"]], "allow_overwrite() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.allow_overwrite"]], "compute_encodings() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.compute_encodings"]], "forward() (aimet_torch.v2.quantization.affine.quantizer.quantize method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.Quantize.forward"]], "forward() (aimet_torch.v2.quantization.affine.quantizer.quantizedequantize method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizeDequantize.forward"]], "get_encoding() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.get_encoding"]], "get_legacy_encodings() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.get_legacy_encodings"]], "is_initialized() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.is_initialized"]], "register_quantization_parameter() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.register_quantization_parameter"]], "set_legacy_encodings() (aimet_torch.v2.quantization.affine.quantizer.quantizerbase method)": [[25, "aimet_torch.v2.quantization.affine.quantizer.QuantizerBase.set_legacy_encodings"]]}}) \ No newline at end of file diff --git a/releases/1.33.0/torch_v2/toplevelhidden.html b/releases/1.33.0/torch_v2/toplevelhidden.html index ab388c7..262904c 100644 --- a/releases/1.33.0/torch_v2/toplevelhidden.html +++ b/releases/1.33.0/torch_v2/toplevelhidden.html @@ -1,8 +1,7 @@ - - + <no title> — AI Model Efficiency Toolkit Documentation: ver 1.33.0 
diff --git a/releases/1.33.0/torch_v2/torch_docs/api/nn.fake_quantization_mixin.html b/releases/1.33.0/torch_v2/torch_docs/api/nn.fake_quantization_mixin.html index ad18a4c..1186374 100644 --- a/releases/1.33.0/torch_v2/torch_docs/api/nn.fake_quantization_mixin.html +++ b/releases/1.33.0/torch_v2/torch_docs/api/nn.fake_quantization_mixin.html @@ -1,8 +1,7 @@ - - + FakeQuantizationMixin — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -119,7 +118,7 @@
        -
        +

        FakeQuantizationMixin

        @@ -137,7 +136,7 @@

        ModuleList containing QuantizerBase objects to be applied to the layer’s input tensors

        -
        Type:
        +
        Type

        nn.ModuleList

        @@ -149,7 +148,7 @@

        ModuleList containing QuantizerBase objects to be applied to the layer’s output tensors

        -
        Type:
        +
        Type

        nn.ModuleList

        @@ -161,7 +160,7 @@

        ModuleDict mapping parameter names to associated QuantizerBase objects

        -
        Type:
        +
        Type

        nn.ModuleDict

        @@ -256,10 +255,10 @@

        The resulting quantized module contains the same attributes and parameters as the original module, but may be assigned input, output and parameter quantizers.

        -
        Parameters:
        +
        Parameters

        module (Module) – Floating point module to quantize

        -
        Returns:
        +
        Returns

Quantized version of the original module
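As a minimal sketch of how from_module is typically used (the Linear layer below is illustrative; quantizers are left unconfigured and would normally be assigned afterwards):

import torch
from aimet_torch.v2.nn import FakeQuantizationMixin

fp_linear = torch.nn.Linear(10, 10)
# Create the fake-quantized counterpart of the floating-point layer;
# its input/output/param quantizers start out unset until configured
quant_linear = FakeQuantizationMixin.from_module(fp_linear)
print(type(quant_linear).__name__)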

        @@ -284,7 +283,7 @@

        This decorator registers the defined class as the fake-quantized version of module_cls such that calling from_module() on an instance of module_cls will output an instance of the decorated class.

        -
        Parameters:
        +
        Parameters

        module_cls – The base torch.nn.Module class
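As a rough sketch of the registration pattern (the Square module below is purely illustrative and not part of AIMET; the quantizer checks follow the usual fake-quantization convention of quantizing inputs and outputs around the floating-point computation):

import torch
from aimet_torch.v2.nn import FakeQuantizationMixin

class Square(torch.nn.Module):  # hypothetical custom layer, for illustration only
    def forward(self, x):
        return x * x

@FakeQuantizationMixin.implements(Square)
class FakeQuantizedSquare(FakeQuantizationMixin, Square):
    def forward(self, x):
        # Quantize the input, run the float forward pass, then quantize the output
        if self.input_quantizers[0]:
            x = self.input_quantizers[0](x)
        out = super().forward(x)
        if self.output_quantizers[0]:
            out = self.output_quantizers[0](out)
        return out

# After registration, from_module(Square()) returns a FakeQuantizedSquare instance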

        @@ -292,7 +291,7 @@
        -
        +
        diff --git a/releases/1.33.0/torch_v2/torch_docs/api/nn.quantization_mixin.html b/releases/1.33.0/torch_v2/torch_docs/api/nn.quantization_mixin.html index fb71129..2bff052 100644 --- a/releases/1.33.0/torch_v2/torch_docs/api/nn.quantization_mixin.html +++ b/releases/1.33.0/torch_v2/torch_docs/api/nn.quantization_mixin.html @@ -1,8 +1,7 @@ - - + QuantizationMixin — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -123,7 +122,7 @@

        Warning

        This feature is under heavy development and API changes may occur without notice in future versions.

        -
        +

        QuantizationMixin

        @@ -139,7 +138,7 @@

        QuantizationMixinModuleList containing QuantizerBase objects to be applied to the layer’s input tensors

        -
        Type:
        +
        Type

        nn.ModuleList

        @@ -151,7 +150,7 @@

        QuantizationMixinModuleList containing QuantizerBase objects to be applied to the layer’s output tensors

        -
        Type:
        +
        Type

        nn.ModuleList

        @@ -163,7 +162,7 @@

        QuantizationMixinModuleDict mapping parameter names to associated QuantizerBase objects

        -
        Type:
        +
        Type

        nn.ModuleDict

        @@ -247,7 +246,7 @@

        QuantizationMixinkernel in the forward pass unless within the compute_encodings() context.

        -
        Parameters:
        +
        Parameters

        kernel – Callable object to be used as the underlying kernel.

        @@ -282,7 +281,7 @@

        QuantizationMixin -
        Parameters:
        +
        Parameters

        kernel – Callable object to be used as the default kernel by all the instances of this class.

        @@ -328,10 +327,10 @@

        QuantizationMixin -
        Parameters:
        +
        Parameters

        module (Module) – Floating point module to quantize

        -
        Returns:
        +
        Returns

        Quantized version of the original module

        @@ -354,10 +353,10 @@

        QuantizationMixinclassmethod get_default_kernel()[source]

        Return the default kernel of the class

        -
        Return type:
        +
        Return type

        Optional[Callable]

        -
        Returns:
        +
        Returns

        Default kernel of the class. None if the default kernel is not set.

        @@ -369,10 +368,10 @@

        QuantizationMixin -
        Return type:
        +
        Return type

        Optional[Callable]

        -
        Returns:
        +
        Returns

        The kernel to be used by this instance.

        @@ -386,7 +385,7 @@

        QuantizationMixin - - + Quantize — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -122,7 +121,7 @@
        -
        +

        Quantize

        @@ -140,7 +139,7 @@

        Quantize -
        Parameters:
        +
        Parameters

        +
        Variables

    • Component in v1

      ++++ @@ -133,10 +136,14 @@

      Classes + +

      Quantize

      Applies quantization to the input.

      ++++ @@ -149,8 +156,8 @@

      Functions - - + FloatQuantizeDequantize — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -116,7 +115,7 @@
      -
      +

      FloatQuantizeDequantize

      @@ -140,7 +139,7 @@
      \[\begin{split}exponent\_max = 2^{exponent} - 1 \\\end{split}\]
      -
      Parameters:
      +
      Parameters
      • exponent_bits (int) – Number of exponent bits to simulate

      • mantissa_bits (int) – Number of mantissa bits to simulate

      • @@ -177,8 +176,8 @@
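For example, a half-precision (FP16-like) simulation can be configured as in the small sketch below; the exact bit split is an assumption chosen to mirror IEEE float16:

import torch
from aimet_torch.v2.quantization.float import FloatQuantizeDequantize

# 5 exponent bits and 10 mantissa bits approximate IEEE float16 behaviour
fp16_sim = FloatQuantizeDequantize(exponent_bits=5, mantissa_bits=10)
y = fp16_sim(torch.randn(4, 4))  # fake-casts the input to the lower-precision grid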
      -
      -
      +
      +

      quantize

      Applies quantization to the input.

      ++++ @@ -129,7 +132,7 @@

      FloatQuantizeDequantize

      Simulates quantization by fake-casting the input

      -
      +
      diff --git a/releases/1.33.0/torch_v2/torch_docs/api/quantization/tensor.html b/releases/1.33.0/torch_v2/torch_docs/api/quantization/tensor.html index f512be3..bfd3460 100644 --- a/releases/1.33.0/torch_v2/torch_docs/api/quantization/tensor.html +++ b/releases/1.33.0/torch_v2/torch_docs/api/quantization/tensor.html @@ -1,8 +1,7 @@ - - + quantization.tensor — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -114,9 +113,9 @@
      -
      +

      quantization.tensor

      -
      +

      Classes

      @@ -129,8 +128,7 @@

      Classesdequantize()[source]

      Dequantizes self using self.encoding to produce a DequantizedTensor with the same encoding information.

      -

      Example: -:rtype: DequantizedTensor

      +

      Example

>>> import aimet_torch.v2.quantization as Q
       >>> x = torch.tensor([[2.57, -2.312],
       ...                   [0.153, 0.205]])
      @@ -148,6 +146,11 @@ 

      ClassesTrue

      +
      +
      Return type
      +

      DequantizedTensor

      +
      +
      @@ -155,7 +158,7 @@

      Classesquantize()[source]

      Returns self

      -
      Return type:
      +
      Return type

      QuantizedTensor

      @@ -201,7 +204,7 @@

      Classesdequantize()[source]

      Returns self

      -
      Return type:
      +
      Return type

      DequantizedTensor

      @@ -212,8 +215,7 @@

      Classesquantize()[source]

      Quantizes self using self.encoding to produce a QuantizedTensor with the same encoding information.

      -

      Example: -:rtype: QuantizedTensor

      +

      Example

      >>> import aimet_torch.v2.quantization as Q
       >>> x = torch.tensor([[0.39, 51.0], [3.521, 9.41]])
       >>> quant_dequant = Q.affine.QuantizeDequantize((1, ), 8, symmetric=False)
      @@ -227,6 +229,11 @@ 

      Classes [ 68., 97.]], grad_fn=<AliasBackward0>)

      +
      +
      Return type
      +

      QuantizedTensor

      +
      +

      @@ -257,8 +264,8 @@

      Classes - - + Blockwise Quantization — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -118,7 +117,7 @@

      Warning

      This feature is under heavy development and API changes may occur without notice in future versions.

      -
      +

      Blockwise Quantization

      When performing integer quantization, it is necessary to determine quantization parameters (also known as encodings) like scale and offset in order to define a quantization grid for mapping floating point values to their quantized integer @@ -225,8 +224,8 @@

      Blockwise Quantizationblock_size=(1, 4)) # (-1, -1) works too

      -
      -
      +
      +

      Low Power Blockwise Quantization (LPBQ)

      Qualcomm runtime supports an alternative to Blockwise Quantization referred to as Low Power Blockwise Quantization (LPBQ).

      In this scheme, blockwise encodings at a lower bitwidth are determined and then adjusted such that they lie on a common @@ -271,7 +270,7 @@

      Low Power Blockwise Quantization (LPBQ)block_grouping(1, 4, 1, 1)) # (1, -1, 1, 1) works too

      -
      +

      Top Level API

      Several top level API functions exist to make it easier to configure blockwise quantization and LPBQ quantization for a model:

      @@ -280,7 +279,7 @@

      Top Level APIaimet_torch.v2.quantsim.config_utils.set_blockwise_quantization_for_weights(sim, arg, bitwidth, symmetric, block_size)[source]

      Set weight parameter quantizers of modules to blockwise.

      -
      Parameters:
      +
      Parameters

      +

      Export

      Using Blockwise quantization results in a larger number of encodings produced as compared to Per-Tensor or Per-Channel quantization. As a result, a new method of exporting encodings to json has been developed to both reduce the exported @@ -474,8 +473,8 @@

      ExportThe 1.0.0 encodings format is supported by Qualcomm runtime and can be used to export Per-Tensor, Per-Channel, Blockwise, and LPBQ quantizer encodings. If Blockwise and/or LPBQ quantizers are present in the model, the 1.0.0 format must be used when exporting encodings for Qualcomm runtime.

      -

      - +
      + diff --git a/releases/1.33.0/torch_v2/torch_docs/encoding_analyzer.html b/releases/1.33.0/torch_v2/torch_docs/encoding_analyzer.html index d457d78..5e9f9ec 100644 --- a/releases/1.33.0/torch_v2/torch_docs/encoding_analyzer.html +++ b/releases/1.33.0/torch_v2/torch_docs/encoding_analyzer.html @@ -1,8 +1,7 @@ - - + Encoding Analyzers — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -119,16 +118,20 @@
      -
      +

      Encoding Analyzers

      class aimet_torch.v2.quantization.encoding_analyzer.EncodingAnalyzer(observer)[source]
      -
      +

      Variants

      ++++ @@ -141,8 +144,8 @@

      Variants - - + Post-Training Quantization — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -119,9 +118,9 @@ diff --git a/releases/1.33.0/torch_v2/torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.MinMaxEncodingAnalyzer.html b/releases/1.33.0/torch_v2/torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.MinMaxEncodingAnalyzer.html index 8b24962..33da878 100644 --- a/releases/1.33.0/torch_v2/torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.MinMaxEncodingAnalyzer.html +++ b/releases/1.33.0/torch_v2/torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.MinMaxEncodingAnalyzer.html @@ -1,8 +1,7 @@ - - + MinMaxEncodingAnalyzer — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -120,7 +119,7 @@
      -
      +

      MinMaxEncodingAnalyzer

      @@ -128,7 +127,7 @@

      MinMaxEncodingAnalyzer

      Encoding Analyzer for Min-Max calibration technique

      -
      +
      diff --git a/releases/1.33.0/torch_v2/torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.PercentileEncodingAnalyzer.html b/releases/1.33.0/torch_v2/torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.PercentileEncodingAnalyzer.html index 7a36153..47f28ad 100644 --- a/releases/1.33.0/torch_v2/torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.PercentileEncodingAnalyzer.html +++ b/releases/1.33.0/torch_v2/torch_docs/generated/aimet_torch.v2.quantization.encoding_analyzer.PercentileEncodingAnalyzer.html @@ -1,8 +1,7 @@ - - + PercentileEncodingAnalyzer — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -120,7 +119,7 @@
      -
      +

      PercentileEncodingAnalyzer

      @@ -132,7 +131,7 @@

      PercentileEncodingAnalyzer -
      Parameters:
      +
      Parameters

      percentile – Value from 50.0 to 100.0 indicating the clipping percentile

      @@ -140,7 +139,7 @@

      PercentileEncodingAnalyzer - - + SqnrEncodingAnalyzer — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -120,14 +119,14 @@
      -
      +

      SqnrEncodingAnalyzer

      class aimet_torch.v2.quantization.encoding_analyzer.SqnrEncodingAnalyzer(shape, num_bins=2048, *, asymmetric_delta_candidates=17, symmetric_delta_candidates=101, offset_candidates=21, max_parallelism=64, gamma=3.0)[source]

      Encoding Analyzer for SQNR Calibration technique

      -
      Parameters:
      +
      Parameters
      • shape (tuple) – Shape of calculated encoding

      • num_bins (int) – number of bins to use per histogram

      • @@ -145,17 +144,17 @@

        SqnrEncodingAnalyzercompute_encodings_from_stats(stats, num_steps, is_symmetric)[source]

        Searches for encodings which produce the lowest expected SQNR based on the histograms in stats

        -
        Parameters:
        +
        Parameters
        • stats (List[_Histogram]) – A list of _Histogram objects with length equal to the number of encodings to compute

        • num_steps (int) – The number of bins the quantized range is split into

        • is_symmetric (bool) – If True, computes symmetric encodings, else computes asymmetric encodings

        -
        Return type:
        +
        Return type

        Tuple[Optional[Tensor], Optional[Tensor]]

        -
        Returns:
        +
        Returns

        Tuple of computed encodings (min, max) as tensors with shape self.shape

        @@ -163,7 +162,7 @@

        SqnrEncodingAnalyzer

      -
      +
      diff --git a/releases/1.33.0/torch_v2/torch_docs/index.html b/releases/1.33.0/torch_v2/torch_docs/index.html index 860b263..68c6957 100644 --- a/releases/1.33.0/torch_v2/torch_docs/index.html +++ b/releases/1.33.0/torch_v2/torch_docs/index.html @@ -1,8 +1,7 @@ - - + AIMET: AI Model Efficiency Toolkit Documentation — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -117,19 +116,18 @@
      -
      +

      AIMET: AI Model Efficiency Toolkit Documentation

      AI Model Efficiency Toolkit (AIMET) provides tools enabling users to quantize and compress PyTorch models. Quantization is an essential step when deploying models to edge devices with fixed-point AI accelerators.

      AIMET provides both post-training and fine-tuning techniques to minimize accuracy loss incurred when quantizing floating-point models.

      -../_images/AIMET_index_no_fine_tune.png - +../_images/AIMET_index_no_fine_tune.png

      The above picture shows a high-level view of the workflow when using AIMET. The user passes a trained floating-point model to AIMET’s APIs for quantization. AIMET returns a new PyTorch model simulating low-precision inference, which users can fine-tune to recover lost accuracy. Users can then export the quantized model via ONNX/torchscript to an on-target runtime like Qualcomm® Neural Processing SDK.

      -
      +
      Qualcomm® Neural Processing SDK is a product of Qualcomm Technologies, Inc. and/or its subsidiaries.
      -
      -

      +
      +
      diff --git a/releases/1.33.0/torch_v2/torch_docs/quantized_modules.html b/releases/1.33.0/torch_v2/torch_docs/quantized_modules.html index dd93956..932275d 100644 --- a/releases/1.33.0/torch_v2/torch_docs/quantized_modules.html +++ b/releases/1.33.0/torch_v2/torch_docs/quantized_modules.html @@ -1,8 +1,7 @@ - - + Quantized Modules — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -123,7 +122,7 @@

      Warning

      This feature is under heavy development and API changes may occur without notice in future versions.

      -
      +

      Quantized Modules

      To simulate the effects of running networks at a reduced bitwidth, AIMET provides quantized versions of standard torch.nn.Modules. These quantized modules serve as drop-in replacements for their PyTorch counterparts, but can @@ -145,7 +144,7 @@

      Quantized ModulesFakeQuantizationMixin-derived module. AIMET provides extensive coverage of FakeQuantizationMixin for torch.nn.Module layer types, and more limited coverage for QuantizationMixin layers. See the table below for a full list of module coverage.

      -
      + +

      Configuration

      The quantization behavior of a quantized module is controlled by the quantizers contained within the input, output, and parameter quantizer attributes listed below.

      MinMaxEncodingAnalyzer(shape)

      Encoding Analyzer for Min-Max calibration technique

      +++++ @@ -284,8 +288,8 @@

      Configuration + +

      Computing Encodings

      Before a module can compute a quantized forward pass, all quantizers must first be calibrated inside a compute_encodings context. When a quantized module enters the compute_encodings context, it first disables all input and output quantization @@ -308,10 +312,15 @@

      Computing Encodings +

      +

      Attribute

      Type

      +++++ @@ -1145,8 +1154,8 @@

      Quantized Module Classes - - + Quantizers — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -121,9 +120,9 @@
      -
      +

      Quantizers

      -
      +

      Top-level API

      @@ -146,7 +145,7 @@

      Top-level APIabstract get_encoding()[source]

      Return the quantizer’s encodings as an EncodingBase object

      -
      Return type:
      +
      Return type

      Optional[EncodingBase]

      @@ -157,7 +156,7 @@

      Top-level APIabstract get_legacy_encodings()[source]

      Returns a list of encodings, each represented as a List of Dicts

      -
      Return type:
      +
      Return type

      Optional[List[Dict]]

      @@ -168,7 +167,7 @@

      Top-level APIis_initialized()[source]

      Returns true if the quantization parameters are initialized.

      -
      Return type:
      +
      Return type

      bool

      @@ -208,7 +207,7 @@

      Top-level API -
      Parameters:
      +
      Parameters

      nn.Module

      FakeQuantizationMixin

      +++++ @@ -177,11 +180,11 @@

      Importsfrom aimet_torch.cross_layer_equalization import equalize_model

    • from aimet_torch.model_preparer import prepare_model

    • - +
      -
      +

      QuantizationSimModel

      -
      +

      Moving from QuantWrapper to Quantized Modules

      To enable quantization in QuantSim v1, modules are wrapped with a QuantizeWrapper. These wrapped modules can be accessed as follows:

      from aimet_torch.quantsim import QuantizationSimModel as QuantizationSimModelV1
      @@ -232,8 +235,8 @@ 

      Moving from QuantWrapper to Quantized Moduleshere.

      -

      -
      +
      +

      Moving from StaticGrid and LearnedGrid Quantizer to Affine and Float Quantizer

      In QuantSim v1, we relied on StaticGridQuantizer and LearnedGridQuantizer. For both, floating point quantization could be enabled based on QuantizationDataType passed in.

      from aimet_torch.tensor_quantizer import StaticGridPerChannelQuantizers
      @@ -253,8 +256,8 @@ 

      Moving from StaticGrid and LearnedGrid Quantizer to Affine and Float Quantiz

      From the wrapped module (QuantSim v1) or quantized module (QuantSim v2), the attributes to access the quantizers remain consistent: .input_quantizers for input quantizers, .output_quantizers for output quantizers, and .param_quantizers for parameter quantizers.

      For more information on Quantizers, please refer to the API reference guide here.

      -
      -
      + +
      - -
      + + +

      Deprecated Features

      There are some components that are tied to the QuantSim v1 design that are not needed in QuantSim v2. For example, all QuantSim v2 source code will be implemented in Python to provide easier debugging and improved portability. It is not recommended to use libpymo modules with QuantSim 2.0. Below, you can see a list of these features and the recommended migration guideline:

      -

      AIMET Classes

      aimet_torch

      +
      --++ @@ -443,9 +446,9 @@

      Code Examples - - + Quickstart Guide — AI Model Efficiency Toolkit Documentation: ver 1.33.0 @@ -119,12 +118,12 @@
      -
      +

      Quickstart Guide

      In this tutorial, we will go through the end-to-end process of using AIMET and PyTorch to create, calibrate, and export a simple quantized model. Note that this is intended to show the most basic workflow in AIMET. It is not meant to demonstrate the most state-of-the-art techniques available in AIMET.

      -
      +

      Overall flow

      1. Define the basic floating-point PyTorch model, training, and eval loops

      2. @@ -134,8 +133,8 @@

        Overall flow +

      +

      PyTorch prerequisites

      To see clearly what happens inside AIMET, let’s first start with some simple PyTorch code for defining, training, and evaluating a model. The code below is adapted from PyTorch’s @@ -222,11 +221,11 @@

      PyTorch prerequisites
      Floating point accuracy: 91.70999908447266
       

      -
      -
      +
      +

      Prepare the floating point model for quantization

      Before we can (accurately) simulate quantization, there are a couple important steps to take care of:

      -
      +

      1) Model preparation

AIMET’s quantization simulation tool (QuantizationSimModel) expects the floating point model to conform to some specific guidelines. For example, QuantizationSimModel is only able to quantize math operations performed by torch.nn.Module objects.

In this example, that applies to the model’s functional relu() and softmax() operations, which the preparation step converts into module equivalents.
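As a hedged illustration of this step (assuming the floating-point model from the previous section is available as model), the prepare_model API can be applied as follows:

from aimet_torch.model_preparer import prepare_model

model = prepare_model(model)   # rewrites functional calls (e.g. relu, softmax) into nn.Module equivalents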


      2) BatchNorm fold

When models are executed in a quantized runtime, batchnorm layers are typically folded into the weight and bias of an adjacent convolution layer whenever possible in order to remove unnecessary computations. To accurately simulate this behavior, AIMET folds the batchnorm layers before creating the quantization simulation.

After folding, the model contains Identity (passthrough) layers where it previously had BatchNorm2d layers. Like the model_preparer step, this operation should not impact the model’s accuracy.
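A minimal sketch of this step, assuming the prepared model from above and an illustrative input shape of (1, 3, 32, 32):

from aimet_torch.batch_norm_fold import fold_all_batch_norms

bn_pairs = fold_all_batch_norms(model, input_shapes=(1, 3, 32, 32))   # list of (layer, batchnorm) pairs that were folded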


      Fine-tune the model with quantization aware training

If we’re not satisfied with our accuracy after applying quantization, there are some steps we can take to further optimize the quantized accuracy. One such step is quantization aware training (QAT), during which the model is trained for a small number of epochs with the quantization operations in place so that the weights can adapt to the quantization noise.
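A hedged sketch of such a fine-tuning loop; sim is the QuantizationSimModel created earlier, and train_loader and device are assumed from the training code above:

import torch

optimizer = torch.optim.SGD(sim.model.parameters(), lr=1e-4, momentum=0.9)
criterion = torch.nn.CrossEntropyLoss()

sim.model.train()
for epoch in range(1):                                   # a small number of fine-tuning epochs
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(sim.model(images), labels)
        loss.backward()                                   # gradients flow through the quantization ops
        optimizer.step()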


      Export the quantsim model

      Now that we are happy with our quantized model’s accuracy, we are ready to export the model with its quantization parameters.

      export_path = "/tmp/"
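# The lines below are a hedged completion of this snippet (the original continuation is not
# shown here); the filename prefix is a placeholder. export() writes the model along with a
# JSON encodings file containing the quantization parameters.
sim.export(path=export_path, filename_prefix="quantized_model", dummy_input=dummy_input.cpu())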
      @@ -514,8 +513,8 @@ 

Export the quantsim model
AIMET AdaRound — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET AdaRound

AIMET quantization features, by default, use the “nearest rounding” technique for achieving quantization. AdaRound (adaptive rounding) instead learns, per weight, whether to round up or down so as to minimize the resulting quantization error. When using AdaRound, remember to set and freeze the parameter encodings before computing the encodings. Please refer to the code example in the AdaRound API section.

../_images/adaround.png
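A hedged sketch of this flow; model, dummy_input and calib_loader are assumed to exist, and the paths, batch counts and bitwidths are placeholders:

from aimet_common.defs import QuantScheme
from aimet_torch.quantsim import QuantizationSimModel
from aimet_torch.adaround.adaround_weight import Adaround, AdaroundParameters

params = AdaroundParameters(data_loader=calib_loader, num_batches=4)
ada_model = Adaround.apply_adaround(model, dummy_input, params,
                                    path="/tmp", filename_prefix="adaround",
                                    default_param_bw=8,
                                    default_quant_scheme=QuantScheme.post_training_tf_enhanced)

sim = QuantizationSimModel(ada_model, dummy_input=dummy_input,
                           default_param_bw=8, default_output_bw=8)
sim.set_and_freeze_param_encodings(encoding_path="/tmp/adaround.encodings")   # freeze before compute_encodings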

      AdaRound Use Cases


      Common terminology

        @@ -151,8 +149,8 @@


      Use Cases

        @@ -192,18 +190,8 @@

        Use Cases


      AdaRound API

      -

Please refer to the links below to view the AdaRound API for each AIMET variant:
• AdaRound for PyTorch
• AdaRound for Tensorflow
• AdaRound for Keras
• AdaRound for ONNX

diff --git a/releases/1.33.0/torch_v2/user_guide/auto_quant.html b/releases/1.33.0/torch_v2/user_guide/auto_quant.html
AIMET AutoQuant — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET AutoQuant

      -
      +

      Overview

AIMET offers a suite of neural network post-training quantization techniques. Often, applying these techniques in a specific sequence results in better accuracy and performance. Without the AutoQuant feature, the AIMET user must manually try out combinations of these techniques, which is time-consuming and error-prone; AutoQuant automates this search.

      Overview +

      +

      Workflow

      Before entering the optimization workflow, AutoQuant performs the following preparation steps:

      @@ -150,17 +149,8 @@

      Workflow


      AutoQuant API

      -

Please refer to the links below to view the AutoQuant API for each AIMET variant:
• AutoQuant for PyTorch
• AutoQuant for Tensorflow
• AutoQuant for ONNX
diff --git a/releases/1.33.0/torch_v2/user_guide/bn_reestimation.html b/releases/1.33.0/torch_v2/user_guide/bn_reestimation.html
AIMET BN Re-estimation — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET BN Re-estimation

      -
      +

      Overview

The BN Re-estimation feature utilizes a small subset of training data to individually re-estimate the statistics of the Batch Normalization (BN) layers in a model. These BN statistics are then used to adjust the quantization scale parameters.
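A minimal sketch of this flow, assuming sim is a QuantizationSimModel that has already been fine-tuned with QAT and train_loader provides the small subset of training data:

from aimet_torch.bn_reestimation import reestimate_bn_stats
from aimet_torch.batch_norm_fold import fold_all_batch_norms_to_scale

reestimate_bn_stats(sim.model, train_loader, num_batches=100)   # re-estimate BN statistics on a data subset
fold_all_batch_norms_to_scale(sim)                              # fold BN layers into the quantization scales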


      Workflow

      BN-Re-estimation requires that

        @@ -151,19 +150,9 @@

../_images/bn_reestimation.png

      BN Re-estimation API

      -

Please refer to the links below to view the BN Re-estimation API for each AIMET variant:
• BN Re-estimation for PyTorch
• BN Re-estimation for Tensorflow
• BN Re-estimation for Keras
diff --git a/releases/1.33.0/torch_v2/user_guide/channel_pruning.html b/releases/1.33.0/torch_v2/user_guide/channel_pruning.html
AIMET Channel Pruning — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET Channel Pruning

Channel Pruning is a model compression technique that removes less-important input channels from layers in a given model. Currently AIMET supports Channel Pruning of Conv2d layers.

      -
      +

      Overall Procedure

      The following picture explains the different steps in Channel Pruning a given layer. These steps are repeated for all layers selected to be compressed in the order of their occurrence from the top of the model.

      ../_images/channel_pruning_1.png

      These individual steps are explained in more detail in the following sub-sections.

      -
      -
      +
      +

      Channel Selection

For a given layer and a given compression ratio, Channel Selection analyzes the magnitude of each input channel (based on the kernel weights for that channel) and chooses the channels with the least magnitude to be pruned.

      -
      -
      +
      +

      Winnowing

Winnowing is used to remove the input channels of the weight matrix identified by Channel Selection, resulting in compressed tensors.

      ../_images/cp_2.png @@ -139,13 +138,13 @@


      Weight Reconstruction

As a final step in Channel Pruning, AIMET adjusts the weight and bias parameters of a layer that was pruned so that the outputs of that layer closely match the outputs prior to pruning. This is done by collecting random samples of the output of the layer from the original model and the corresponding input samples from the pruned model for that layer. AIMET then performs linear regression to adjust the layer parameters.

../_images/cp_4.jpg
diff --git a/releases/1.33.0/torch_v2/user_guide/compression_feature_guidebook.html b/releases/1.33.0/torch_v2/user_guide/compression_feature_guidebook.html
AIMET Compression Features Guidebook — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET Compression Features Guidebook

      This document provides typical workflows in order to compress a network using AIMET. A more in-depth discussion on various techniques and their usage is provided in User Guide

AIMET supports network compression using the following techniques: Weight SVD, Spatial SVD (SSVD) and Channel Pruning (CP). These techniques are intended for Multiply-and-Accumulate (MAC) reduction of convolution layers in a neural network. Based on a configured desired MAC reduction ratio, i.e., MACs in the compressed model to MACs in the uncompressed model, the compression algorithms automatically compress each individual convolution layer in the network to approximately reach the overall desired MAC reduction. Note that the actual on-target inference latency of a model depends on several factors: MACs, memory and memory bandwidth, quantization, etc. Therefore, the improvement in runtime latency obtained from MAC-reduction-based compression may vary depending on the specific model architecture. Performance results for some typical models are provided in https://quic.github.io/aimet-pages/index.html. For best performance, a combination of spatial SVD followed by channel pruning is recommended. At a high level, the following steps should be performed to compress a network using the SSVD + CP combination:

../_images/compression_flow.png
      1. Determine the target compression ratio (C), which is the ratio of MACs in final compressed model to the MACs in the original uncompressed model. For example, target compression ratio = 0.5 indicates that the final model MACs are half of the original model MACs.

      2. Perform compression using Spatial SVD technique as follows:

      3. @@ -151,7 +149,7 @@
1. In the final step, a model is selected whose MAC ratio relative to the original uncompressed model is close to C and which also meets the user’s accuracy requirements. For example, for the ResNet-50 results provided on https://quic.github.io/aimet-pages/index.html, Csvd = 0.75 and Ccp = 0.66 were used to achieve an overall compression of C = 0.5

diff --git a/releases/1.33.0/torch_v2/user_guide/greedy_compression_ratio_selection.html b/releases/1.33.0/torch_v2/user_guide/greedy_compression_ratio_selection.html
AIMET Greedy Compression Ratio Selection — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET Greedy Compression Ratio Selection

      -
      +

      Overview

The model compression methods Spatial SVD and Channel Pruning work on a per-layer basis. Not all the layers in a given model are equally compressible, and compressing individual layers can have varying impact on the final accuracy of the model. The Greedy Per-Layer Compression Ratio Selection algorithm is used to assess the sensitivity of applicable layers to compression and to find an appropriate compression ratio for each individual layer. The algorithm makes sure that the entire model retains as much accuracy as possible while meeting the given target compression ratio.

      -
      -
      +
      +

      How it works

      The Greedy Compression Ratio Selection algorithm executes the following two steps:

        @@ -138,22 +137,22 @@


      Per-layer Exploration

For each layer, this step produces a column in the compression-ratio vs. model-performance table. This column captures the overall network performance as the layer is compressed over a predefined range of compression-ratio candidates, while all other layers are left unmodified.

      ../_images/greedy_4.jpg

      In the above figure, you see an example model with 4 layers, and 10 compression-ratio candidates (which is the default setting). Note that the table does not capture the eval score for the last candidate which is always compression-ratio=1.0 (since this score is the baseline score and known already).

      Monotonic Fit: In some cases it is observed that the model performance is not a strict increasing function of increasing compression-ratios. To help with the greedy selection procedure, AIMET can apply a curve-fit scheme to try and fit the model-performance numbers for a given layer using a monotonically increasing function. The functionality is disabled by default.


      Compression Ratio Selection

This step is the core of the algorithm. It considers the compression-ratio vs. model-performance table for each applicable layer from the previous step, the target compression ratio, and a function to calculate the cost of the compressed model depending on the compression method (Spatial SVD, Channel Pruning) used. It starts with a constant accuracy and finds the corresponding compression ratio for every applicable layer by interpolating from the compression-ratio vs. model-performance evaluation table. The algorithm then calculates the total cost of the model to see whether the target compression ratio has been met. A binary search is used to find the solution quickly. Finally, it returns the list of selected compression ratios for all applicable layers. This way, the algorithm achieves the highest remaining accuracy of the compressed model while meeting the target compression ratio.

      The following figure illustrates that for a given accuracy, the compression ratio for each layer is different.

      ../_images/greedy_5.jpg

As suggested by the above diagram, the algorithm picks a lower compression ratio (higher compression) for layers that are more compressible and a higher compression ratio (lower compression) for layers that are less compressible (for less compressible layers the accuracy falls drastically if the compression ratio is lowered).

diff --git a/releases/1.33.0/torch_v2/user_guide/index.html b/releases/1.33.0/torch_v2/user_guide/index.html
AI Model Efficiency Toolkit User Guide — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AI Model Efficiency Toolkit User Guide

      -
      +

      Overview

      AI Model Efficiency Toolkit (AIMET) is a software toolkit that enables users to quantize and compress models. Quantization is a must for efficient edge inference using fixed-point AI accelerators.

      @@ -129,8 +128,8 @@

      Overview +

      +

      Features

      AIMET supports two sets of model optimization techniques:

        @@ -140,16 +139,16 @@

        Features +

      +

      Release Information

      For information specific to this release, please see Release Notes and Known Issues.

      -
      -
      +
      +

      Installation Guide

      Please visit the AIMET Installation for more details.

diff --git a/releases/1.33.0/torch_v2/user_guide/known_issues.html b/releases/1.33.0/torch_v2/user_guide/known_issues.html
AIMET Known Issues — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET Known Issues

      Known issues and limitations for Qualcomm AI Model Efficiency ToolKit (AIMET)

        @@ -129,7 +128,7 @@
diff --git a/releases/1.33.0/torch_v2/user_guide/model_compression.html b/releases/1.33.0/torch_v2/user_guide/model_compression.html
AIMET Model Compression — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET Model Compression

      -
      +

      Overview

      AIMET provides a model compression library that can be used to reduce a model’s MAC and memory costs with a minimal drop in accuracy. AIMET supports various compression schemes like Weight SVD, Spatial SVD and Channel Pruning.

Please see the Compression Guidebook, which includes practical advice on using the compression features and how to combine them.

      -
      -
      +
      +

      Use Case

AIMET allows the user to take a trained model and compress it to a desired compression ratio; the compressed model can then be further fine-tuned and exported to a target. All of the compression schemes in AIMET use a two-step process - compression ratio selection followed by model compression.

      ../_images/compression_use_case.PNG

      The following sub-sections explain these steps in more detail.
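Before looking at the individual steps, the sketch below outlines the two-step flow end to end for Spatial SVD in auto mode. It follows the structure of the AIMET compression examples; model, eval_callback (a callable returning an accuracy score), and the input shape are assumptions:

from decimal import Decimal
from aimet_common.defs import CompressionScheme, CostMetric, GreedySelectionParameters
from aimet_torch.defs import SpatialSvdParameters
from aimet_torch.compress import ModelCompressor

greedy_params = GreedySelectionParameters(target_comp_ratio=Decimal('0.5'),
                                          num_comp_ratio_candidates=10)
auto_params = SpatialSvdParameters.AutoModeParams(greedy_params)
params = SpatialSvdParameters(mode=SpatialSvdParameters.Mode.auto, params=auto_params)

compressed_model, stats = ModelCompressor.compress_model(
    model=model, eval_callback=eval_callback, eval_iterations=10,
    input_shape=(1, 3, 224, 224),
    compress_scheme=CompressionScheme.spatial_svd,
    cost_metric=CostMetric.mac, parameters=params)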


      Compression ratio selection

      @@ -146,8 +145,8 @@

      Compression ratio selectionVisualization

      - -
      +
      +

      Model Compression

      In this phase, AIMET will apply the compression ratios per layer to create a compressed model. Currently, AIMET supports the following model compression algorithms.

      @@ -158,26 +157,26 @@

      Model CompressionChannel Pruning

      - -
      + +

      Optional techniques to get better compression results

      AIMET supports the following techniques that can be optionally used to get better compression results

      • Rank-rounding

      • Per-layer fine-tuning

      -
      +

      Rank Rounding

ML runtime software, such as that for embedded ML accelerators, often prefers the dimensions of layers like Conv2d or FC to be of a certain multiplicity. Matching the expected dimension size results in optimal runtime for that layer. AIMET techniques like Weight/Spatial SVD or Channel Pruning decompose or reduce layers, specifically in terms of output channels and input channels. The rank-rounding feature in AIMET tries to reduce layers to match a user-provided multiplicity. By default this feature is disabled. At present, AIMET allows the user to specify a multiplicity-factor for the entire model, not on a per-layer basis.

      Users can make use of this feature to generate more optimal models for running on embedded targets.

      -
      -
      +
      +

      Per-layer Fine-tuning

Given a user model and desired compression ratio, the user may sometimes notice a sharp degradation in accuracy after compression but before fine-tuning. One technique that might help overall compression in such scenarios is per-layer fine-tuning. When this feature is selected, AIMET invokes a user-provided fine-tuning function after compressing every layer that was selected for compression. This is done during the Model Compression phase in the diagram shown above.

      Note: The user is responsible for choosing appropriate learning-rates and other training parameters for fine-tuning. Using this feature may require the user to carefully pick the learning rates and learning-rate-decay parameters to be used during fine-tuning.


      References

      1. Xiangyu Zhang, Jianhua Zou, Kaiming He, and Jian Sun. “Accelerating Very Deep Convolutional Networks for Classification and Detection.” IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 38, no. 10, pp. 1943-1955, 1 Oct. 2016.

      2. @@ -205,8 +204,8 @@

Model Guidelines for PyTorch — AI Model Efficiency Toolkit Documentation: ver 1.33.0
        -
        +

        Model Guidelines for PyTorch

To implement the Cross Layer Equalization API, aimet_torch.cross_layer_equalization.equalize_model(), AIMET creates a computing graph to analyze the sequence of operations in the model. If your model is defined using certain constructs, AIMET may be unable to successfully create and analyze the computing graph. The following table lists the potential issues and workarounds.

        Note: These restrictions are not applicable, if you are using the Primitive APIs

      Component in v1

AIMET Model Quantization — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET Model Quantization

Models are generally trained on floating-point hardware like CPUs and GPUs. However, when these trained models are run on quantized hardware that supports fixed-precision operations, model parameters are converted from floating-point to fixed-point precision, which can introduce a drop in accuracy.

      AIMET provides multiple techniques and tools which help to create quantized models with a minimal loss in accuracy relative to floating-point models.

      This section provides information on typical use cases and AIMET’s quantization features.

      -
      +

      Use Cases

      1. Predict on-target accuracy: AIMET enables a user to simulate the effects of quantization to get a first order estimate of the model’s accuracy when run on quantized targets. This is useful to get an estimate of on-target accuracy @@ -155,8 +154,8 @@

      Use Cases

      -
      -
      +
      +

      AIMET Quantization Features

      @@ -191,7 +190,7 @@

      AIMET Quantization Features +

      Post-Training Quantization

      • Post-Training Quantization (PTQ) Techniques:

        @@ -230,8 +229,8 @@

        Post-Training Quantization +

      +

      Debugging/Analysis Tools

      @@ -254,9 +253,9 @@

      Debugging/Analysis Tools -

      - -
      +
      +
      +

      AIMET Quantization Workflow

      This section describes the recommended workflow for quantizing a neural network.

      @@ -265,7 +264,7 @@

      AIMET Quantization Workflow +

      PyTorch

      @@ -289,8 +288,8 @@

      PyTorch

      - -
      +
      +

      Tensorflow

      @@ -327,17 +326,17 @@

      Tensorflow +

      + +

      Debugging Guidelines

      Applying AIMET Quantization features may involve some trial and error in order to find the best optimizations to apply on a particular model. We have included some debugging steps in the Quantization Guidebook that can be tried when quantization accuracy does not seem to improve right off the bat.

diff --git a/releases/1.33.0/torch_v2/user_guide/post_training_quant_techniques.html b/releases/1.33.0/torch_v2/user_guide/post_training_quant_techniques.html
AIMET Post-Training Quantization Techniques — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET Post-Training Quantization Techniques

      -
      +

      Overview

It is observed that some ML models show reduced inference accuracy when run on quantized hardware due to approximation noise. AIMET provides post-training quantization techniques that help adjust the parameters in the model such that the model becomes more quantization-friendly. AIMET post-training quantization techniques are designed to be applied to pre-trained ML models. These techniques are explained as part of the “Data-Free Quantization Through Weight Equalization and Bias Correction” paper at ICCV 2019 - https://arxiv.org/abs/1906.04721
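A minimal sketch of applying these techniques through the Cross-Layer Equalization API described below; the model is assumed to be a pre-trained floating-point model and the input shape is illustrative:

from aimet_torch.cross_layer_equalization import equalize_model

equalize_model(model, input_shapes=(1, 3, 224, 224))   # folds BN, equalizes weights and absorbs high biases in place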

      -
      -

      Cross-Layer Equalization API

      -

Please refer to the links below to view the Cross-Layer Equalization API for each AIMET variant:
• Cross-Layer Equalization for PyTorch
• Cross-Layer Equalization for Tensorflow
• Cross-Layer Equalization for Keras
• Cross-Layer Equalization for ONNX

      FAQs

      1. @@ -198,14 +187,14 @@

        FAQs

      - -
      +
      +

      References

      1. Markus Nagel, Mart van Baalen, Tijmen Blankevoort, Max Welling. “Data-Free Quantization Through Weight Equalization and Bias Correction.” IEEE International Conference on Computer Vision (ICCV), Seoul, October 2019.

diff --git a/releases/1.33.0/torch_v2/user_guide/quant_analyzer.html b/releases/1.33.0/torch_v2/user_guide/quant_analyzer.html
AIMET QuantAnalyzer — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET QuantAnalyzer

      -
      +

      Overview

The QuantAnalyzer feature analyzes the model for quantization and points out sensitive parts/hotspots in the model. The analyses are performed automatically, and only require the user to pass in callbacks for performing forward pass and evaluation, and optionally a dataloader for MSE loss analysis.

      For each analysis, QuantAnalyzer outputs json and/or html files containing data and plots for easy visualization.
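A hedged sketch of invoking QuantAnalyzer; the callback functions, data loader, model, dummy input and results directory are placeholders, and the import paths are assumed from the AIMET examples:

from aimet_common.defs import QuantScheme
from aimet_torch.quant_analyzer import QuantAnalyzer, CallbackFunc

forward_pass_callback = CallbackFunc(calibrate_fn, func_callback_args=data_loader)   # runs forward passes for calibration
eval_callback = CallbackFunc(eval_fn, func_callback_args=data_loader)                # returns an accuracy score

analyzer = QuantAnalyzer(model, dummy_input, forward_pass_callback, eval_callback)
analyzer.analyze(quant_scheme=QuantScheme.post_training_tf_enhanced,
                 default_param_bw=8, default_output_bw=8,
                 config_file=None, results_dir="./quant_analyzer_results")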


      Detailed Analysis Descriptions

      QuantAnalyzer performs the following analyses:

        @@ -193,18 +192,8 @@

        Detailed Analysis Descriptions -

        QuantAnalyzer API

        -

Please refer to the links below to view the QuantAnalyzer API for each AIMET variant:
• QuantAnalyzer for PyTorch
• QuantAnalyzer for Tensorflow
• QuantAnalyzer for Keras
• QuantAnalyzer for ONNX

diff --git a/releases/1.33.0/torch_v2/user_guide/quantization_aware_training.html b/releases/1.33.0/torch_v2/user_guide/quantization_aware_training.html
AIMET Quantization Aware Training — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET Quantization Aware Training

      -
      +

      Overview

      In cases where PTQ techniques are not sufficient for mitigating quantization error, users can use quantization-aware training (QAT). QAT models the quantization noise during training and allows the model to find better solutions than post-training quantization. However, the higher accuracy comes with the usual costs of neural network training, i.e. longer training times, need for labeled data and hyperparameter search.

      -
      -
      +
      +

      QAT workflow

      The QAT workflow is largely similar to the flow for using Quantization Simulation for inference. The only difference is that a user can take the sim.model and use it in their training pipeline in order to fine-tune model parameters while @@ -139,8 +138,8 @@

      QAT workflow +

      +

      QAT modes

      There are two variants of QAT, referred to as QAT without Range Learning and QAT with Range Learning.

In QAT without Range Learning, encoding values for activation quantizers are found once at the beginning, during the compute_encodings step, and remain fixed during training. In QAT with Range Learning, the encoding values are further updated (learned) jointly with the model weights during training.
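As a hedged illustration, the variant is typically selected through the quant scheme passed to QuantizationSimModel; model and dummy_input are assumed here:

from aimet_common.defs import QuantScheme
from aimet_torch.quantsim import QuantizationSimModel

# QAT without Range Learning: activation encodings stay fixed after compute_encodings
sim = QuantizationSimModel(model, dummy_input=dummy_input,
                           quant_scheme=QuantScheme.post_training_tf_enhanced)

# QAT with Range Learning: encodings are initialized, then learned during training
sim_rl = QuantizationSimModel(model, dummy_input=dummy_input,
                              quant_scheme=QuantScheme.training_range_learning_with_tf_init)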


      Recommendations for Quantization-Aware Training

      Here are some general guidelines that can aid in improving performance or faster convergence with Quantization-aware Training (QAT):

        @@ -174,8 +173,8 @@

Quantization Simulation Configuration — AI Model Efficiency Toolkit Documentation: ver 1.33.0
        -
        +

        Quantization Simulation Configuration

        -
        +

        Overview

        AIMET allows the configuration of quantizer placement and settings in accordance with a set of rules specified in a json configuration file, applied when the Quantization Simulation API is called.

Settings such as quantizer enablement, per-channel quantization, symmetric quantization, and specifying fused ops when quantizing can be configured.

        Overview +

        +

        Configuration File Structure

        The configuration file contains six main sections, in increasing amounts of specificity:

        ../_images/quantsim_config_file.png

        Rules defined in a more general section can be overruled by subsequent rules defined in a more specific case. For example, one may specify in “defaults” for no layers to be quantized, but then turn on quantization for specific layers in the “op_type” section.
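As a hedged illustration of this structure, the snippet below writes a minimal configuration from Python; the section names follow the six sections described above, while the specific settings shown are placeholders only:

import json

quantsim_config = {
    "defaults": {
        "ops": {"is_output_quantized": "True"},
        "params": {"is_quantized": "True", "is_symmetric": "True"},
        "per_channel_quantization": "True",
    },
    "params": {"bias": {"is_quantized": "False"}},        # overrides "defaults" for bias parameters
    "op_type": {},                                        # per-op-type overrides
    "supergroups": [{"op_list": ["Conv", "Relu"]}],       # fused op patterns
    "model_input": {"is_input_quantized": "True"},
    "model_output": {},
}

with open("/tmp/quantsim_config.json", "w") as f:
    json.dump(quantsim_config, f, indent=4)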


        How to configure individual Configuration File Sections

        When working with a new runtime with different rules, or for experimental purposes, users can refer to this section to understand how to configure individual sections in a configuration file.

          @@ -396,8 +395,8 @@

          How to configure individual Configuration File Sections

        -
        -

diff --git a/releases/1.33.0/torch_v2/user_guide/quantization_feature_guidebook.html b/releases/1.33.0/torch_v2/user_guide/quantization_feature_guidebook.html
AIMET Quantization Features Guidebook — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET Quantization Features Guidebook

      AIMET supports various neural network quantization techniques. A more in-depth discussion on various techniques and their usage is provided in User Guide

      @@ -139,8 +138,7 @@ 1. Apply CLE if not already implemented, especially for models with depth-wise separable convolutions. 2. Try per-channel quantization. This will address the issue of uneven per-channel weight distribution. 3. Apply bias correction or AdaRound if calibration data is available

../_images/quantization_debugging_flow_chart.png

Fixing activation quantization
To reduce the quantization error from activation quantization, we can also try using different range setting methods or adjust CLE to take activation quantization ranges into account, as vanilla CLE can lead to uneven activation ranges.

      After completing the above steps, the last step is to quantize the complete model to the desired bit-width. If the accuracy is acceptable, we have our final quantized model ready to use. Otherwise, we can consider higher bit-widths and smaller granularities or revert to more powerful quantization methods, such as quantization-aware training.

      -
diff --git a/releases/1.33.0/torch_v2/user_guide/quantization_sim.html b/releases/1.33.0/torch_v2/user_guide/quantization_sim.html
AIMET Quantization Simulation — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET Quantization Simulation

      -
      +

      Overview

      AIMET’s Quantization Simulation feature provides functionality to simulate the effects of quantized hardware. This allows the user to then apply post-training and/or fine-tuning techniques in AIMET to recover the loss in accuracy, and @@ -136,8 +135,8 @@

      Overview +

      +

      QuantSim Workflow

      A typical workflow for using AIMET quantization simulation to simulate on-target quantized accuracy is described below.

        @@ -156,8 +155,8 @@

        QuantSim Workflow +

      +

      Simulating Quantization Noise

      The diagram below explains how quantization noise is introduced to a model when its input, output or parameters are quantized and dequantized.

      @@ -168,8 +167,8 @@

      Simulating Quantization Noise +

      +

      Determining Quantization Parameters (Encodings)

      Using a QuantSim model, AIMET analyzes and determines the optimal quantization encodings (scale and offset parameters) for each quantizer op.

      @@ -189,8 +188,8 @@

\(\textrm{Delta} = \dfrac{\textrm{Max} - \textrm{Min}}{{2}^{\textrm{bitwidth}} - 1} \quad \textrm{Offset} = \dfrac{-\textrm{Min}}{\textrm{Delta}}\)
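A small numeric illustration of these formulas, assuming an observed range of [-0.8, 1.6] and an 8-bit quantizer:

bitwidth = 8
enc_min, enc_max = -0.8, 1.6

delta = (enc_max - enc_min) / (2 ** bitwidth - 1)    # quantization step size (scale)
offset = -enc_min / delta                            # zero point expressed in grid steps

def quantize_dequantize(x):
    q = round(x / delta + offset)                    # map to an integer grid index
    q = max(0, min(2 ** bitwidth - 1, q))            # clamp to the representable range
    return (q - offset) * delta                      # map back to floating point

print(delta, offset, quantize_dequantize(0.5))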


      Quantization Schemes

      AIMET supports various techniques for coming up with min and max values for encodings, also called quantization schemes:

      -
      +
      +

      Configuring Quantization Simulation Ops

      Different hardware and on-device runtimes may support different quantization choices for neural network inference. For example, some runtimes may support asymmetric quantization for both activations and weights, whereas other ones may @@ -241,18 +240,8 @@

      Configuring Quantization Simulation OpsQuantization Simulation Configuration page which describes the configuration options in detail.

      - -
      -

      Quantization Simulation APIs

      -

Please refer to the links below to view the Quantization Simulation API for each AIMET variant:
• Quantization Simulation for PyTorch
• Quantization Simulation for Tensorflow
• Quantization Simulation for Keras
• Quantization Simulation for ONNX

      +

      Frequently Asked Questions

      -
      +

      New Features

      We have now enabled blockwise quantization and low power blockwise quantization for QuantSim v2 users. When applied, these features obtain encoding parameters with a finer granularity, which produces a more optimized quantization grid.

      To learn more, please refer to the following documentation:

      @@ -155,9 +154,9 @@

      New FeaturesBlockwise Quantization

    • Low Power Blockwise Quantization

    • -

diff --git a/releases/1.33.0/torch_v2/user_guide/release_notes.html b/releases/1.33.0/torch_v2/user_guide/release_notes.html
AIMET Release Notes — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET Release Notes

      Release Notes for Qualcomm AI Model Efficiency ToolKit (AIMET)

1.22.1
1.22.0
1.21.0
1.19.1.py37
1.19.1
1.18.0.py37
1.17.0.py37
1.16.2.py37
1.16.2
1.16.1.py37
Documentation
1.16.1
diff --git a/releases/1.33.0/torch_v2/user_guide/spatial_svd.html b/releases/1.33.0/torch_v2/user_guide/spatial_svd.html
AIMET Spatial SVD — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET Spatial SVD

      Spatial SVD is a tensor decomposition technique which decomposes one large layer (in terms of mac or memory) into two smaller layers. SVD stands for Singular Value Decomposition.

      Given a conv layer, with kernel (𝑚,𝑛,ℎ,𝑤) where 𝑚 is the input channels, 𝑛 the output channels, and ℎ, 𝑤 giving the height and width of the kernel itself, Spatial SVD will decompose the kernel into two kernels. One of size (𝑚,𝑘,ℎ,1) and one of size (𝑘,𝑛,1,𝑤), where k is called the rank. The smaller the value of k the larger the degree of compression achieved.

      The following diagram illustrates this visually. As you can see, Spatial SVD decomposes both the output channel dimension as well as the size of the conv kernel itself. Spatial SVD is currently supported for Conv layers in AIMET.
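As a worked illustration (not AIMET code) of the decomposition above, consider an assumed 3x3 conv with m=64 input channels, n=128 output channels, a 56x56 output feature map, and rank k=32:

m, n, h, w = 64, 128, 3, 3
H, W = 56, 56
k = 32

macs_original = m * n * h * w * H * W                     # single (m, n, h, w) kernel
macs_svd = (m * k * h * 1 + k * n * 1 * w) * H * W        # (m, k, h, 1) followed by (k, n, 1, w)

print(macs_original, macs_svd, macs_svd / macs_original)  # the decomposed layers use ~25% of the MACs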

../_images/spatial_svd.png
diff --git a/releases/1.33.0/torch_v2/user_guide/visualization_compression.html b/releases/1.33.0/torch_v2/user_guide/visualization_compression.html
AIMET Visualization — AI Model Efficiency Toolkit Documentation: ver 1.33.0
      -
      +

      AIMET Visualization

      -
      +

      Overview

AIMET Visualization adds analytical capability to the AIMET tool (which helps quantize and compress ML models) through visualization. It provides more detailed insights into AIMET features, as users are able to analyze a model’s layers in terms of compressibility and also highlight potential issues when applying quantization. The tool also assists in displaying progress for computationally heavy tasks.

      -
      -
      +
      +

      Design

Given a model, a user can start a Bokeh server session and then invoke functions which will produce visualizations to help analyze and understand the model before using AIMET quantization and compression features.

../_images/vis_1.png

      Compression

      Evaluation scores during compression are displayed in a table as they are computed and users can see the progress displayed while computing these scores. After Greedy Selection has run, the optimal compression ratios are also displayed in a graph

../_images/vis_4.png ../_images/vis_5.png ../_images/vis_6.png ../_images/vis_7.png

      Starting a Bokeh Server Session:

Start a Bokeh server by typing this command: bokeh serve --allow-websocket-origin=<host name>:<port number> --port=<port number>

--allow-websocket-origin tells the Bokeh server which network addresses to listen on; it is not needed when viewing only locally.

--port tells the Bokeh server which network port to listen on rather than the default port of 5006.

      - -
      +
      +

      How to use the tool

      Model Compression

        @@ -195,8 +194,8 @@

AIMET Visualization for Quantization — AI Model Efficiency Toolkit Documentation: ver 1.33.0
        -
        +

        AIMET Visualization for Quantization

        -
        +

        Overview

        AIMET Visualization adds analytical capability to the AIMET tool (which helps quantize and compress ML models) through visualization. It provides more detailed insights into AIMET features as users are able to analyze a model’s layers in terms of compressibility and also highlight potential issues when applying quantization. The tool also assists in displaying progress for computationally heavy tasks. The visualizations get saved as an HTML file under the specified directory.

        -
        -
        +
        +

        Quantization

        During quantization, common parameters are used throughout a layer for converting the floating point weight values to INT8. If the dynamic range in weights is very high, the quantization will not be very granular. To equalize the weight range we apply Cross Layer Equalization. In order to understand if we need to apply Cross Layer Equalization, we can visualize the weight range for every channel in a layer. If the weight range varies a lot over the various channels, applying cross layer equalization helps in improving the Quantization accuracy.

../_images/vis_3.png

        PyTorch

        In PyTorch, we can visualize the weights for a model. We can also visualize the weight ranges for a model before and after Cross Layer Equalization. There are three main functions a user can invoke:

        @@ -134,8 +133,8 @@

        PyTorch +

        +

        TensorFlow

        In TensorFlow, we can visualize the weight ranges and relative weight ranges over various channels in a layer. User can also use the same functions to see the changes in a layer weight ranges before and after Cross Layer Equalization.

        @@ -144,9 +143,9 @@

AIMET Weight SVD — AI Model Efficiency Toolkit Documentation: ver 1.33.0
        -
        +

        AIMET Weight SVD

        Weight SVD is a tensor decomposition technique which decomposes one large layer (in terms of mac or memory) into two smaller layers. SVD stands for Singular Value Decomposition.

        Given a neural network layer, with kernel (𝑚,𝑛,ℎ,𝑤) where 𝑚 is the input channels, 𝑛 the output channels, and ℎ, 𝑤 giving the height and width of the kernel itself, Weight SVD will decompose the kernel into one of size (𝑚,𝑘,1,1) and another of size (𝑘,𝑛,h,𝑤), where 𝑘 is called the rank. The smaller the value of 𝑘 the larger the degree of compression achieved.

The following diagram illustrates this visually. As you can see, Weight SVD decomposes the output channel dimension. Weight SVD is currently supported for Conv and Fully-connected layers in AIMET.

../_images/weight_svd.png
diff --git a/releases/1.33.0/torch_v2/user_guide/winnowing.html b/releases/1.33.0/torch_v2/user_guide/winnowing.html
AIMET Winnowing — AI Model Efficiency Toolkit Documentation: ver 1.33.0
        -
        +

        AIMET Winnowing

        -
        +

        Overview

        The model compression algorithm, Channel Pruning, identifies modules in a model, whose subset of input channels could be pruned without losing much accuracy. Unless explicitly removed, these input channels take up memory and add to unnecessary computation. For each identified module, the Winnow tool removes the input channels that were selected for pruning. Only Conv2D layers are supported for winnowing.

        -
        -
        +
        +

        Winnowing Overview

The following figure provides a pictorial overview of Winnowing. In this example, a module in a model has an input volume of HxWx8, where H = Height, W = Width and Number of input Channels = 8. The Channel Pruning algorithm identifies that for this module, input channels 1, 4 and 7 should be pruned. Winnowing removes the identified input channels from this module. The module’s input volume is now reduced to HxWx5.
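The snippet below is a simplified, framework-level illustration of this example (it is not the AIMET winnowing implementation): input channels 1, 4 and 7 are removed from the identified Conv2d, and the matching output channels are removed from the Conv2d that feeds it.

import torch

upstream = torch.nn.Conv2d(3, 8, kernel_size=3, padding=1)   # produces the HxWx8 input volume
module = torch.nn.Conv2d(8, 16, kernel_size=3, padding=1)    # module identified for winnowing

keep = [c for c in range(8) if c not in (1, 4, 7)]           # channels to retain

winnowed_upstream = torch.nn.Conv2d(3, len(keep), kernel_size=3, padding=1)
winnowed_upstream.weight.data = upstream.weight.data[keep].clone()   # drop output channels
winnowed_upstream.bias.data = upstream.bias.data[keep].clone()

winnowed_module = torch.nn.Conv2d(len(keep), 16, kernel_size=3, padding=1)
winnowed_module.weight.data = module.weight.data[:, keep].clone()    # drop input channels
winnowed_module.bias.data = module.bias.data.clone()

x = torch.randn(1, 3, 32, 32)
print(winnowed_module(winnowed_upstream(x)).shape)           # torch.Size([1, 16, 32, 32])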

../_images/winnow_1.png

        How Winnowing Works

When the number of input channels of a Conv module is reduced, the output channels of the module above it must also be modified. If the module above is another Conv layer, that Conv layer’s output channels are also reduced to match the number of input channels of the winnowed Conv module. If the module above is NOT a Conv layer (e.g., BatchNorm, ReLU), that module simply propagates the changes upstream. That is, both the output and the input channels of the BatchNorm and ReLU modules are winnowed to match the winnowed channels of the Conv layer just below them.

        The following figure explains a very simple scenario. In this scenario, a Conv module has been identified for winnowing a sub set of its input channels. This is indicated by green color on the left side of the figure. The right side of the figure indicates the actions taken by Winnowing. Winnowing consists of the following changes done to the 3 affected modules.

        @@ -133,8 +132,8 @@

        How Winnowing Works -

        -
        +
        +

diff --git a/releases/1.33.0/user_guide/adaround.html b/releases/1.33.0/user_guide/adaround.html
AIMET AdaRound — AI Model Efficiency Toolkit Documentation: ver 1.33.0

        AIMET AdaRound

        AIMET quantization features, by default, use the “nearest rounding” technique for achieving quantization. @@ -1132,12 +1124,11 @@ setting and freezing parameter encodings before computing the encodings. Please refer the code example in the AdaRound API section.

../_images/adaround.png

        AdaRound Use Cases

        -
        -
        +
        +

        Common terminology

          @@ -1149,8 +1140,8 @@

          Common terminology +

        +

        Use Cases

          @@ -1190,18 +1181,8 @@

          Use Cases

        -
        -
        -

        AdaRound API

        -

        Please refer to the links below to view the AdaRound API for each AIMET variant:

        - -
        - +
        +
diff --git a/releases/1.33.0/user_guide/auto_quant.html b/releases/1.33.0/user_guide/auto_quant.html
AIMET AutoQuant — AI Model Efficiency Toolkit Documentation: ver 1.33.0

        AIMET Channel Pruning

        Channel Pruning is a model compression technique that reduces less-important input channels from layers in a given model. Currently AIMET supports Channel Pruning of Conv2d layers.

        -
        +

        Overall Procedure

        The following picture explains the different steps in Channel Pruning a given layer. These steps are repeated for all layers selected to be compressed in the order of their occurrence from the top of the model.

        ../_images/channel_pruning_1.png

        These individual steps are explained in more detail in the following sub-sections.

        -
        -
        +
        +

        Channel Selection

        For a given layer and a given compression ratio Channel Selection analyzes the magnitude of each input channel (based on the kernel weights for that channel) and chooses the channels with the least magnitude to be pruned.

        -
        -
        +
        +

        Winnowing

        Winnowing is used to remove input channels of weight matrix obtained from Channel Selection resulting in compressed tensors

        ../_images/cp_2.png @@ -1142,13 +1134,13 @@

        WinnowingWinnowing

      - -
      + +

      Weight Reconstruction

As a final step in Channel Pruning, AIMET adjusts the weight and bias parameters of a layer that was pruned so that the outputs of that layer closely match the outputs prior to pruning. This is done by collecting random samples of the output of the layer from the original model and the corresponding input samples from the pruned model for that layer. AIMET then performs linear regression to adjust the layer parameters.

../_images/cp_4.jpg
diff --git a/releases/1.33.0/user_guide/compression_feature_guidebook.html b/releases/1.33.0/user_guide/compression_feature_guidebook.html
AIMET Compression Features Guidebook — AI Model Efficiency Toolkit Documentation: ver 1.33.0

        AIMET Compression Features Guidebook

        This document provides typical workflows in order to compress a network using AIMET. A more in-depth discussion on various techniques and their usage is provided in User Guide

        AIMET supports network compression using the following techniques: Weight SVD, Spatial SVD (SSVD) and Channel Pruning (CP). These techniques are intended for Multiply-and-Accumulate (MAC) reduction of convolution layers in a neural network. Based on a configured desired MAC reduction ratio, i.e., MACs in compress model to MACs in uncompressed model, the compression algorithms automatically compress each individual convolution layer in the network to approximately reach the overall desired MAC reduction. Note that the actual on-target inference latency performance of a model depends on several factors MACs, memory and memory bandwidth, quantization, etc. Therefore, the improvement in runtime latency based on MAC reduction based compression may vary depending on the specific model architecture. Performance results for some typical models are provided in https://quic.github.io/aimet-pages/index.html. For best performance, a combination of spatial SVD followed by channel pruning is recommended. At high level, following steps should be performed to compress a network using SSVD + CP combination:

../_images/compression_flow.png
        1. Determine the target compression ratio (C), which is the ratio of MACs in final compressed model to the MACs in the original uncompressed model. For example, target compression ratio = 0.5 indicates that the final model MACs are half of the original model MACs.

        2. Perform compression using Spatial SVD technique as follows:

        3. @@ -1154,7 +1145,7 @@
          1. In the final step, a model is selected with MAC ratio relative to the original uncompressed model is close to C and also meets user’s accuracy requirements. For example, for ResNet-50 results provided on https://quic.github.io/aimet-pages/index.html, Csvd = 0.75 and Ccp = 0.66 were used to achieve overall compression C = 0.5

          -
        +
diff --git a/releases/1.33.0/user_guide/examples.html b/releases/1.33.0/user_guide/examples.html
AIMET Examples — AI Model Efficiency Toolkit Documentation: ver 1.33.0

        AIMET Examples

        AIMET Examples provide reference code (in the form of Jupyter Notebooks) to learn how to apply AIMET quantization and compression features. It is also a quick way to become familiar with AIMET usage and APIs.

        For more details on each of the features and APIs please refer: Links to User Guide and API Documentation

        -
        +

        Browse the notebooks

        The following table has links to browsable versions of the notebooks for different features.

        Model Quantization Examples


      Browse the notebooks

      Model Compression Examples

      -

      Features

      +
      ---+++ @@ -1211,10 +1203,10 @@

      Browse the notebooks

      - -
      + +

      Running the notebooks

      -
      +

      Install Jupyter

      -
      -
      +
      -
      + +

      Run the notebooks


        AIMET Greedy Compression Ratio Selection

        -
        +

        Overview

        The model compression methods, Spatial SVD and Channel Pruning work on per layer basis. Not all the layers in the given model are equally compressible. Compression of individual layers of a given model can have varying impact on the final accuracy of the model. Greedy Per Layer Compression Ratio Selection Algorithm is used to assess the sensitivity of applicable layers to compression and find appropriate compression-ratio for each individual layers. The algorithm makes sure that the entire model has highest remaining accuracy and also meets the given target compression-ratio.

        -
        -
        +
        +

        How it works

        The Greedy Compression Ratio Selection algorithm executes the following two steps:

          @@ -1141,22 +1133,22 @@

          How it works -

        -
        +
        +

        Per-layer Exploration

        For each layer, produces a column in the compression-ratio vs. model-performance table. This column captures the over all network performance values as the layer is compressed by predefined range of compression ratio candidates, while all other layers are left unmodified.

        ../_images/greedy_4.jpg

        In the above figure, you see an example model with 4 layers, and 10 compression-ratio candidates (which is the default setting). Note that the table does not capture the eval score for the last candidate which is always compression-ratio=1.0 (since this score is the baseline score and known already).

        Monotonic Fit: In some cases it is observed that the model performance is not a strict increasing function of increasing compression-ratios. To help with the greedy selection procedure, AIMET can apply a curve-fit scheme to try and fit the model-performance numbers for a given layer using a monotonically increasing function. The functionality is disabled by default.


      Compression Ratio Selection

      This step is the core of the algorithm. It considers the compression-ratio vs. model-performance table for each applicable layer from the previous step, the target compression ratio, and a cost function for the compressed model that depends on the compression method used (Spatial SVD, Channel Pruning). It starts from a constant accuracy and finds the corresponding compression ratio for every applicable layer by interpolating from the compression-ratio vs. model-performance table. The algorithm then calculates the total cost of the model to check whether the target compression ratio has been met; a binary search is used to find the solution quickly. Finally, it returns the list of selected compression ratios for all applicable layers. In this way, the algorithm achieves the highest remaining accuracy of the compressed model while meeting the target compression ratio.

      The following figure illustrates that for a given accuracy, the compression ratio for each layer is different.

      ../_images/greedy_5.jpg

      As suggested by the above diagram, the algorithm picks a lower compression ratio (more compression) for layers that are more compressible and a higher compression ratio (less compression) for layers that are less compressible (for less compressible layers, accuracy falls drastically if the compression ratio is lowered).
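      A rough sketch of this selection step is shown below: it binary-searches an accuracy threshold, picks per-layer ratios from the evaluation table at each threshold, and checks the resulting overall compression ratio against the target. eval_table and cost_of are placeholders, not AIMET internals, and a real implementation interpolates between candidates rather than stepping.

      def select_compression_ratios(eval_table, cost_of, target_ratio, max_iter=50, tol=1e-3):
          # eval_table: {layer: {candidate ratio: eval score}}
          # cost_of(ratios): overall compression ratio (compressed cost / original cost)
          def ratio_for(scores, threshold):
              feasible = [r for r, s in scores.items() if s >= threshold]
              return min(feasible) if feasible else 1.0

          all_scores = [s for scores in eval_table.values() for s in scores.values()]
          lo, hi = min(all_scores), max(all_scores)
          ratios = {layer: 1.0 for layer in eval_table}
          for _ in range(max_iter):
              threshold = (lo + hi) / 2
              ratios = {layer: ratio_for(scores, threshold) for layer, scores in eval_table.items()}
              achieved = cost_of(ratios)
              if abs(achieved - target_ratio) < tol:
                  break
              if achieved > target_ratio:
                  hi = threshold   # not enough compression yet: lower the accuracy bar
              else:
                  lo = threshold   # compressed too aggressively: raise the accuracy bar
          return ratios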


        AIMET Model Compression


        Overview

        AIMET provides a model compression library that can be used to reduce a model’s MAC and memory costs with a minimal drop in accuracy. AIMET supports various compression schemes like Weight SVD, Spatial SVD and Channel Pruning.

        Please see the Compression Guidebook, which includes practical advice on using the compression features and how to combine them.


        Use Case

        AIMET allows a user to take a trained model and compress it to a desired compression ratio; the compressed model can then be further fine-tuned and exported to a target. All of the compression schemes in AIMET use a two-step process: compression-ratio selection followed by model compression.

        ../_images/compression_use_case.PNG

        The following sub-sections explain these steps in more detail.
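        For orientation, here is a minimal sketch of this two-step flow for a PyTorch model, assuming the aimet_torch compression interface (ModelCompressor, SpatialSvdParameters, GreedySelectionParameters). The exact import paths and argument names may differ across AIMET versions, and model and eval_callback are user-supplied placeholders.

        from decimal import Decimal
        from aimet_common.defs import CompressionScheme, CostMetric, GreedySelectionParameters
        from aimet_torch.compress import ModelCompressor
        from aimet_torch.defs import SpatialSvdParameters

        # Greedy ratio selection targeting an overall compression ratio of 0.5 with the
        # default 10 candidates, followed by Spatial SVD compression.
        greedy_params = GreedySelectionParameters(target_comp_ratio=Decimal(0.5),
                                                  num_comp_ratio_candidates=10)
        auto_params = SpatialSvdParameters.AutoModeParams(greedy_params)
        params = SpatialSvdParameters(SpatialSvdParameters.Mode.auto, auto_params)

        compressed_model, stats = ModelCompressor.compress_model(
            model,                                   # trained torch.nn.Module
            eval_callback=eval_callback,             # user-supplied evaluation function
            eval_iterations=10,
            input_shape=(1, 3, 224, 224),
            compress_scheme=CompressionScheme.spatial_svd,
            cost_metric=CostMetric.mac,
            parameters=params)
        # compressed_model can now be fine-tuned and exported to the target.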


        Compression ratio selection


      Model Compression

      In this phase, AIMET will apply the compression ratios per layer to create a compressed model. Currently, AIMET supports the following model compression algorithms.


      Optional techniques to get better compression results

      AIMET supports the following techniques that can optionally be used to get better compression results:

      • Rank-rounding

      • Per-layer fine-tuning


      Rank Rounding

      Often, ML runtime software such as that for embedded ML accelerators prefers the dimensions of layers like Conv2d or FC to be of a certain multiplicity, and matching the expected dimension sizes results in optimal runtime for that layer. AIMET techniques like Weight/Spatial SVD or Channel Pruning decompose or reduce layers, specifically in terms of output and input channels. The rank-rounding feature in AIMET will try to reduce layers to match a user-provided multiplicity. By default this feature is disabled. At present, AIMET allows the user to specify a multiplicity factor for the entire model, not on a per-layer basis.

      Users can make use of this feature to generate more optimal models for running on embedded targets.
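      The snippet below illustrates the idea of rank rounding with simple arithmetic; the function name and the round-down policy are illustrative only, and AIMET's actual rounding behaviour may differ.

      def round_rank(rank: int, multiplicity: int) -> int:
          # Snap a candidate SVD rank down to the nearest multiple of the multiplicity,
          # but never below the multiplicity itself.
          return max((rank // multiplicity) * multiplicity, multiplicity)

      print(round_rank(93, 16))   # -> 80, so the decomposed layer dimensions stay a multiple of 16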


      Per-layer Fine-tuning

      Given a user model and a desired compression ratio, the user may sometimes notice a sharp degradation in accuracy after compression but before fine-tuning. One technique that might help in such scenarios is per-layer fine-tuning. When this feature is selected, AIMET invokes a user-provided fine-tuning function after compressing every layer that was selected for compression. This is done during the Model Compression phase in the diagram shown above.

      Note: The user is responsible for choosing appropriate learning-rates and other training parameters for fine-tuning. Using this feature may require the user to carefully pick the learning rates and learning-rate-decay parameters to be used during fine-tuning.
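      The sketch below shows the general shape of such a user-provided fine-tuning routine in PyTorch. The exact callback signature AIMET expects is defined in the compression API documentation; the loader, optimizer, learning rate and epoch count here are illustrative choices the user must tune.

      import torch

      def finetune(model, loader, epochs=1, lr=1e-5):
          # Fine-tune briefly with a small learning rate after a layer is compressed.
          optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
          criterion = torch.nn.CrossEntropyLoss()
          model.train()
          for _ in range(epochs):
              for images, labels in loader:
                  optimizer.zero_grad()
                  loss = criterion(model(images), labels)
                  loss.backward()
                  optimizer.step()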


      References

      1. Xiangyu Zhang, Jianhua Zou, Kaiming He, and Jian Sun. “Accelerating Very Deep Convolutional Networks for Classification and Detection.” IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 38, no. 10, pp. 1943-1955, 1 Oct. 2016.


          Model Guidelines for PyTorch

          To implement the Cross Layer Equalization API, aimet_torch.cross_layer_equalization.equalize_model(), AIMET creates a computing graph to analyze the sequence of operations in the model. If the model is defined using certain constructs, AIMET may be unable to create and analyze the computing graph. The following table lists the potential issues and workarounds.

          Note: These restrictions are not applicable if you are using the Primitive APIs.
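          A minimal sketch of invoking this API on a torchvision model is shown below; the input shape is an example and the exact signature of equalize_model may vary between AIMET versions.

          from torchvision.models import resnet18
          from aimet_torch.cross_layer_equalization import equalize_model

          model = resnet18().eval()
          # Equalizes the model in place; (1, 3, 224, 224) is an example input shape.
          equalize_model(model, (1, 3, 224, 224))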


        AIMET Model Quantization

        Models are generally trained on floating-point hardware like CPUs and GPUs. However, when these trained models are run on quantized hardware that supports fixed-precision operations, model parameters are converted from floating-point precision to fixed precision.

        AIMET provides multiple techniques and tools which help to create quantized models with a minimal loss in accuracy relative to floating-point models.

        This section provides information on typical use cases and AIMET’s quantization features.


        Use Cases

        1. Predict on-target accuracy: AIMET enables a user to simulate the effects of quantization to get a first order estimate of the model’s accuracy when run on quantized targets. This is useful to get an estimate of on-target accuracy; a minimal sketch of this use case is shown below.
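        The sketch below assumes the aimet_torch QuantizationSimModel interface; model, calibration_fn and evaluate are user-supplied placeholders and argument names may differ by AIMET version.

        import torch
        from aimet_torch.quantsim import QuantizationSimModel

        dummy_input = torch.rand(1, 3, 224, 224)          # example input shape
        sim = QuantizationSimModel(model, dummy_input=dummy_input,
                                   default_param_bw=8, default_output_bw=8)

        # Calibrate the quantization encodings with a forward-pass callback, then
        # evaluate sim.model to estimate on-target accuracy.
        sim.compute_encodings(forward_pass_callback=calibration_fn,
                              forward_pass_callback_args=None)
        accuracy = evaluate(sim.model)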


        AIMET Quantization Features


        Post-Training Quantization

        • Post-Training Quantization (PTQ) Techniques:


        Debugging/Analysis Tools


      AIMET Quantization Workflow

      This section describes the recommended workflow for quantizing a neural network.


      PyTorch


      TensorFlow


      Debugging Guidelines

      Applying AIMET Quantization features may involve some trial and error in order to find the best optimizations to apply on a particular model. We have included some debugging steps in the Quantization Guidebook that can be tried when quantization accuracy does not seem to improve right off the bat.


        AIMET QuantAnalyzer


        Overview

        The QuantAnalyzer feature analyzes the model for quantization and points out sensitive parts/hotspots in the model. The analyses are performed automatically and only require the user to pass in callbacks for the forward pass and evaluation, and optionally a dataloader for MSE loss analysis.

        For each analysis, QuantAnalyzer outputs JSON and/or HTML files containing data and plots for easy visualization.


        Requirements

        To call the QuantAnalyzer API, users need to provide the following:
          See the PyTorch QuantAnalyzer API Docs for more information on how to call the QuantAnalyzer feature.

          Note: Typically on quantized runtimes, batch normalization layers will be folded where possible. So that users do not have to call a separate API to do so, QuantAnalyzer automatically performs Batch Norm Folding prior to running its analyses.
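          A minimal sketch of invoking the PyTorch QuantAnalyzer is shown below. The module paths and signatures are assumptions based on the AIMET API documentation and may differ by version; model, calibrate_fn and eval_fn are user-supplied placeholders.

          import torch
          from aimet_common.utils import CallbackFunc
          from aimet_torch.quant_analyzer import QuantAnalyzer

          dummy_input = torch.rand(1, 3, 224, 224)                  # example input shape
          forward_pass_callback = CallbackFunc(calibrate_fn, None)  # runs calibration data
          eval_callback = CallbackFunc(eval_fn, None)               # returns an accuracy score

          analyzer = QuantAnalyzer(model, dummy_input, forward_pass_callback, eval_callback)
          analyzer.analyze(default_param_bw=8, default_output_bw=8,
                           results_dir='./quant_analyzer_results')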


        Detailed Analysis Descriptions

        QuantAnalyzer performs the following analyses:


          QuantAnalyzer API


          Please refer to the links below to view the QuantAnalyzer API for each AIMET variant:


        AIMET Quantization Features Guidebook

        AIMET supports various neural network quantization techniques. A more in-depth discussion of the various techniques and their usage is provided in the User Guide.

        1. Apply CLE if not already implemented, especially for models with depth-wise separable convolutions.
        2. Try per-channel quantization. This will address the issue of uneven per-channel weight distribution.
        3. Apply bias correction or AdaRound if calibration data is available.

        ../_images/quantization_debugging_flow_chart.png

        Fixing activation quantization

        To reduce the quantization error from activation quantization, we can also try using different range-setting methods or adjust CLE to take activation quantization ranges into account, as vanilla CLE can lead to an uneven activation distribution.

        After completing the above steps, the last step is to quantize the complete model to the desired bit-width. If the accuracy is acceptable, we have our final quantized model ready to use. Otherwise, we can consider higher bit-widths and smaller granularities or revert to more powerful quantization methods, such as quantization-aware training.


      New Features

      We have now enabled blockwise quantization and low power blockwise quantization for QuantSim v2 users. When applied, these features obtain encoding parameters with a finer granularity, which produces a more optimized quantization grid.

      To learn more, please refer to the following documentation:

    • Blockwise Quantization
    • Low Power Blockwise Quantization


        AIMET Spatial SVD

        Spatial SVD is a tensor decomposition technique which decomposes one large layer (in terms of mac or memory) into two smaller layers. SVD stands for Singular Value Decomposition.

        Given a conv layer with kernel (𝑚,𝑛,ℎ,𝑤), where 𝑚 is the number of input channels, 𝑛 the number of output channels, and ℎ, 𝑤 the height and width of the kernel itself, Spatial SVD will decompose the kernel into two kernels: one of size (𝑚,𝑘,ℎ,1) and one of size (𝑘,𝑛,1,𝑤), where 𝑘 is called the rank. The smaller the value of 𝑘, the larger the degree of compression achieved.
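        The following small Python check illustrates the parameter-count arithmetic behind this decomposition; the layer sizes and the chosen rank are illustrative only.

        def spatial_svd_params(m, n, h, w, k):
            original = m * n * h * w
            decomposed = (m * k * h * 1) + (k * n * 1 * w)   # (m,k,h,1) followed by (k,n,1,w)
            return original, decomposed

        orig, dec = spatial_svd_params(m=64, n=128, h=3, w=3, k=32)
        print(orig, dec, dec / orig)   # 73728 18432 0.25 -> smaller k gives more compression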

        The following diagram illustrates this visually. As you can see, Spatial SVD decomposes both the output channel dimension as well as the size of the conv kernel itself. Spatial SVD is currently supported for Conv layers in AIMET.

        ../_images/spatial_svd.png

        AIMET Visualization


        Overview

        AIMET Visualization adds analytical capability to the AIMET tool (which helps quantize and compress ML models) through visualization. It provides more detailed insights into AIMET features, as users are able to analyze a model's layers in terms of compressibility and also highlight potential issues when applying quantization. The tool also assists in displaying progress for computationally heavy tasks.


        Design

        Given a model, a user can start a Bokeh server session and then invoke functions which will produce visualizations to help analyze and understand the model before using AIMET quantization and compression features.

        ../_images/vis_1.png

        Compression

        Evaluation scores during compression are displayed in a table as they are computed, and users can see the progress while these scores are being computed. After Greedy Selection has run, the optimal compression ratios are also displayed in a graph.

        ../_images/vis_4.png ../_images/vis_5.png ../_images/vis_6.png ../_images/vis_7.png

        Starting a Bokeh Server Session:

        Start a Bokeh server by typing this command: bokeh serve --allow-websocket-origin=<host name>:<port number> --port=<port number>

        --allow-websocket-origin tells the Bokeh server which host addresses are allowed to connect; it is not needed if you are only viewing locally.

        --port tells the Bokeh server which network port to listen on, rather than the default port of 5006.


        AIMET Visualization for Quantization


        Overview

        AIMET Visualization adds analytical capability to the AIMET tool (which helps quantize and compress ML models) through visualization. It provides more detailed insights into AIMET features as users are able to analyze a model’s layers in terms of compressibility and also highlight potential issues when applying quantization. The tool also assists in displaying progress for computationally heavy tasks. The visualizations get saved as an HTML file under the specified directory.


        Quantization

        During quantization, common parameters are used throughout a layer for converting the floating-point weight values to INT8. If the dynamic range of the weights is very high, the quantization will not be very granular. To equalize the weight range, we apply Cross Layer Equalization. In order to understand whether we need to apply Cross Layer Equalization, we can visualize the weight range for every channel in a layer. If the weight range varies a lot across channels, applying Cross Layer Equalization helps improve the quantization accuracy.

        ../_images/vis_3.png

        PyTorch

        In PyTorch, we can visualize the weights for a model. We can also visualize the weight ranges for a model before and after Cross Layer Equalization. There are three main functions a user can invoke:


        TensorFlow

        In TensorFlow, we can visualize the weight ranges and relative weight ranges over various channels in a layer. Users can also use the same functions to see the changes in a layer's weight ranges before and after Cross Layer Equalization.


        AIMET Weight SVD

        Weight SVD is a tensor decomposition technique which decomposes one large layer (in terms of mac or memory) into two smaller layers. SVD stands for Singular Value Decomposition.

        Given a neural network layer with kernel (𝑚,𝑛,ℎ,𝑤), where 𝑚 is the number of input channels, 𝑛 the number of output channels, and ℎ, 𝑤 the height and width of the kernel itself, Weight SVD will decompose the kernel into one kernel of size (𝑚,𝑘,1,1) and another of size (𝑘,𝑛,ℎ,𝑤), where 𝑘 is called the rank. The smaller the value of 𝑘, the larger the degree of compression achieved.
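        As with Spatial SVD, a small arithmetic check illustrates the effect of the rank on the parameter count; the layer sizes and rank below are illustrative only.

        def weight_svd_params(m, n, h, w, k):
            original = m * n * h * w
            decomposed = (m * k * 1 * 1) + (k * n * h * w)   # (m,k,1,1) followed by (k,n,h,w)
            return original, decomposed

        orig, dec = weight_svd_params(m=64, n=128, h=3, w=3, k=32)
        print(orig, dec, round(dec / orig, 2))   # 73728 38912 0.53 -> smaller k gives more compression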

        The following diagram illustrates this visually. As you can see, Weight SVD decomposes the output channel dimension. Weight SVD is currently supported for Conv and Fully-connected layers in AIMET.

        ../_images/weight_svd.png

        AIMET Winnowing


        Overview

        The Channel Pruning model compression algorithm identifies modules in a model for which a subset of input channels can be pruned without losing much accuracy. Unless explicitly removed, these input channels take up memory and add unnecessary computation. For each identified module, the Winnow tool removes the input channels that were selected for pruning. Only Conv2D layers are supported for winnowing.


        Winnowing Overview

        The following figure provides a pictorial overview of Winnowing. In this example, a module in a model has an input volume of HxWx8, where H = Height, W = Width and the number of input channels = 8. The Channel Pruning algorithm identifies that input channels 1, 4 and 7 of this module should be pruned. Winnowing removes the identified input channels from this module. The module's input volume is now reduced to HxWx5.

        ../_images/winnow_1.png

        How Winnowing Works

        When the number of input channels of a Conv module is reduced, the output channels of the module above it must also be modified. If the module above is another Conv layer, that Conv layer's output channels are also reduced to match the number of input channels of the winnowed Conv module. If the module above is NOT a Conv layer (e.g., BatchNorm, ReLU), that module simply propagates the changes upstream. That is, both the output and input channels of the BatchNorm and ReLU modules are winnowed to match the winnowed channels of the Conv layer just below them.
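        The PyTorch sketch below mimics this behaviour for a pair of Conv2d layers, removing input channels 1, 4 and 7 from the downstream conv and the matching output channels of the upstream conv; the layer sizes are assumptions for the example, and this is not the AIMET winnowing implementation.

        import torch
        import torch.nn as nn

        keep = [c for c in range(8) if c not in (1, 4, 7)]          # 8 -> 5 channels

        upstream = nn.Conv2d(3, 8, kernel_size=3, padding=1)        # produces the 8 channels
        downstream = nn.Conv2d(8, 16, kernel_size=3, padding=1)     # consumes the 8 channels

        new_up = nn.Conv2d(3, len(keep), kernel_size=3, padding=1)
        new_down = nn.Conv2d(len(keep), 16, kernel_size=3, padding=1)

        with torch.no_grad():
            new_up.weight.copy_(upstream.weight[keep])              # drop upstream output channels
            new_up.bias.copy_(upstream.bias[keep])
            new_down.weight.copy_(downstream.weight[:, keep])       # drop downstream input channels
            new_down.bias.copy_(downstream.bias)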

        The following figure explains a very simple scenario. In this scenario, a Conv module has been identified for winnowing a subset of its input channels, indicated by the green color on the left side of the figure. The right side of the figure indicates the actions taken by Winnowing. Winnowing consists of the following changes done to the 3 affected modules.
