Quantization Backend Configuration
FX Graph Mode Quantization allows users to configure the quantization behavior of each op to match the expectations of their backend.
In the future, this document will contain a detailed specification of these configurations.
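As a rough sketch of what such a configuration can look like in code, the example below builds a backend pattern config for torch.nn.Linear using the BackendConfig, BackendPatternConfig, DTypeConfig, and ObservationType classes from torch.ao.quantization.backend_config. This class-based API is assumed to be available in your PyTorch release (older releases expressed the same information as plain dicts, as in the dump below), so treat the exact names as illustrative rather than a fixed specification:

import torch
from torch.ao.quantization.backend_config import (
    BackendConfig,
    BackendPatternConfig,
    DTypeConfig,
    ObservationType,
)

# Static int8 quantization: quint8 activations, qint8 weights, fp32 bias,
# matching the first dtype config listed for Linear in the dump below.
weighted_int8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
)

# Describe how the backend expects torch.nn.Linear to be observed and quantized.
linear_config = (
    BackendPatternConfig(torch.nn.Linear)
    .set_observation_type(ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT)
    .add_dtype_config(weighted_int8_dtype_config)
    .set_root_module(torch.nn.Linear)
)

# "my_backend" is a placeholder name, not a real backend.
backend_config = BackendConfig("my_backend").set_backend_pattern_config(linear_config)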
Default values for native configurations
Below is the output of the default configuration used to quantize ops for fbgemm and qnnpack (PyTorch’s default quantized backends).
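The output can be reproduced, approximately, by printing each pattern config from the native backend configuration. A minimal sketch, assuming the get_native_backend_config helper and the to_dict method available in recent releases (the exact key names and printed form may differ from the dump reproduced below):

from pprint import pprint
from torch.ao.quantization.backend_config import get_native_backend_config

# Print one dict per op pattern, similar to the entries shown under "Results:".
for pattern_config in get_native_backend_config().configs:
    pprint(pattern_config.to_dict())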
Results:
{
'pattern': <class 'torch.nn.modules.pooling.AdaptiveAvgPool1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method adaptive_avg_pool1d of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <function adaptive_avg_pool2d at 0x7fc98a30ba70>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.AdaptiveAvgPool3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <function adaptive_avg_pool3d at 0x7fc98a30b950>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in function add>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <built-in function add>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': (<built-in method relu of type object at 0x7fc9db52bd40>, <built-in function add>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': <built-in function add>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in method add of type object at 0x7fc9db52bd40>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <built-in method add of type object at 0x7fc9db52bd40>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': (<built-in method relu of type object at 0x7fc9db52bd40>, <built-in method add of type object at 0x7fc9db52bd40>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': <built-in method add of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': <class 'torch.nn.modules.pooling.AvgPool1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method avg_pool1d of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.AvgPool2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in function avg_pool2d>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.AvgPool3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in function avg_pool3d>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.batchnorm.BatchNorm2d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7fc9894c29e0>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.BNReLU2d'>,
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <class 'torch.nn.modules.batchnorm.BatchNorm2d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7fc9894c2a70>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.BNReLU2d'>,
},
{
'pattern': <class 'torch.nn.modules.batchnorm.BatchNorm2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.batchnorm.BatchNorm3d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7fc9894c2b00>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.BNReLU3d'>,
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <class 'torch.nn.modules.batchnorm.BatchNorm3d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7fc9894c2b90>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.BNReLU3d'>,
},
{
'pattern': <class 'torch.nn.modules.batchnorm.BatchNorm3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.BNReLU2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.BNReLU3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <built-in method cat of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method clamp of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': clamp,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': contiguous,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.conv.Conv1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv1d'>,
'qat_module': <class 'torch.nn.qat.modules.conv.Conv1d'>,
},
{
'pattern': <class 'torch.nn.qat.modules.conv.Conv1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv1d'>,
},
{
'pattern': <built-in method conv1d of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.conv.Conv1d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7fc9894b9f80>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvReLU1d'>,
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <class 'torch.nn.modules.conv.Conv1d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7fc9894c2050>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvReLU1d'>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in method conv1d of type object at 0x7fc9db52bd40>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <built-in method conv1d of type object at 0x7fc9db52bd40>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm1d'>, <class 'torch.nn.modules.conv.Conv1d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse2.<locals>.reversed at 0x7fc9894c20e0>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvBn1d'>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, (<class 'torch.nn.modules.batchnorm.BatchNorm1d'>, <class 'torch.nn.modules.conv.Conv1d'>)),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse3.<locals>.reversed at 0x7fc9894c2170>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvBnReLU1d'>,
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, (<class 'torch.nn.modules.batchnorm.BatchNorm1d'>, <class 'torch.nn.modules.conv.Conv1d'>)),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'fuser_method': <function reverse3.<locals>.reversed at 0x7fc9894c2200>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvBnReLU1d'>,
},
{
'pattern': <class 'torch.nn.modules.conv.Conv2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv2d'>,
'qat_module': <class 'torch.nn.qat.modules.conv.Conv2d'>,
},
{
'pattern': <class 'torch.nn.qat.modules.conv.Conv2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv2d'>,
},
{
'pattern': <built-in method conv2d of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.conv.Conv2d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7fc9894c2320>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvReLU2d'>,
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <class 'torch.nn.modules.conv.Conv2d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7fc9894c23b0>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvReLU2d'>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in method conv2d of type object at 0x7fc9db52bd40>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <built-in method conv2d of type object at 0x7fc9db52bd40>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm2d'>, <class 'torch.nn.modules.conv.Conv2d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse2.<locals>.reversed at 0x7fc9894c2440>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvBn2d'>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, (<class 'torch.nn.modules.batchnorm.BatchNorm2d'>, <class 'torch.nn.modules.conv.Conv2d'>)),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse3.<locals>.reversed at 0x7fc9894c24d0>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvBnReLU2d'>,
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, (<class 'torch.nn.modules.batchnorm.BatchNorm2d'>, <class 'torch.nn.modules.conv.Conv2d'>)),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'fuser_method': <function reverse3.<locals>.reversed at 0x7fc9894c2560>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvBnReLU2d'>,
},
{
'pattern': <class 'torch.nn.modules.conv.Conv3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv3d'>,
'qat_module': <class 'torch.nn.qat.modules.conv.Conv3d'>,
},
{
'pattern': <class 'torch.nn.qat.modules.conv.Conv3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv3d'>,
},
{
'pattern': <built-in method conv3d of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.conv.Conv3d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7fc9894c2680>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvReLU3d'>,
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <class 'torch.nn.modules.conv.Conv3d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7fc9894c2710>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvReLU3d'>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in method conv3d of type object at 0x7fc9db52bd40>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <built-in method conv3d of type object at 0x7fc9db52bd40>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm3d'>, <class 'torch.nn.modules.conv.Conv3d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse2.<locals>.reversed at 0x7fc9894c27a0>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvBn3d'>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, (<class 'torch.nn.modules.batchnorm.BatchNorm3d'>, <class 'torch.nn.modules.conv.Conv3d'>)),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse3.<locals>.reversed at 0x7fc9894c2830>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvBnReLU3d'>,
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, (<class 'torch.nn.modules.batchnorm.BatchNorm3d'>, <class 'torch.nn.modules.conv.Conv3d'>)),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'fuser_method': <function reverse3.<locals>.reversed at 0x7fc9894c28c0>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.ConvBnReLU3d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.ConvBn1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBn1d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBn1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv1d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.ConvBn2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBn2d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBn2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv2d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.ConvBn3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBn3d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBn3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv3d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.ConvBnReLU1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBnReLU1d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBnReLU1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv1d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.ConvBnReLU2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBnReLU2d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBnReLU2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv2d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.ConvBnReLU3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBnReLU3d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBnReLU3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv3d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.ConvReLU1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv1d'>,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU1d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv1d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.ConvReLU1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU1d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv1d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.ConvReLU2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv2d'>,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU2d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv2d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.ConvReLU2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU2d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv2d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.ConvReLU3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv3d'>,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU3d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv3d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.ConvReLU3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU3d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.Conv3d'>,
},
{
'pattern': <class 'torch.nn.modules.conv.ConvTranspose1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'root_module': <class 'torch.nn.modules.conv.ConvTranspose1d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.ConvTranspose1d'>,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm1d'>, <class 'torch.nn.modules.conv.ConvTranspose1d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse2.<locals>.reversed at 0x7fc9894c2290>,
'root_module': <class 'torch.nn.modules.conv.ConvTranspose1d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.ConvTranspose1d'>,
},
{
'pattern': <class 'torch.nn.modules.conv.ConvTranspose2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'root_module': <class 'torch.nn.modules.conv.ConvTranspose2d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.ConvTranspose2d'>,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm2d'>, <class 'torch.nn.modules.conv.ConvTranspose2d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse2.<locals>.reversed at 0x7fc9894c25f0>,
'root_module': <class 'torch.nn.modules.conv.ConvTranspose2d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.ConvTranspose2d'>,
},
{
'pattern': <class 'torch.nn.modules.conv.ConvTranspose3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'root_module': <class 'torch.nn.modules.conv.ConvTranspose3d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.ConvTranspose3d'>,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm3d'>, <class 'torch.nn.modules.conv.ConvTranspose3d'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
],
'fuser_method': <function reverse2.<locals>.reversed at 0x7fc9894c2950>,
'root_module': <class 'torch.nn.modules.conv.ConvTranspose3d'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.conv.ConvTranspose3d'>,
},
{
'pattern': detach,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': detach_,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.dropout.Dropout'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <function dropout at 0x7fc98a30b9e0>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.ELU'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <function elu at 0x7fc98a30e200>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.sparse.Embedding'>,
'dtype_configs': [
{
'input_dtype': torch.float32,
'weight_dtype': torch.quint8,
'output_dtype': torch.float32,
},
{
'input_dtype': torch.float32,
'weight_dtype': torch.quint4x2,
'output_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'qat_module': <class 'torch.nn.qat.modules.embedding_ops.Embedding'>,
'root_module': <class 'torch.nn.modules.sparse.Embedding'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.sparse.Embedding'>,
'_input_output_observed': False,
},
{
'pattern': <class 'torch.nn.qat.modules.embedding_ops.Embedding'>,
'dtype_configs': [
{
'input_dtype': torch.float32,
'weight_dtype': torch.quint8,
'output_dtype': torch.float32,
},
{
'input_dtype': torch.float32,
'weight_dtype': torch.quint4x2,
'output_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.sparse.Embedding'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.sparse.Embedding'>,
'_input_output_observed': False,
},
{
'pattern': <class 'torch.nn.modules.sparse.EmbeddingBag'>,
'dtype_configs': [
{
'input_dtype': torch.float32,
'weight_dtype': torch.quint8,
'output_dtype': torch.float32,
},
{
'input_dtype': torch.float32,
'weight_dtype': torch.quint4x2,
'output_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'qat_module': <class 'torch.nn.qat.modules.embedding_ops.EmbeddingBag'>,
'root_module': <class 'torch.nn.modules.sparse.EmbeddingBag'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.sparse.EmbeddingBag'>,
'_input_output_observed': False,
},
{
'pattern': <class 'torch.nn.qat.modules.embedding_ops.EmbeddingBag'>,
'dtype_configs': [
{
'input_dtype': torch.float32,
'weight_dtype': torch.quint8,
'output_dtype': torch.float32,
},
{
'input_dtype': torch.float32,
'weight_dtype': torch.quint4x2,
'output_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.sparse.EmbeddingBag'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.sparse.EmbeddingBag'>,
'_input_output_observed': False,
},
{
'pattern': <built-in method flatten of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in function floordiv>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.rnn.GRUCell'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.rnn.GRUCell'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.rnn.GRUCell'>,
},
{
'pattern': <class 'torch.nn.modules.activation.Hardsigmoid'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'_overwrite_output_fake_quantizer': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'_overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': <function hardsigmoid at 0x7fc98a30ecb0>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'_overwrite_output_fake_quantizer': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'_overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': hardsigmoid,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'_overwrite_output_fake_quantizer': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'_overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': hardsigmoid_,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'_overwrite_output_fake_quantizer': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'_overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': <class 'torch.nn.modules.activation.Hardswish'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <function hardswish at 0x7fc98a30ee60>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.Hardtanh'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <function hardtanh at 0x7fc98a30e560>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in function hardtanh_>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.linear.Identity'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <function instance_norm at 0x7fc98a314710>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.instancenorm.InstanceNorm1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.instancenorm.InstanceNorm2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.instancenorm.InstanceNorm3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <function interpolate at 0x7fc98a297dd0>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.normalization.LayerNorm'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <function layer_norm at 0x7fc98a314680>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.LeakyReLU'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <function leaky_relu at 0x7fc98a30e3b0>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.linear.Linear'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.linear.Linear'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.linear.Linear'>,
'qat_module': <class 'torch.nn.qat.modules.linear.Linear'>,
},
{
'pattern': <class 'torch.nn.qat.modules.linear.Linear'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.linear.Linear'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.linear.Linear'>,
},
{
'pattern': <built-in function linear>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.linear.Linear'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7fc9e39d9b00>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.LinearReLU'>,
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <class 'torch.nn.modules.linear.Linear'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7fc98952f4d0>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.LinearReLU'>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in function linear>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <built-in function linear>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm1d'>, <class 'torch.nn.modules.linear.Linear'>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'fuser_method': <function reverse2.<locals>.reversed at 0x7fc9894b9ef0>,
'fused_module': <class 'torch.nn.intrinsic.modules.fused.LinearBn1d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.LinearBn1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.linear.Linear'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.linear.Linear'>,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.linear_fused.LinearBn1d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.linear_fused.LinearBn1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.linear.Linear'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.linear.Linear'>,
},
{
'pattern': <class 'torch.nn.intrinsic.modules.fused.LinearReLU'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.linear.Linear'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.linear.Linear'>,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.linear_relu.LinearReLU'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.linear_relu.LinearReLU'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.linear.Linear'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.linear.Linear'>,
},
{
'pattern': <class 'torch.nn.modules.rnn.LSTM'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.rnn.LSTM'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.rnn.LSTM'>,
},
{
'pattern': <class 'torch.nn.modules.rnn.LSTMCell'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.rnn.LSTMCell'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.rnn.LSTMCell'>,
},
{
'pattern': <class 'torch.nn.modules.pooling.MaxPool1d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': torch.nn.functional.max_pool1d,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.MaxPool2d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': torch.nn.functional.max_pool2d,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.MaxPool3d'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': torch.nn.functional.max_pool3d,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method mean of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': mean,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in function mul>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <built-in function mul>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': (<built-in method relu of type object at 0x7fc9db52bd40>, <built-in function mul>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': <built-in function mul>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in method mul of type object at 0x7fc9db52bd40>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': (<function relu at 0x7fc98a30bef0>, <built-in method mul of type object at 0x7fc9db52bd40>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': (<built-in method relu of type object at 0x7fc9db52bd40>, <built-in method mul of type object at 0x7fc9db52bd40>),
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': <built-in method mul of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
},
{
'pattern': permute,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.ReLU'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <function relu at 0x7fc98a30bef0>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': relu,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': relu_,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.ReLU6'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <function relu6 at 0x7fc98a30e170>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': repeat,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method repeat_interleave of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': repeat_interleave,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': reshape,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': resize_,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.rnn.RNNCell'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float32,
'output_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.rnn.RNNCell'>,
'reference_quantized_module_for_root': <class 'torch.nn.quantized._reference.modules.rnn.RNNCell'>,
},
{
'pattern': shape,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.Sigmoid'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'_overwrite_output_fake_quantizer': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'_overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': <built-in method sigmoid of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'_overwrite_output_fake_quantizer': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'_overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': sigmoid,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'_overwrite_output_fake_quantizer': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'_overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': sigmoid_,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'_overwrite_output_fake_quantizer': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'_overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': size,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.Softmax'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'_overwrite_output_fake_quantizer': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'_overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': <built-in method squeeze of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': squeeze,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': squeeze_,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method stack of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.Tanh'>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'_overwrite_output_fake_quantizer': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'_overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': <built-in method tanh of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'_overwrite_output_fake_quantizer': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'_overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': tanh,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'_overwrite_output_fake_quantizer': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'_overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': tanh_,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'weight_dtype': torch.qint8,
'bias_dtype': torch.float32,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'_overwrite_output_fake_quantizer': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'_overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': <built-in method transpose of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': transpose,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method unsqueeze of type object at 0x7fc9db52bd40>,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': unsqueeze,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': unsqueeze_,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': view,
'dtype_configs': [
{
'input_dtype': torch.quint8,
'output_dtype': torch.quint8,
},
{
'input_dtype': torch.float16,
'weight_dtype': torch.float16,
'bias_dtype': torch.float16,
'output_dtype': torch.float16,
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
}
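
For reference, a single entry in the same dict format can also be written out by hand, for example to extend or override the defaults listed above for a custom backend. The sketch below mirrors the statically quantized linear entries from the dump; note that the import location of ObservationType, the top-level 'configs' wrapper, and the keyword under which prepare_fx accepts the configuration are assumptions that vary across PyTorch releases, so check them against the release you are using:

import torch
import torch.nn as nn
# Assumed import location; in older releases ObservationType lives in a submodule
# of torch.ao.quantization.backend_config rather than at the package root.
from torch.ao.quantization.backend_config import ObservationType

# One pattern entry in the same dict format as the dump above: statically
# quantized nn.Linear with quint8 activations, qint8 weights, an fp32 bias,
# plus an fp16 fallback, matching the linear entries shown earlier.
linear_config = {
    'pattern': nn.Linear,
    'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
    'dtype_configs': [
        {
            'input_dtype': torch.quint8,
            'weight_dtype': torch.qint8,
            'bias_dtype': torch.float32,
            'output_dtype': torch.quint8,
        },
        {
            'input_dtype': torch.float16,
            'weight_dtype': torch.float16,
            'bias_dtype': torch.float16,
            'output_dtype': torch.float16,
        },
    ],
    'root_module': nn.Linear,
}

# Top-level wrapper assumed for the dict-based format; newer releases replace it
# with the BackendConfig / BackendPatternConfig classes. The resulting object is
# passed to prepare_fx / convert_fx via their backend config argument (whose
# keyword name also differs between releases).
backend_config_dict = {'configs': [linear_config]}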