Traktor/myenv/Lib/site-packages/torch/onnx/symbolic_caffe2.py

import importlib
import inspect

from torch.onnx import symbolic_helper, symbolic_opset9 as opset9
from torch.onnx._internal import jit_utils, registration


def register_quantized_ops(domain: str, version: int):
    """Register the functions of this module as symbolics for ``domain``.

    Every function found on ``torch.onnx.symbolic_caffe2`` is registered as
    ``"<domain>::<function name>"`` for opset ``version``, unless that name is
    already registered for that version. Functions whose name is in the
    override set below are additionally registered under ``aten::`` as custom
    symbolics so they take the place of the builtin aten ones.

    Args:
        domain: Operator namespace used as the prefix of the registered names.
        version: Opset version the symbolics are registered for.
    """
    caffe2_module = importlib.import_module("torch.onnx.symbolic_caffe2")
    # getmembers with a predicate yields only the (name, function) pairs.
    module_functions = inspect.getmembers(caffe2_module, inspect.isfunction)
    # Names that also replace the builtin aten symbolic of the same name.
    aten_overrides = frozenset(
        (
            "relu",
            "_empty_affine_quantized",
            "dequantize",
            "quantize_per_tensor",
            "upsample_nearest2d",
            "avg_pool2d",
            "reshape",
            "slice",
            "cat",
            "max_pool2d",
            "sigmoid",
        )
    )
    registry = registration.registry
    for fn_name, symbolic_fn in module_functions:
        qualified_name = f"{domain}::{fn_name}"
        if registry.is_registered_op(qualified_name, version):
            # Never replace an existing registration in the target domain.
            continue
        if fn_name in aten_overrides:
            # Override the builtin aten ops
            registry.register(f"aten::{fn_name}", version, symbolic_fn, custom=True)
        registry.register(qualified_name, version, symbolic_fn)


def _permute_helper(g: jit_utils.GraphContext, input, axes):
    """Emit a ``_caffe2::Int8Transpose`` of ``input`` using ``axes``.

    The output scale and zero point are copied from the ``Y_scale`` and
    ``Y_zero_point`` attributes of the node that produced ``input``. The new
    value is recorded in ``symbolic_helper._quantized_ops`` before being
    returned.
    """
    producer = input.node()
    transposed = g.op(
        "_caffe2::Int8Transpose",
        input,
        axes_i=axes,
        Y_scale_f=symbolic_helper._node_get(producer, "Y_scale"),
        Y_zero_point_i=symbolic_helper._node_get(producer, "Y_zero_point"),
    )
    symbolic_helper._quantized_ops.add(transposed)
    return transposed


def nchw2nhwc(g: jit_utils.GraphContext, input):
    """Transpose a quantized value with the axis order ``[0, 2, 3, 1]``.

    Per the function name this converts NCHW layout to NHWC.
    """
    return _permute_helper(g, input, [0, 2, 3, 1])


def nhwc2nchw(g: jit_utils.GraphContext, input):
    """Transpose a quantized value with the axis order ``[0, 3, 1, 2]``.

    Per the function name this converts NHWC layout back to NCHW.
    """
    return _permute_helper(g, input, [0, 3, 1, 2])


def linear_prepack(g: jit_utils.GraphContext, weight, bias):
    """Emit a placeholder ``_caffe2::WeightPrepack`` node for a linear layer.

    The node only carries ``weight`` and ``bias`` so that the later
    onnx -> c2 conversion can look the original tensors up from it. The
    output is recorded in ``symbolic_helper._quantized_ops``.
    """
    prepacked = g.op("_caffe2::WeightPrepack", weight, bias)
    symbolic_helper._quantized_ops.add(prepacked)
    return prepacked


@symbolic_helper.parse_args("v", "v", "v", "f", "i")
def linear(g: jit_utils.GraphContext, input, weight, bias, scale, zero_point):
    """Emit ``_caffe2::Int8FC`` for a quantized linear layer.

    ``scale`` and ``zero_point`` become the ``Y_scale`` / ``Y_zero_point``
    attributes of the emitted node. The output is recorded in
    ``symbolic_helper._quantized_ops``.
    """
    fc_out = g.op(
        "_caffe2::Int8FC",
        input,
        weight,
        bias,
        Y_scale_f=scale,
        Y_zero_point_i=zero_point,
    )
    symbolic_helper._quantized_ops.add(fc_out)
    return fc_out


def conv_prepack(
    g: jit_utils.GraphContext, input, weight, bias, stride, padding, dilation, groups
):
    """Emit a placeholder ``_caffe2::WeightPrepack`` node for a convolution.

    Only ``input``, ``weight`` and ``bias`` are forwarded to the node;
    ``stride``, ``padding``, ``dilation`` and ``groups`` are accepted but not
    used here. The onnx -> c2 conversion can look the original weight and
    bias up from this node. The output is recorded in
    ``symbolic_helper._quantized_ops``.
    """
    prepacked = g.op("_caffe2::WeightPrepack", input, weight, bias)
    symbolic_helper._quantized_ops.add(prepacked)
    return prepacked


@symbolic_helper.parse_args("v", "v", "v", "is", "is", "is", "i", "f", "i")
def conv2d(
    g: jit_utils.GraphContext,
    input,
    weight,
    bias,
    stride,
    padding,
    dilation,
    groups,
    scale,
    zero_point,
):
    """Emit ``_caffe2::Int8Conv`` (NHWC order) for a quantized 2-D convolution.

    Args:
        g: Graph context the node is added to.
        input: Quantized input value.
        weight: Weight value; the kernel size is taken from the ``shape``
            attribute of the node that produced it.
        bias: Bias value.
        stride: Strides, passed through as ``strides``.
        padding: Padding list; it is concatenated with itself to form ``pads``.
        dilation: Dilations, passed through as ``dilations``.
        groups: Group count, passed through as ``group``.
        scale: Output scale (``Y_scale``).
        zero_point: Output zero point (``Y_zero_point``).

    Returns:
        The output value of the emitted node, which is also recorded in
        ``symbolic_helper._quantized_ops``.
    """
    # Read the attribute through symbolic_helper._node_get, like every other
    # attribute lookup in this module, rather than subscripting the node
    # (which only works when Node supports ``__getitem__``).
    weight_shape = symbolic_helper._node_get(weight.node(), "shape")
    # NOTE(review): elements 1 and 2 are presumably the kernel height/width
    # of a channels-last weight -- confirm against the prepack node.
    kernel_size = weight_shape[1:3]
    kwargs = {
        "strides_i": stride,
        "pads_i": padding + padding,
        "dilations_i": dilation,
        "group_i": groups,
        "kernels_i": kernel_size,
        "order_s": "NHWC",
        "Y_scale_f": scale,
        "Y_zero_point_i": zero_point,
    }
    output = g.op("_caffe2::Int8Conv", input, weight, bias, **kwargs)
    symbolic_helper._quantized_ops.add(output)
    return output


@symbolic_helper.parse_args("v", "v", "v", "is", "is", "is", "i", "f", "i")
def conv2d_relu(
    g: jit_utils.GraphContext,
    input,
    weight,
    bias,
    stride,
    padding,
    dilation,
    groups,
    scale,
    zero_point,
):
    """Emit ``_caffe2::Int8ConvRelu`` (NHWC order) for a fused quantized conv + relu.

    Args:
        g: Graph context the node is added to.
        input: Quantized input value.
        weight: Weight value; the kernel size is taken from the ``shape``
            attribute of the node that produced it.
        bias: Bias value.
        stride: Strides, passed through as ``strides``.
        padding: Padding list; it is concatenated with itself to form ``pads``.
        dilation: Dilations, passed through as ``dilations``.
        groups: Group count, passed through as ``group``.
        scale: Output scale (``Y_scale``).
        zero_point: Output zero point (``Y_zero_point``).

    Returns:
        The output value of the emitted node, which is also recorded in
        ``symbolic_helper._quantized_ops``.
    """
    # Read the attribute through symbolic_helper._node_get, like every other
    # attribute lookup in this module, rather than subscripting the node
    # (which only works when Node supports ``__getitem__``).
    weight_shape = symbolic_helper._node_get(weight.node(), "shape")
    # NOTE(review): elements 1 and 2 are presumably the kernel height/width
    # of a channels-last weight -- confirm against the prepack node.
    kernel_size = weight_shape[1:3]
    kwargs = {
        "strides_i": stride,
        "pads_i": padding + padding,
        "dilations_i": dilation,
        "group_i": groups,
        "kernels_i": kernel_size,
        "order_s": "NHWC",
        "Y_scale_f": scale,
        "Y_zero_point_i": zero_point,
    }
    output = g.op("_caffe2::Int8ConvRelu", input, weight, bias, **kwargs)
    symbolic_helper._quantized_ops.add(output)
    return output


@symbolic_helper.parse_args("v", "v", "f", "i")
def add(g: jit_utils.GraphContext, input_a, input_b, scale, zero_point):
    """Emit ``_caffe2::Int8Add`` for a quantized element-wise addition.

    ``scale`` and ``zero_point`` become the ``Y_scale`` / ``Y_zero_point``
    attributes of the emitted node. The output is recorded in
    ``symbolic_helper._quantized_ops``.
    """
    summed = g.op(
        "_caffe2::Int8Add",
        input_a,
        input_b,
        Y_scale_f=scale,
        Y_zero_point_i=zero_point,
    )
    symbolic_helper._quantized_ops.add(summed)
    return summed


@symbolic_helper.parse_args("v")
def relu(g: jit_utils.GraphContext, input):
    """Emit ``_caffe2::Int8Relu`` for quantized inputs, else defer to opset 9.

    An input counts as quantized when it is present in
    ``symbolic_helper._quantized_ops``; in that case the output reuses the
    ``Y_scale`` / ``Y_zero_point`` attributes of the input's producing node
    and is itself recorded in that set.
    """
    if input in symbolic_helper._quantized_ops:
        producer = input.node()
        activated = g.op(
            "_caffe2::Int8Relu",
            input,
            Y_scale_f=symbolic_helper._node_get(producer, "Y_scale"),
            Y_zero_point_i=symbolic_helper._node_get(producer, "Y_zero_point"),
        )
        symbolic_helper._quantized_ops.add(activated)
        return activated
    # Not a quantized value: use the regular symbolic.
    return opset9.relu(g, input)


@symbolic_helper.parse_args("v", "f", "i", "t")
def quantize_per_tensor(g: jit_utils.GraphContext, input, scale, zero_point, dtype):
    """Emit ``_caffe2::Int8Quantize`` for ``input``.

    ``scale`` and ``zero_point`` become the ``Y_scale`` / ``Y_zero_point``
    attributes of the emitted node; ``dtype`` is accepted but not used. The
    output is recorded in ``symbolic_helper._quantized_ops``.
    """
    quantized = g.op(
        "_caffe2::Int8Quantize",
        input,
        Y_scale_f=scale,
        Y_zero_point_i=zero_point,
    )
    symbolic_helper._quantized_ops.add(quantized)
    return quantized


@symbolic_helper.parse_args("v")
def dequantize(g: jit_utils.GraphContext, input):
    """Emit ``_caffe2::Int8Dequantize`` for ``input``.

    Unlike the quantized symbolics in this module, the result is not added to
    ``symbolic_helper._quantized_ops``.
    """
    dequantized = g.op("_caffe2::Int8Dequantize", input)
    return dequantized


@symbolic_helper.parse_args("v", "t", "t", "t", "t", "t", "t", "t")
def _empty_affine_quantized(
    g: jit_utils.GraphContext,
    input,
    shape,
    scale,
    zero_point,
    dtype,
    pin_memory,
    memory_format,
    layout,
):
    """Pass-through symbolic: return ``input`` unchanged.

    No node is added to the graph; every argument other than ``input`` is
    accepted and ignored.
    """
    return input


def upsample_nearest2d(
    g: jit_utils.GraphContext,
    input,
    output_size,
    align_corners=None,
    scales_h=None,
    scales_w=None,
):
    """Emit ``_caffe2::Int8ResizeNearest`` for quantized inputs.

    Inputs that are not in ``symbolic_helper._quantized_ops`` are handed to
    the opset 9 symbolic. For quantized inputs the value is transposed with
    ``nchw2nhwc``, resized, and transposed back with ``nhwc2nchw``; the output
    scale and zero point are copied from the input's producing node.
    ``scales_h`` and ``scales_w`` are accepted but not used.
    """
    if input in symbolic_helper._quantized_ops:
        target_size = symbolic_helper._parse_arg(output_size, "is")
        producer = input.node()
        # Read the quantization attributes from the original (NCHW) input
        # before it is replaced by the transposed value.
        resize_attrs = {
            "output_size_i": target_size,
            "Y_scale_f": symbolic_helper._node_get(producer, "Y_scale"),
            "Y_zero_point_i": symbolic_helper._node_get(producer, "Y_zero_point"),
        }
        nhwc_input = nchw2nhwc(g, input)
        resized = g.op("_caffe2::Int8ResizeNearest", nhwc_input, **resize_attrs)
        result = nhwc2nchw(g, resized)
        symbolic_helper._quantized_ops.add(result)
        return result
    return opset9.upsample_nearest2d(g, input, output_size, align_corners)  # type: ignore[attr-defined]


@symbolic_helper.parse_args("v", "is", "is", "is", "is", "i")
def max_pool2d(
    g: jit_utils.GraphContext,
    input,
    kernel_size,
    stride,
    padding,
    dilation,
    ceil_mode,
):
    """Emit ``_caffe2::Int8MaxPool`` (NHWC order) for quantized inputs.

    Inputs that are not in ``symbolic_helper._quantized_ops`` are handed to
    the opset 9 symbolic with all arguments. On the quantized path the value
    is transposed with ``nchw2nhwc``, pooled, and transposed back with
    ``nhwc2nchw``; ``dilation`` and ``ceil_mode`` are not forwarded, and only
    ``kernel_size[0]`` is used as the kernel.
    """
    if input in symbolic_helper._quantized_ops:
        producer = input.node()
        pool_attrs = {
            "strides_i": stride,
            # The padding list is repeated to form the pads attribute.
            "pads_i": padding + padding,
            "kernel_i": kernel_size[0],
            "order_s": "NHWC",
            "Y_scale_f": symbolic_helper._node_get(producer, "Y_scale"),
            "Y_zero_point_i": symbolic_helper._node_get(producer, "Y_zero_point"),
        }
        nhwc_input = nchw2nhwc(g, input)
        pooled = g.op("_caffe2::Int8MaxPool", nhwc_input, **pool_attrs)
        result = nhwc2nchw(g, pooled)
        symbolic_helper._quantized_ops.add(result)
        return result
    return opset9.max_pool2d(  # type: ignore[attr-defined]
        g, input, kernel_size, stride, padding, dilation, ceil_mode
    )


@symbolic_helper.parse_args("v", "is", "is", "is", "i", "i", "none")
def avg_pool2d(
    g: jit_utils.GraphContext,
    input,
    kernel_size,
    stride,
    padding,
    ceil_mode,
    count_include_pad,
    divisor_override=None,
):
    """Emit ``_caffe2::Int8AveragePool`` (NHWC order) for quantized inputs.

    Inputs that are not in ``symbolic_helper._quantized_ops`` are handed to
    the opset 9 symbolic with all arguments. On the quantized path the value
    is transposed with ``nchw2nhwc``, pooled, and transposed back with
    ``nhwc2nchw``; ``ceil_mode``, ``count_include_pad`` and
    ``divisor_override`` are not forwarded, and only ``kernel_size[0]`` is
    used as the kernel.
    """
    if input in symbolic_helper._quantized_ops:
        producer = input.node()
        pool_attrs = {
            "strides_i": stride,
            # The padding list is repeated to form the pads attribute.
            "pads_i": padding + padding,
            "kernel_i": kernel_size[0],
            "order_s": "NHWC",
            "Y_scale_f": symbolic_helper._node_get(producer, "Y_scale"),
            "Y_zero_point_i": symbolic_helper._node_get(producer, "Y_zero_point"),
        }
        nhwc_input = nchw2nhwc(g, input)
        pooled = g.op("_caffe2::Int8AveragePool", nhwc_input, **pool_attrs)
        result = nhwc2nchw(g, pooled)
        symbolic_helper._quantized_ops.add(result)
        return result
    return opset9.avg_pool2d(  # type: ignore[attr-defined]
        g,
        input,
        kernel_size,
        stride,
        padding,
        ceil_mode,
        count_include_pad,
        divisor_override,
    )


def reshape(g: jit_utils.GraphContext, input, shape):
    """Emit ``_caffe2::Int8Reshape`` for quantized inputs, else defer to opset 9.

    On the quantized path ``shape`` is passed as a second node input, the
    output reuses the ``Y_scale`` / ``Y_zero_point`` attributes of the input's
    producing node, and the output is recorded in
    ``symbolic_helper._quantized_ops``.
    """
    if input in symbolic_helper._quantized_ops:
        producer = input.node()
        reshaped = g.op(
            "_caffe2::Int8Reshape",
            input,
            shape,
            Y_scale_f=symbolic_helper._node_get(producer, "Y_scale"),
            Y_zero_point_i=symbolic_helper._node_get(producer, "Y_zero_point"),
        )
        symbolic_helper._quantized_ops.add(reshaped)
        return reshaped
    return opset9.reshape(g, input, shape)


@symbolic_helper.parse_args("v", "v", "v", "v", "i")
def slice(g: jit_utils.GraphContext, input, dim, start, end, step):
    """Emit ``_caffe2::Int8Slice`` for quantized inputs, else defer to opset 9.

    On the quantized path ``dim``, ``start`` and ``end`` are parsed as
    integers and stored as node attributes; the output reuses the
    ``Y_scale`` / ``Y_zero_point`` attributes of the input's producing node
    and is recorded in ``symbolic_helper._quantized_ops``.

    Raises:
        RuntimeError: If the input is quantized and ``step`` is not 1.
    """
    is_quantized = input in symbolic_helper._quantized_ops
    if not is_quantized:
        return opset9.slice(g, input, dim, start, end, step)
    if step != 1:
        raise RuntimeError("ONNX quantized slice export only works for step 1.")

    start_idx = symbolic_helper._parse_arg(start, "i")
    end_idx = symbolic_helper._parse_arg(end, "i")
    axis = symbolic_helper._parse_arg(dim, "i")

    producer = input.node()
    sliced = g.op(
        "_caffe2::Int8Slice",
        input,
        start_idx_i=start_idx,
        end_idx_i=end_idx,
        dim_i=axis,
        Y_scale_f=symbolic_helper._node_get(producer, "Y_scale"),
        Y_zero_point_i=symbolic_helper._node_get(producer, "Y_zero_point"),
    )
    symbolic_helper._quantized_ops.add(sliced)
    return sliced


def cat(g: jit_utils.GraphContext, tensor_list, dim, scale=None, zero_point=None):
    """Emit ``_caffe2::Int8Concat`` for quantized inputs, else defer to opset 9.

    Only the first tensor of ``tensor_list`` decides which path is taken: if
    it is not in ``symbolic_helper._quantized_ops`` the opset 9 symbolic is
    used.

    Args:
        g: Graph context the node is added to.
        tensor_list: List value holding the tensors to concatenate.
        dim: Concatenation axis; parsed as an integer on the quantized path.
        scale: Accepted but not used; the output scale is copied from the
            first tensor's producing node.
        zero_point: Accepted but not used; the output zero point is copied
            from the first tensor's producing node.

    Returns:
        The concatenated value. On the quantized path it is also recorded in
        ``symbolic_helper._quantized_ops``.
    """
    tensors = symbolic_helper._unpack_list(tensor_list)
    input = tensors[0]
    if input not in symbolic_helper._quantized_ops:
        return opset9.cat(g, tensor_list, dim)

    dim = symbolic_helper._parse_arg(dim, "i")
    # Read the attributes through symbolic_helper._node_get, like every other
    # quantized symbolic in this module, rather than subscripting the node
    # (which only works when Node supports ``__getitem__``).
    kwargs = {
        "Y_scale_f": symbolic_helper._node_get(input.node(), "Y_scale"),
        "Y_zero_point_i": symbolic_helper._node_get(input.node(), "Y_zero_point"),
    }
    output = g.op("_caffe2::Int8Concat", *tensors, axis_i=dim, **kwargs)
    symbolic_helper._quantized_ops.add(output)
    return output


@symbolic_helper.parse_args("v")
def sigmoid(g: jit_utils.GraphContext, input):
    """Emit ``_caffe2::Int8Sigmoid`` for quantized inputs, else defer to opset 9.

    The quantized output always uses a fixed scale of ``1/256`` and a zero
    point of ``0`` and is recorded in ``symbolic_helper._quantized_ops``.
    """
    if input in symbolic_helper._quantized_ops:
        # Caffe2 expects the output scale to be 1/2^8
        # and output zero_point to be 0 (quint8 type)
        activated = g.op(
            "_caffe2::Int8Sigmoid",
            input,
            Y_scale_f=1.0 / 256,
            Y_zero_point_i=0,
        )
        symbolic_helper._quantized_ops.add(activated)
        return activated
    return opset9.sigmoid(g, input)
losowanie zdjec 2024-05-26 05:12:46 +02:00			`import importlib`
			`import inspect`

			`from torch.onnx import symbolic_helper, symbolic_opset9 as opset9`
			`from torch.onnx._internal import jit_utils, registration`


			`def register_quantized_ops(domain: str, version: int):`
			`# Register all quantized ops`
			`module = importlib.import_module("torch.onnx.symbolic_caffe2")`
			`quant_version_ops = inspect.getmembers(module)`
			`aten_q_ops = {`
			`"relu",`
			`"_empty_affine_quantized",`
			`"dequantize",`
			`"quantize_per_tensor",`
			`"upsample_nearest2d",`
			`"avg_pool2d",`
			`"reshape",`
			`"slice",`
			`"cat",`
			`"max_pool2d",`
			`"sigmoid",`
			`}`
			`for op, func in quant_version_ops:`
			`name = f"{domain}::{op}"`
			`if inspect.isfunction(func) and not registration.registry.is_registered_op(`
			`name, version`
			`):`
			`if op in aten_q_ops:`
			`# Override the builtin aten ops`
			`registration.registry.register(`
			`f"aten::{op}", version, func, custom=True`
			`)`
			`registration.registry.register(name, version, func)`


			`def _permute_helper(g: jit_utils.GraphContext, input, axes):`
			`quant_args = {`
			`"axes_i": axes,`
			`"Y_scale_f": symbolic_helper._node_get(input.node(), "Y_scale"),`
			`"Y_zero_point_i": symbolic_helper._node_get(input.node(), "Y_zero_point"),`
			`}`
			`output = g.op("_caffe2::Int8Transpose", input, **quant_args)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`def nchw2nhwc(g: jit_utils.GraphContext, input):`
			`axes = [0, 2, 3, 1]`
			`return _permute_helper(g, input, axes)`


			`def nhwc2nchw(g: jit_utils.GraphContext, input):`
			`axes = [0, 3, 1, 2]`
			`return _permute_helper(g, input, axes)`


			`def linear_prepack(g: jit_utils.GraphContext, weight, bias):`
			`# Mapping to a dummy caffe2 prepack node.`
			`# During the onnx -> c2 conversion we can look up original weight and bias`
			`# from this node`
			`output = g.op("_caffe2::WeightPrepack", weight, bias)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`@symbolic_helper.parse_args("v", "v", "v", "f", "i")`
			`def linear(g: jit_utils.GraphContext, input, weight, bias, scale, zero_point):`
			`kwargs = {`
			`"Y_scale_f": scale,`
			`"Y_zero_point_i": zero_point,`
			`}`
			`output = g.op("_caffe2::Int8FC", input, weight, bias, **kwargs)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`def conv_prepack(`
			`g: jit_utils.GraphContext, input, weight, bias, stride, padding, dilation, groups`
			`):`
			`# Mapping to a dummy caffe2 prepack node.`
			`# During the onnx -> c2 conversion we can look up original weight and bias`
			`# from this node`
			`output = g.op("_caffe2::WeightPrepack", input, weight, bias)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`@symbolic_helper.parse_args("v", "v", "v", "is", "is", "is", "i", "f", "i")`
			`def conv2d(`
			`g: jit_utils.GraphContext,`
			`input,`
			`weight,`
			`bias,`
			`stride,`
			`padding,`
			`dilation,`
			`groups,`
			`scale,`
			`zero_point,`
			`):`
			`kernel_size = weight.node()["shape"][1:3]`
			`kwargs = {`
			`"strides_i": stride,`
			`"pads_i": padding + padding,`
			`"dilations_i": dilation,`
			`"group_i": groups,`
			`"kernels_i": kernel_size,`
			`"order_s": "NHWC",`
			`"Y_scale_f": scale,`
			`"Y_zero_point_i": zero_point,`
			`}`
			`output = g.op("_caffe2::Int8Conv", input, weight, bias, **kwargs)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`@symbolic_helper.parse_args("v", "v", "v", "is", "is", "is", "i", "f", "i")`
			`def conv2d_relu(`
			`g: jit_utils.GraphContext,`
			`input,`
			`weight,`
			`bias,`
			`stride,`
			`padding,`
			`dilation,`
			`groups,`
			`scale,`
			`zero_point,`
			`):`
			`kernel_size = weight.node()["shape"][1:3]`
			`kwargs = {`
			`"strides_i": stride,`
			`"pads_i": padding + padding,`
			`"dilations_i": dilation,`
			`"group_i": groups,`
			`"kernels_i": kernel_size,`
			`"order_s": "NHWC",`
			`"Y_scale_f": scale,`
			`"Y_zero_point_i": zero_point,`
			`}`
			`output = g.op("_caffe2::Int8ConvRelu", input, weight, bias, **kwargs)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`@symbolic_helper.parse_args("v", "v", "f", "i")`
			`def add(g: jit_utils.GraphContext, input_a, input_b, scale, zero_point):`
			`kwargs = {`
			`"Y_scale_f": scale,`
			`"Y_zero_point_i": zero_point,`
			`}`
			`output = g.op("_caffe2::Int8Add", input_a, input_b, **kwargs)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`@symbolic_helper.parse_args("v")`
			`def relu(g: jit_utils.GraphContext, input):`
			`if input not in symbolic_helper._quantized_ops:`
			`return opset9.relu(g, input)`
			`kwargs = {`
			`"Y_scale_f": symbolic_helper._node_get(input.node(), "Y_scale"),`
			`"Y_zero_point_i": symbolic_helper._node_get(input.node(), "Y_zero_point"),`
			`}`
			`output = g.op("_caffe2::Int8Relu", input, **kwargs)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`@symbolic_helper.parse_args("v", "f", "i", "t")`
			`def quantize_per_tensor(g: jit_utils.GraphContext, input, scale, zero_point, dtype):`
			`kwargs = {`
			`"Y_scale_f": scale,`
			`"Y_zero_point_i": zero_point,`
			`}`
			`output = g.op("_caffe2::Int8Quantize", input, **kwargs)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`@symbolic_helper.parse_args("v")`
			`def dequantize(g: jit_utils.GraphContext, input):`
			`return g.op("_caffe2::Int8Dequantize", input)`


			`@symbolic_helper.parse_args("v", "t", "t", "t", "t", "t", "t", "t")`
			`def _empty_affine_quantized(`
			`g: jit_utils.GraphContext,`
			`input,`
			`shape,`
			`scale,`
			`zero_point,`
			`dtype,`
			`pin_memory,`
			`memory_format,`
			`layout,`
			`):`
			`return input`


			`def upsample_nearest2d(`
			`g: jit_utils.GraphContext,`
			`input,`
			`output_size,`
			`align_corners=None,`
			`scales_h=None,`
			`scales_w=None,`
			`):`
			`if input not in symbolic_helper._quantized_ops:`
			`return opset9.upsample_nearest2d(g, input, output_size, align_corners) # type: ignore[attr-defined]`

			`output_size = symbolic_helper._parse_arg(output_size, "is")`
			`kwargs = {`
			`"output_size_i": output_size,`
			`"Y_scale_f": symbolic_helper._node_get(input.node(), "Y_scale"),`
			`"Y_zero_point_i": symbolic_helper._node_get(input.node(), "Y_zero_point"),`
			`}`
			`input = nchw2nhwc(g, input)`
			`output = g.op("_caffe2::Int8ResizeNearest", input, **kwargs)`
			`output = nhwc2nchw(g, output)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`@symbolic_helper.parse_args("v", "is", "is", "is", "is", "i")`
			`def max_pool2d(`
			`g: jit_utils.GraphContext,`
			`input,`
			`kernel_size,`
			`stride,`
			`padding,`
			`dilation,`
			`ceil_mode,`
			`):`
			`if input not in symbolic_helper._quantized_ops:`
			`return opset9.max_pool2d( # type: ignore[attr-defined]`
			`g, input, kernel_size, stride, padding, dilation, ceil_mode`
			`)`
			`kwargs = {`
			`"strides_i": stride,`
			`"pads_i": padding + padding,`
			`"kernel_i": kernel_size[0],`
			`"order_s": "NHWC",`
			`"Y_scale_f": symbolic_helper._node_get(input.node(), "Y_scale"),`
			`"Y_zero_point_i": symbolic_helper._node_get(input.node(), "Y_zero_point"),`
			`}`
			`input = nchw2nhwc(g, input)`
			`output = g.op("_caffe2::Int8MaxPool", input, **kwargs)`
			`output = nhwc2nchw(g, output)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`@symbolic_helper.parse_args("v", "is", "is", "is", "i", "i", "none")`
			`def avg_pool2d(`
			`g: jit_utils.GraphContext,`
			`input,`
			`kernel_size,`
			`stride,`
			`padding,`
			`ceil_mode,`
			`count_include_pad,`
			`divisor_override=None,`
			`):`
			`if input not in symbolic_helper._quantized_ops:`
			`return opset9.avg_pool2d( # type: ignore[attr-defined]`
			`g,`
			`input,`
			`kernel_size,`
			`stride,`
			`padding,`
			`ceil_mode,`
			`count_include_pad,`
			`divisor_override,`
			`)`
			`kwargs = {`
			`"strides_i": stride,`
			`"pads_i": padding + padding,`
			`"kernel_i": kernel_size[0],`
			`"order_s": "NHWC",`
			`"Y_scale_f": symbolic_helper._node_get(input.node(), "Y_scale"),`
			`"Y_zero_point_i": symbolic_helper._node_get(input.node(), "Y_zero_point"),`
			`}`
			`input = nchw2nhwc(g, input)`
			`output = g.op("_caffe2::Int8AveragePool", input, **kwargs)`
			`output = nhwc2nchw(g, output)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`def reshape(g: jit_utils.GraphContext, input, shape):`
			`if input not in symbolic_helper._quantized_ops:`
			`return opset9.reshape(g, input, shape)`

			`kwargs = {`
			`"Y_scale_f": symbolic_helper._node_get(input.node(), "Y_scale"),`
			`"Y_zero_point_i": symbolic_helper._node_get(input.node(), "Y_zero_point"),`
			`}`
			`output = g.op("_caffe2::Int8Reshape", input, shape, **kwargs)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`@symbolic_helper.parse_args("v", "v", "v", "v", "i")`
			`def slice(g: jit_utils.GraphContext, input, dim, start, end, step):`
			`if input not in symbolic_helper._quantized_ops:`
			`return opset9.slice(g, input, dim, start, end, step)`

			`if step != 1:`
			`raise RuntimeError("ONNX quantized slice export only works for step 1.")`
			`start = symbolic_helper._parse_arg(start, "i")`
			`end = symbolic_helper._parse_arg(end, "i")`
			`dim = symbolic_helper._parse_arg(dim, "i")`

			`kwargs = {`
			`"start_idx_i": start,`
			`"end_idx_i": end,`
			`"dim_i": dim,`
			`"Y_scale_f": symbolic_helper._node_get(input.node(), "Y_scale"),`
			`"Y_zero_point_i": symbolic_helper._node_get(input.node(), "Y_zero_point"),`
			`}`
			`output = g.op("_caffe2::Int8Slice", input, **kwargs)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`def cat(g: jit_utils.GraphContext, tensor_list, dim, scale=None, zero_point=None):`
			`tensors = symbolic_helper._unpack_list(tensor_list)`
			`input = tensors[0]`
			`if input not in symbolic_helper._quantized_ops:`
			`return opset9.cat(g, tensor_list, dim)`

			`dim = symbolic_helper._parse_arg(dim, "i")`
			`kwargs = {`
			`"Y_scale_f": tensors[0].node()["Y_scale"],`
			`"Y_zero_point_i": tensors[0].node()["Y_zero_point"],`
			`}`
			`output = g.op("_caffe2::Int8Concat", *tensors, axis_i=dim, **kwargs)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`


			`@symbolic_helper.parse_args("v")`
			`def sigmoid(g: jit_utils.GraphContext, input):`
			`if input not in symbolic_helper._quantized_ops:`
			`return opset9.sigmoid(g, input)`
			`# Caffe2 expects the output scale to be 1/2^8`
			`# and output zero_point to be 0 (quint8 type)`
			`out_scale = 1.0 / 256`
			`zero_point = 0`
			`kwargs = {`
			`"Y_scale_f": out_scale,`
			`"Y_zero_point_i": zero_point,`
			`}`
			`output = g.op("_caffe2::Int8Sigmoid", input, **kwargs)`
			`symbolic_helper._quantized_ops.add(output)`
			`return output`