Test automatic quantization#
import numpy as np
import tvm
from tvm import te
from tvm import relay
from tvm.relay import testing
from tvm.relay.expr import Call
from tvm.topi.utils import get_const_tuple
def quantize_and_build(out, skip_conv_layers=[]):
    """Wrap `out` into a workload, quantize it, and make sure it still builds."""
    f = relay.Function(relay.analysis.free_vars(out), out)
    mod, params = testing.create_workload(f)

    with relay.quantize.qconfig(skip_conv_layers=skip_conv_layers):
        qmod = relay.quantize.quantize(mod, params)

    relay.build(qmod, "llvm", params=params)
    return mod, qmod
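The tests below use `relay.quantize.qconfig` to scope the quantization settings. As a minimal sketch (not part of the original tests), the active configuration can be inspected with `relay.quantize.current_qconfig`; the attribute names used here (`nbit_input`, `nbit_weight`, `global_scale`, `skip_conv_layers`) are fields of TVM's `QConfig`:
# A minimal sketch: inspect the quantization settings that quantize_and_build runs under.
with relay.quantize.qconfig(skip_conv_layers=[0]):
    cfg = relay.quantize.current_qconfig()
    print(cfg.nbit_input, cfg.nbit_weight)  # bit widths for inputs and weights (8/8 by default)
    print(cfg.global_scale)                 # scale used by the default "global_scale" calibration
    print(cfg.skip_conv_layers)             # the conv layer indices asked to be skipped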
relay.transform.FuseOps??
Signature: relay.transform.FuseOps(fuse_opt_level=-1)
Source:
def FuseOps(fuse_opt_level=-1):
"""Fuse operators in an expr to a larger operator according to some rules.
Parameters
----------
fuse_opt_level : int
The level of fuse optimization. -1 indicates that the level will be
inferred from pass context.
Returns
-------
ret : tvm.transform.Pass
The registered pass for operator fusion.
"""
return _ffi_api.FuseOps(fuse_opt_level)
File: /media/pc/data/lxw/ai/tvm/xinetzone/__pypackages__/3.10/lib/tvm/relay/transform/transform.py
Type: function
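As a minimal sketch (illustrative only, not part of the original tests), `FuseOps` can also be applied directly to a small module; at `fuse_opt_level=3` the elementwise `add` and `relu` below end up in a single fused primitive function:
# A minimal sketch: apply FuseOps explicitly to a tiny Relay function.
x = relay.var("x", shape=(1, 16, 64, 64))
y = relay.nn.relu(relay.add(x, relay.const(1.0)))
m = tvm.IRModule.from_expr(relay.Function([x], y))
m = relay.transform.InferType()(m)
m = relay.transform.FuseOps(fuse_opt_level=3)(m)
m.show()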
The right operand of multiply is not a constant#
data = relay.var("data", shape=(1, 16, 64, 64))
multiplier = relay.sigmoid(relay.var("data", shape=(1, 16, 1, 1)))
conv = relay.nn.conv2d(
    data, relay.var("weight"), kernel_size=(3, 3), padding=(1, 1), channels=16
)
act = relay.nn.relu(data=conv)
mod, qmod = quantize_and_build(act * multiplier)
mod.show()
qmod.show()
pool = relay.nn.global_avg_pool2d(data=act)
mod, qmod = quantize_and_build(act * pool)
mod.show()
qmod.show()
def @main(%data: Tensor[(1, 16, 64, 64), float32] /* ty=Tensor[(1, 16, 64, 64), float32] */, %data1: Tensor[(1, 16, 1, 1), float32] /* ty=Tensor[(1, 16, 1, 1), float32] */) -> Tensor[(1, 16, 64, 64), float32] {
  %0 = nn.conv2d(%data, meta[relay.Constant][0], padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 64, 64), float32] */;
  %1 = nn.relu(%0) /* ty=Tensor[(1, 16, 64, 64), float32] */;
  %2 = sigmoid(%data1) /* ty=Tensor[(1, 16, 1, 1), float32] */;
  multiply(%1, %2) /* ty=Tensor[(1, 16, 64, 64), float32] */
}

def @main(%data: Tensor[(1, 16, 64, 64), float32] /* ty=Tensor[(1, 16, 64, 64), float32] */, %data1: Tensor[(1, 16, 1, 1), float32] /* ty=Tensor[(1, 16, 1, 1), float32] */) -> Tensor[(1, 16, 64, 64), float32] {
  %0 = multiply(%data, 16f /* ty=float32 */) /* ty=Tensor[(1, 16, 64, 64), float32] */;
  %1 = round(%0) /* ty=Tensor[(1, 16, 64, 64), float32] */;
  %2 = clip(%1, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 16, 64, 64), float32] */;
  %3 = cast(%2, dtype="int8") /* ty=Tensor[(1, 16, 64, 64), int8] */;
  %4 = nn.conv2d(%3, meta[relay.Constant][0] /* ty=Tensor[(16, 16, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %5 = nn.relu(%4) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %6 = add(%5, 256 /* ty=int32 */) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %7 = right_shift(%6, 9 /* ty=int32 */) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %8 = clip(%7, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %9 = cast(%8, dtype="int8") /* ty=Tensor[(1, 16, 64, 64), int8] */;
  %10 = annotation.stop_fusion(%9) /* ty=Tensor[(1, 16, 64, 64), int8] */;
  %11 = sigmoid(%data1) /* ty=Tensor[(1, 16, 1, 1), float32] */;
  %12 = multiply(%11, 16f /* ty=float32 */) /* ty=Tensor[(1, 16, 1, 1), float32] */;
  %13 = round(%12) /* ty=Tensor[(1, 16, 1, 1), float32] */;
  %14 = clip(%13, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 16, 1, 1), float32] */;
  %15 = cast(%10, dtype="int32") /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %16 = cast(%14, dtype="int32") /* ty=Tensor[(1, 16, 1, 1), int32] */;
  %17 = multiply(%15, %16) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %18 = add(%17, 8 /* ty=int32 */) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %19 = right_shift(%18, 4 /* ty=int32 */) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %20 = clip(%19, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %21 = cast(%20, dtype="int8") /* ty=Tensor[(1, 16, 64, 64), int8] */;
  %22 = annotation.stop_fusion(%21) /* ty=Tensor[(1, 16, 64, 64), int8] */;
  %23 = cast(%22, dtype="float32") /* ty=Tensor[(1, 16, 64, 64), float32] */;
  multiply(%23, 0.0625f /* ty=float32 */) /* ty=Tensor[(1, 16, 64, 64), float32] */
}

def @main(%data: Tensor[(1, 16, 64, 64), float32] /* ty=Tensor[(1, 16, 64, 64), float32] */) -> Tensor[(1, 16, 64, 64), float32] {
  %0 = nn.conv2d(%data, meta[relay.Constant][0], padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 64, 64), float32] */;
  %1 = nn.relu(%0) /* ty=Tensor[(1, 16, 64, 64), float32] */;
  %2 = nn.global_avg_pool2d(%1) /* ty=Tensor[(1, 16, 1, 1), float32] */;
  multiply(%1, %2) /* ty=Tensor[(1, 16, 64, 64), float32] */
}

def @main(%data: Tensor[(1, 16, 64, 64), float32] /* ty=Tensor[(1, 16, 64, 64), float32] */) -> Tensor[(1, 16, 64, 64), float32] {
  %0 = multiply(%data, 16f /* ty=float32 */) /* ty=Tensor[(1, 16, 64, 64), float32] */;
  %1 = round(%0) /* ty=Tensor[(1, 16, 64, 64), float32] */;
  %2 = clip(%1, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 16, 64, 64), float32] */;
  %3 = cast(%2, dtype="int8") /* ty=Tensor[(1, 16, 64, 64), int8] */;
  %4 = nn.conv2d(%3, meta[relay.Constant][0] /* ty=Tensor[(16, 16, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %5 = nn.relu(%4) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %6 = add(%5, 256 /* ty=int32 */) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %7 = right_shift(%6, 9 /* ty=int32 */) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %8 = clip(%7, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %9 = cast(%8, dtype="int8") /* ty=Tensor[(1, 16, 64, 64), int8] */;
  %10 = annotation.stop_fusion(%9) /* ty=Tensor[(1, 16, 64, 64), int8] */;
  %11 = cast(%8, dtype="int8") /* ty=Tensor[(1, 16, 64, 64), int8] */;
  %12 = annotation.stop_fusion(%11) /* ty=Tensor[(1, 16, 64, 64), int8] */;
  %13 = cast(%12, dtype="int32") /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %14 = cast(%10, dtype="int32") /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %15 = nn.global_avg_pool2d(%13) /* ty=Tensor[(1, 16, 1, 1), int32] */;
  %16 = multiply(%14, %15) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %17 = annotation.stop_fusion(%16) /* ty=Tensor[(1, 16, 64, 64), int32] */;
  %18 = cast(%17, dtype="float32") /* ty=Tensor[(1, 16, 64, 64), float32] */;
  multiply(%18, 0.00390625f /* ty=float32 */) /* ty=Tensor[(1, 16, 64, 64), float32] */
}
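As a minimal sketch (not part of the original tests), the dtypes flowing through the `multiply` calls of the quantized module above can be listed with a post-order visitor; in the printed IR the elementwise product itself is computed in `int32`, while only the input-scaling and dequantize multiplies stay in `float32`:
# A minimal sketch: list the result/argument dtypes of every multiply in qmod.
def _list_multiplies(node):
    if isinstance(node, Call) and isinstance(node.op, tvm.ir.Op) and node.op.name == "multiply":
        print(node.checked_type.dtype, [arg.checked_type.dtype for arg in node.args])

relay.analysis.post_order_visit(qmod["main"], _list_multiplies)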
Skip convolution layers#
data = relay.var("data", shape=(1, 16, 64, 64))
np_weight = np.random.rand(16, 16, 3, 3)
conv0_weight = relay.Constant(tvm.nd.array(np_weight)).astype("float32")
conv1_weight = relay.Constant(tvm.nd.array(np_weight)).astype("float32")
multiplier = relay.sigmoid(relay.var("data", shape=(1, 16, 1, 1)))
conv0 = relay.nn.conv2d(data, conv0_weight, kernel_size=(3, 3), padding=(1, 1), channels=16)
act0 = relay.nn.relu(data=conv0)
conv1 = relay.nn.conv2d(act0, conv1_weight, kernel_size=(3, 3), padding=(1, 1), channels=16)
act1 = relay.nn.relu(data=conv1)
quantize_and_build(act1 * multiplier)
quantize_and_build(act1 * multiplier, skip_conv_layers=[0])
quantize_and_build(act1 * multiplier, skip_conv_layers=[1])
mod, qmod = quantize_and_build(act1 * multiplier, skip_conv_layers=[0, 1])
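As a minimal sketch (not part of the original test), counting the result dtypes of the `nn.conv2d` calls in `qmod` makes the effect of `skip_conv_layers` visible; with both layers skipped, both convolutions should remain in `float32`:
# A minimal sketch: count conv2d result dtypes in the quantized module.
def count_conv_dtypes(module):
    counts = {}
    def visit(node):
        if isinstance(node, Call) and isinstance(node.op, tvm.ir.Op) and node.op.name == "nn.conv2d":
            dtype = node.checked_type.dtype
            counts[dtype] = counts.get(dtype, 0) + 1
    relay.analysis.post_order_visit(module["main"], visit)
    return counts

print(count_conv_dtypes(qmod))  # expect only float32 entries when skip_conv_layers=[0, 1]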
stop_quantize#
data = relay.var("data", shape=(1, 16, 64, 64))
np_weight0 = np.random.rand(16, 16, 3, 3)
conv0_weight = relay.Constant(tvm.nd.array(np_weight0)).astype("float32")
np_weight1 = np.random.rand(16, 16, 1, 1)
conv1_weight = relay.Constant(tvm.nd.array(np_weight1)).astype("float32")
multiplier = relay.sigmoid(relay.var("data", shape=(1, 16, 1, 1)))
conv0 = relay.nn.conv2d(data, conv0_weight, kernel_size=(3, 3), padding=(1, 1), channels=16)
act0 = relay.nn.relu(data=conv0)
pool = relay.nn.global_avg_pool2d(data=act0)
conv1 = relay.nn.conv2d(pool, conv1_weight, kernel_size=(1, 1), padding=(0, 0), channels=16)
act1 = relay.nn.relu(data=conv1)
mod, qmod = quantize_and_build(act1 * multiplier)
mod.show()
qmod.show()
batch_flatten#
data = relay.var("data", shape=(1, 16, 64, 64), dtype="float32")
out = relay.nn.conv2d(
    data, relay.var("weight"), kernel_size=(3, 3), padding=(1, 1), channels=16
)
out = relay.nn.batch_flatten(out)
mod, qmod = quantize_and_build(out)
def _check_batch_flatten(node):
    if isinstance(node, Call):
        if node.op.name == "nn.batch_flatten":
            assert node.checked_type.dtype == "int8"

# check if batch_flatten is quantized
relay.analysis.post_order_visit(qmod["main"], _check_batch_flatten)
batch_matmul#
data = relay.var("data", shape=(1, 4, 16, 16))
data2 = relay.sigmoid(relay.var("data", shape=(4, 16, 64)))
out = relay.nn.conv2d(data, relay.var("weight"), kernel_size=(3, 3), padding=(1, 1), channels=8)
out = relay.nn.batch_flatten(out)
out = relay.reshape(out, [1, 32, 64])
out = relay.nn.batch_matmul(out, data2)
mod, qmod = quantize_and_build(out)
def _check_batch_matmul(node):
    if isinstance(node, Call):
        if node.op.name in ["nn.batch_matmul", "nn.conv2d"]:
            assert node.checked_type.dtype == "int32"
        elif node.op.name == "nn.batch_flatten":
            assert node.checked_type.dtype == "int8"

# check if batch_matmul is quantized
relay.analysis.post_order_visit(qmod["main"], _check_batch_matmul)
calibration_dataset#
def get_calibration_dataset(mod, input_name):
    dataset = []
    input_shape = [int(x) for x in mod["main"].checked_type.arg_types[0].shape]
    for i in range(5):
        data = np.random.uniform(size=input_shape)
        dataset.append({input_name: data})
    return dataset

mod, params = testing.synthetic.get_workload()
dataset = get_calibration_dataset(mod, "data")

create_target = True
with relay.quantize.qconfig(calibrate_mode="kl_divergence"):
    if create_target:
        with tvm.target.Target("llvm"):
            relay.quantize.quantize(mod, params, dataset)
    else:
        # current_target = None
        relay.quantize.quantize(mod, params, dataset)
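As a minimal sketch (not part of the original test), the calibrated module can also be kept and compiled, which is a quick way to confirm that the kl_divergence-calibrated IR still builds end to end:
# A minimal sketch: keep the calibrated module and build it for llvm.
with relay.quantize.qconfig(calibrate_mode="kl_divergence"):
    with tvm.target.Target("llvm"):
        qmod = relay.quantize.quantize(mod, params, dataset)
lib = relay.build(qmod, target="llvm", params=params)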
calibrate_memory_bound#
mod, params = testing.synthetic.get_workload()
dataset = get_calibration_dataset(mod, "data")
import multiprocessing
num_cpu = multiprocessing.cpu_count()
with relay.quantize.qconfig(calibrate_mode="kl_divergence", calibrate_chunk_by=num_cpu):
relay.quantize.quantize(mod, params, dataset)
calibrate_percentile#
mod, params = testing.synthetic.get_workload()
dataset = get_calibration_dataset(mod, "data")
with relay.quantize.qconfig(calibrate_mode="percentile"):
relay.quantize.quantize(mod, params, dataset)
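As a closing sketch (not part of the original tests), the calibration modes exercised above can be run over the same workload and dataset in one loop; "global_scale" is TVM's default mode and ignores the dataset:
# A minimal sketch: exercise all three calibration modes on the same workload.
for mode in ["global_scale", "kl_divergence", "percentile"]:
    with relay.quantize.qconfig(calibrate_mode=mode):
        relay.quantize.quantize(mod, params, dataset)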