# partition_conversions

`tvm.relay.quantize._partition_conversions.partition_conversions()`
将模块划分为输入量化、核心量化推理和输出反量化三个部分。
import set_env
import numpy as np
import tvm
from tvm.runtime.vm import VirtualMachine
from tvm import relay
from torch import nn
import torch
class Model(nn.Module):
    """Minimal Conv2d -> BatchNorm2d -> ReLU network used as the quantization demo."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # 3-channel input, 16 filters, 3x3 kernel, stride 1, padding 1
        # (spatially shape-preserving for any HxW input).
        self.conv = nn.Conv2d(3, 16, 3, 1, 1, bias=True)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run conv, batch norm and ReLU in sequence over *x* (NCHW tensor)."""
        return self.relu(self.bn(self.conv(x)))
def create_model(ishape=(1, 3, 4, 4)):
    """Build the demo PyTorch model, trace it, and import it into Relay.

    Parameters
    ----------
    ishape : tuple
        NCHW input shape fed to the tracer (default ``(1, 3, 4, 4)``).

    Returns
    -------
    mod, params
        The Relay module produced by ``relay.frontend.from_pytorch`` and
        its parameter dictionary.
    """
    pt_model = Model().eval().float()
    # Trace with a random float32 input of the requested shape.
    dummy_input = torch.rand(ishape).type(torch.float32)
    traced_model = torch.jit.trace(pt_model, dummy_input)
    # Translate the TorchScript graph into a TVM Relay module.
    return relay.frontend.from_pytorch(
        traced_model,
        [("data", ishape)],
        use_parser_friendly_name=True,
    )
# First attempt: simulated quantization (do_simulation=True) with no conv layers skipped.
print(f"修改前量化配置:\n{relay.quantize.current_qconfig()}")
mod, params = create_model(ishape = (1, 3, 4, 4))
with tvm.transform.PassContext(opt_level=3):
    with relay.quantize.qconfig(
        skip_conv_layers=[],
        do_simulation=True
    ):
        print(f"当前量化配置:\n{relay.quantize.current_qconfig()}\n")
        # NOTE(review): on this environment this call raises AttributeError
        # (see traceback below) — TVM's _calibrate.py uses np.math.*, an alias
        # removed from modern NumPy. The fix belongs in TVM, not this script.
        qmod = relay.quantize.quantize(mod, params)
        print(qmod)
修改前量化配置:
qconfig(nbit_input=8, nbit_weight=8, nbit_activation=32, calibrate_mode=global_scale, global_scale=8, weight_scale=power2, skip_conv_layers==(nullptr), skip_dense_layer==1, do_simulation==0, round_for_shift==1, debug_enabled_ops==(nullptr), rounding==UPWARD, partition_conversions==disabled)
当前量化配置:
qconfig(nbit_input=8, nbit_weight=8, nbit_activation=32, calibrate_mode=global_scale, global_scale=8, weight_scale=power2, skip_conv_layers==[], skip_dense_layer==1, do_simulation==1, round_for_shift==1, debug_enabled_ops==(nullptr), rounding==UPWARD, partition_conversions==disabled)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[3], line 9
4 with relay.quantize.qconfig(
5 skip_conv_layers=[],
6 do_simulation=True
7 ):
8 print(f"当前量化配置:\n{relay.quantize.current_qconfig()}\n")
----> 9 qmod = relay.quantize.quantize(mod, params)
10 print(qmod)
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/relay/quantize/quantize.py:370, in quantize(mod, params, dataset)
366 with tvm.transform.PassContext(
367 opt_level=3, required_pass=["QuantizeAnnotate", "QuantizeCalibrate", "QuantizeRealize"]
368 ):
369 with quantize_context():
--> 370 mod = quantize_seq(mod)
372 q_cfg = current_qconfig()
373 assert q_cfg.partition_conversions in ["disabled", "enabled", "fully_integral"]
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/ir/transform.py:238, in Pass.__call__(self, mod)
224 def __call__(self, mod):
225 """Execute the pass. Note that for sequential pass, the dependency among
226 different passes will be resolved in the backend.
227
(...) 236 The updated module after applying this pass.
237 """
--> 238 return _ffi_transform_api.RunPass(self, mod)
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/_ffi/_ctypes/packed_func.py:245, in PackedFuncBase.__call__(self, *args)
233 ret_tcode = ctypes.c_int()
234 if (
235 _LIB.TVMFuncCall(
236 self.handle,
(...) 243 != 0
244 ):
--> 245 raise_last_ffi_error()
246 _ = temp_args
247 _ = args
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/_ffi/base.py:481, in raise_last_ffi_error()
475 # The exception PyObject may contain a large amount of state,
476 # including all stack frames that may be inspected in a later
477 # PDB post-mortem. Therefore, we must make sure to remove the
478 # underlying PyObject* from the C++ side after we retrieve it.
479 _LIB.TVMDropLastPythonError()
--> 481 raise py_err
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/relay/quantize/_calibrate.py:236, in calibrate.<locals>.wrapped_func(mod, _)
233 else:
234 raise ValueError(f"Unknown weight scale mode {cfg.weight_scale}")
--> 236 return _set_params(mod, input_scale_func, weight_scale_func)
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/relay/quantize/_calibrate.py:168, in _set_params(mod, input_scale_func, weight_scale_func)
165 const_params[nclip_max] = _make_const((valid_range - 1))
167 main_func = mod["main"]
--> 168 _analysis.post_order_visit(main_func, visit_func)
169 main_func = _expr.bind(main_func, const_params)
170 func_dict = {}
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/relay/analysis/analysis.py:44, in post_order_visit(expr, fvisit)
31 def post_order_visit(expr, fvisit):
32 """Recursively visit the ir in post DFS order node,
33 apply fvisit. Each node is guaranteed to be visited
34 only once.
(...) 42 The visitor function to be applied.
43 """
---> 44 return _ffi_api.post_order_visit(expr, fvisit)
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:427, in operator()()
425
426 TVM_REGISTER_GLOBAL("relay.analysis.post_order_visit").set_body_typed([](Expr expr, PackedFunc f) {
--> 427 PostOrderVisit(expr, [f](const Expr& n) { f(n); });
428 });
429
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:423, in tvm::relay::PostOrderVisit(tvm::RelayExpr const&, std::function<void (tvm::RelayExpr const&)>)()
421
422 void PostOrderVisit(const Expr& e, std::function<void(const Expr&)> fvisit) {
--> 423 ExprApplyVisit(fvisit).VisitExpr(e);
424 }
425
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:413, in tvm::relay::ExprApplyVisit::VisitExpr(tvm::RelayExpr const&)()
411 if (visited_.count(e.get()) != 0) return;
412 visited_.insert(e.get());
--> 413 ExprVisitor::VisitExpr(e);
414 f_(e);
415 }
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:295, in tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)()
293 } else {
294 using TParent = ExprFunctor<void(const Expr&)>;
--> 295 TParent::VisitExpr(expr);
296 visit_counter_.insert({expr.get(), 1});
297 }
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:413, in tvm::relay::ExprApplyVisit::VisitExpr(tvm::RelayExpr const&)()
411 if (visited_.count(e.get()) != 0) return;
412 visited_.insert(e.get());
--> 413 ExprVisitor::VisitExpr(e);
414 f_(e);
415 }
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:295, in tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)()
293 } else {
294 using TParent = ExprFunctor<void(const Expr&)>;
--> 295 TParent::VisitExpr(expr);
296 visit_counter_.insert({expr.get(), 1});
297 }
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:336, in tvm::relay::ExprVisitor::VisitExpr_(tvm::relay::CallNode const*)()
334
335 for (auto arg : op->args) {
--> 336 this->VisitExpr(arg);
337 }
338 }
[... skipping similar frames: tvm::relay::ExprApplyVisit::VisitExpr(tvm::RelayExpr const&) at line 413 (1 times), tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&) at line 295 (1 times)]
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:336, in tvm::relay::ExprVisitor::VisitExpr_(tvm::relay::CallNode const*)()
334
335 for (auto arg : op->args) {
--> 336 this->VisitExpr(arg);
337 }
338 }
[... skipping similar frames: tvm::relay::ExprApplyVisit::VisitExpr(tvm::RelayExpr const&) at line 413 (4 times), tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&) at line 295 (4 times), tvm::relay::ExprVisitor::VisitExpr_(tvm::relay::CallNode const*) at line 336 (4 times)]
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:413, in tvm::relay::ExprApplyVisit::VisitExpr(tvm::RelayExpr const&)()
411 if (visited_.count(e.get()) != 0) return;
412 visited_.insert(e.get());
--> 413 ExprVisitor::VisitExpr(e);
414 f_(e);
415 }
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:295, in tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)()
293 } else {
294 using TParent = ExprFunctor<void(const Expr&)>;
--> 295 TParent::VisitExpr(expr);
296 visit_counter_.insert({expr.get(), 1});
297 }
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:336, in tvm::relay::ExprVisitor::VisitExpr_(tvm::relay::CallNode const*)()
334
335 for (auto arg : op->args) {
--> 336 this->VisitExpr(arg);
337 }
338 }
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/relay/quantize/_calibrate.py:155, in _set_params.<locals>.visit_func(expr)
153 if kind == quantize.QAnnotateKind.WEIGHT:
154 assert isinstance(expr.args[0], _expr.Constant)
--> 155 scale = weight_scale_func(expr)
156 else:
157 scale = input_scale_func(expr)
File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/relay/quantize/_calibrate.py:183, in _power2_scale(sq_call)
181 assert isinstance(var, _expr.Constant)
182 val = np.amax(np.abs(var.data.numpy()))
--> 183 return 2 ** np.math.ceil(np.math.log(val, 2)) if val > 0 else 1.0
File /media/pc/data/lxw/envs/anaconda3a/envs/tvm-env/lib/python3.13/site-packages/numpy/__init__.py:414, in __getattr__(attr)
411 import numpy.char as char
412 return char.chararray
--> 414 raise AttributeError("module {!r} has no attribute "
415 "{!r}".format(__name__, attr))
AttributeError: module 'numpy' has no attribute 'math'
# Second attempt: real (non-simulated) quantization with partition_conversions
# enabled, so the module is split into input-quantization, quantized core
# inference, and output-dequantization parts.
mod, params = create_model(ishape = (1, 3, 4, 4))
with tvm.transform.PassContext(opt_level=3):
    with relay.quantize.qconfig(
        skip_conv_layers=[],
        partition_conversions="enabled",
        do_simulation=False
    ):
        print(f"当前量化配置:\n{relay.quantize.current_qconfig()}\n")
        qmod = relay.quantize.quantize(mod, params)
        print(qmod)

# Execute the quantized module on CPU through the Relay virtual machine.
dev = tvm.cpu()
data_np = np.random.uniform(low=-1, high=1, size=[1, 3, 4, 4]).astype("float32")
input_dict = {"data": data_np}
with tvm.transform.PassContext(opt_level=3):
    # NOTE(review): params are presumably already bound into qmod by
    # quantize(); passing them again here looks redundant — confirm.
    qvm_exec = relay.vm.compile(qmod, target="llvm", params=params)
qvm = VirtualMachine(qvm_exec, dev)
qvm.set_input("main", **input_dict)
tvm_qres = qvm.run()