partition_conversions

tvm.relay.quantize._partition_conversions.partition_conversions() partitions a module into input quantization, the core quantized inference, and output dequantization.
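In practice this is driven by the partition_conversions option of relay.quantize.qconfig, which accepts "disabled", "enabled" or "fully_integral" (the assertion in quantize.py visible in the traceback further down lists exactly these values). A minimal sketch of just setting the option; the comment on "fully_integral" follows the qconfig docstring:

from tvm import relay

# "enabled" splits the quantized module into conversion and core functions;
# "fully_integral" additionally errors out if any float operation survives
# quantization (per the qconfig docstring).
with relay.quantize.qconfig(partition_conversions="enabled"):
    print(relay.quantize.current_qconfig())

The full example below builds a small PyTorch model, imports it into Relay, and quantizes it with and without partitioning.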

import set_env
import numpy as np
import tvm
from tvm.runtime.vm import VirtualMachine
from tvm import relay
from torch import nn
import torch

class Model(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.conv = nn.Conv2d(3, 16, 3, 1, 1, bias=True)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x

def create_model(ishape=(1, 3, 4, 4)):
    pt_model = Model().eval().float()
    input_shapes = [("data", ishape)]
    # script_module = torch.jit.script(pt_model)
    # mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
    idata = torch.rand(ishape).type(torch.float32)
    traced_model = torch.jit.trace(pt_model, idata)
    # translate the traced TorchScript model into a Relay (TVM frontend) module
    mod, params = relay.frontend.from_pytorch(traced_model, input_shapes,
                                              use_parser_friendly_name=True)
    return mod, params

print(f"Quantization config before changes:\n{relay.quantize.current_qconfig()}")
mod, params = create_model(ishape=(1, 3, 4, 4))
with tvm.transform.PassContext(opt_level=3):
    with relay.quantize.qconfig(
        skip_conv_layers=[],
        do_simulation=True
    ):
        print(f"当前量化配置:\n{relay.quantize.current_qconfig()}\n")
        qmod = relay.quantize.quantize(mod, params)
print(qmod)
Quantization config before changes:
qconfig(nbit_input=8, nbit_weight=8, nbit_activation=32, calibrate_mode=global_scale, global_scale=8, weight_scale=power2, skip_conv_layers==(nullptr), skip_dense_layer==1, do_simulation==0, round_for_shift==1, debug_enabled_ops==(nullptr), rounding==UPWARD, partition_conversions==disabled)
Current quantization config:
qconfig(nbit_input=8, nbit_weight=8, nbit_activation=32, calibrate_mode=global_scale, global_scale=8, weight_scale=power2, skip_conv_layers==[], skip_dense_layer==1, do_simulation==1, round_for_shift==1, debug_enabled_ops==(nullptr), rounding==UPWARD, partition_conversions==disabled)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[3], line 9
      4     with relay.quantize.qconfig(
      5         skip_conv_layers=[],
      6         do_simulation=True
      7     ):
      8         print(f"Current quantization config:\n{relay.quantize.current_qconfig()}\n")
----> 9         qmod = relay.quantize.quantize(mod, params)
     10 print(qmod)

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/relay/quantize/quantize.py:370, in quantize(mod, params, dataset)
    366 with tvm.transform.PassContext(
    367     opt_level=3, required_pass=["QuantizeAnnotate", "QuantizeCalibrate", "QuantizeRealize"]
    368 ):
    369     with quantize_context():
--> 370         mod = quantize_seq(mod)
    372 q_cfg = current_qconfig()
    373 assert q_cfg.partition_conversions in ["disabled", "enabled", "fully_integral"]

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/ir/transform.py:238, in Pass.__call__(self, mod)
    224 def __call__(self, mod):
    225     """Execute the pass. Note that for sequential pass, the dependency among
    226     different passes will be resolved in the backend.
    227 
   (...)    236         The updated module after applying this pass.
    237     """
--> 238     return _ffi_transform_api.RunPass(self, mod)

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/_ffi/_ctypes/packed_func.py:245, in PackedFuncBase.__call__(self, *args)
    233 ret_tcode = ctypes.c_int()
    234 if (
    235     _LIB.TVMFuncCall(
    236         self.handle,
   (...)    243     != 0
    244 ):
--> 245     raise_last_ffi_error()
    246 _ = temp_args
    247 _ = args

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/_ffi/base.py:481, in raise_last_ffi_error()
    475 # The exception PyObject may contain a large amount of state,
    476 # including all stack frames that may be inspected in a later
    477 # PDB post-mortem.  Therefore, we must make sure to remove the
    478 # underlying PyObject* from the C++ side after we retrieve it.
    479 _LIB.TVMDropLastPythonError()
--> 481 raise py_err

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/relay/quantize/_calibrate.py:236, in calibrate.<locals>.wrapped_func(mod, _)
    233 else:
    234     raise ValueError(f"Unknown weight scale mode {cfg.weight_scale}")
--> 236 return _set_params(mod, input_scale_func, weight_scale_func)

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/relay/quantize/_calibrate.py:168, in _set_params(mod, input_scale_func, weight_scale_func)
    165         const_params[nclip_max] = _make_const((valid_range - 1))
    167 main_func = mod["main"]
--> 168 _analysis.post_order_visit(main_func, visit_func)
    169 main_func = _expr.bind(main_func, const_params)
    170 func_dict = {}

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/relay/analysis/analysis.py:44, in post_order_visit(expr, fvisit)
     31 def post_order_visit(expr, fvisit):
     32     """Recursively visit the ir in post DFS order node,
     33     apply fvisit. Each node is guaranteed to be visited
     34     only once.
   (...)     42         The visitor function to be applied.
     43     """
---> 44     return _ffi_api.post_order_visit(expr, fvisit)

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:427, in operator()()
    425 
    426 TVM_REGISTER_GLOBAL("relay.analysis.post_order_visit").set_body_typed([](Expr expr, PackedFunc f) {
--> 427   PostOrderVisit(expr, [f](const Expr& n) { f(n); });
    428 });
    429 

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:423, in tvm::relay::PostOrderVisit(tvm::RelayExpr const&, std::function<void (tvm::RelayExpr const&)>)()
    421 
    422 void PostOrderVisit(const Expr& e, std::function<void(const Expr&)> fvisit) {
--> 423   ExprApplyVisit(fvisit).VisitExpr(e);
    424 }
    425 

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:413, in tvm::relay::ExprApplyVisit::VisitExpr(tvm::RelayExpr const&)()
    411   if (visited_.count(e.get()) != 0) return;
    412   visited_.insert(e.get());
--> 413   ExprVisitor::VisitExpr(e);
    414   f_(e);
    415 }

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:295, in tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)()
    293 } else {
    294   using TParent = ExprFunctor<void(const Expr&)>;
--> 295   TParent::VisitExpr(expr);
    296   visit_counter_.insert({expr.get(), 1});
    297 }

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:413, in tvm::relay::ExprApplyVisit::VisitExpr(tvm::RelayExpr const&)()
    411   if (visited_.count(e.get()) != 0) return;
    412   visited_.insert(e.get());
--> 413   ExprVisitor::VisitExpr(e);
    414   f_(e);
    415 }

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:295, in tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)()
    293 } else {
    294   using TParent = ExprFunctor<void(const Expr&)>;
--> 295   TParent::VisitExpr(expr);
    296   visit_counter_.insert({expr.get(), 1});
    297 }

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:336, in tvm::relay::ExprVisitor::VisitExpr_(tvm::relay::CallNode const*)()
    334 
    335   for (auto arg : op->args) {
--> 336     this->VisitExpr(arg);
    337   }
    338 }

    [... skipping similar frames: tvm::relay::ExprApplyVisit::VisitExpr(tvm::RelayExpr const&) at line 413 (1 times), tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&) at line 295 (1 times)]

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:336, in tvm::relay::ExprVisitor::VisitExpr_(tvm::relay::CallNode const*)()
    334 
    335   for (auto arg : op->args) {
--> 336     this->VisitExpr(arg);
    337   }
    338 }

    [... skipping similar frames: tvm::relay::ExprApplyVisit::VisitExpr(tvm::RelayExpr const&) at line 413 (4 times), tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&) at line 295 (4 times), tvm::relay::ExprVisitor::VisitExpr_(tvm::relay::CallNode const*) at line 336 (4 times)]

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:413, in tvm::relay::ExprApplyVisit::VisitExpr(tvm::RelayExpr const&)()
    411   if (visited_.count(e.get()) != 0) return;
    412   visited_.insert(e.get());
--> 413   ExprVisitor::VisitExpr(e);
    414   f_(e);
    415 }

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:295, in tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)()
    293 } else {
    294   using TParent = ExprFunctor<void(const Expr&)>;
--> 295   TParent::VisitExpr(expr);
    296   visit_counter_.insert({expr.get(), 1});
    297 }

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/src/relay/ir/expr_functor.cc:336, in tvm::relay::ExprVisitor::VisitExpr_(tvm::relay::CallNode const*)()
    334 
    335   for (auto arg : op->args) {
--> 336     this->VisitExpr(arg);
    337   }
    338 }

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/relay/quantize/_calibrate.py:155, in _set_params.<locals>.visit_func(expr)
    153 if kind == quantize.QAnnotateKind.WEIGHT:
    154     assert isinstance(expr.args[0], _expr.Constant)
--> 155     scale = weight_scale_func(expr)
    156 else:
    157     scale = input_scale_func(expr)

File /media/pc/data/board/arria10/lxw/tasks/tvm-ai/python/tvm/relay/quantize/_calibrate.py:183, in _power2_scale(sq_call)
    181 assert isinstance(var, _expr.Constant)
    182 val = np.amax(np.abs(var.data.numpy()))
--> 183 return 2 ** np.math.ceil(np.math.log(val, 2)) if val > 0 else 1.0

File /media/pc/data/lxw/envs/anaconda3a/envs/tvm-env/lib/python3.13/site-packages/numpy/__init__.py:414, in __getattr__(attr)
    411     import numpy.char as char
    412     return char.chararray
--> 414 raise AttributeError("module {!r} has no attribute "
    415                      "{!r}".format(__name__, attr))

AttributeError: module 'numpy' has no attribute 'math'
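The error above does not come from the example itself: np.math was removed from recent NumPy releases (it was only ever an alias for the standard-library math module), while the _power2_scale helper in this TVM build's _calibrate.py still calls np.math.ceil(np.math.log(val, 2)). A minimal sketch of two possible workarounds, assuming the TVM source is left unpatched; weight_scale="max" is an alternative documented qconfig mode, not something required by this example:

import math
import numpy as np

# Workaround 1: restore the removed alias so _power2_scale keeps working
# (np.math was simply an alias for the stdlib math module).
np.math = math

# Workaround 2 (alternative): avoid the power2 code path altogether by
# switching the weight scale mode.
# with relay.quantize.qconfig(skip_conv_layers=[], weight_scale="max",
#                             do_simulation=True):
#     qmod = relay.quantize.quantize(mod, params)

After applying one of these, the example continues with partitioning actually enabled and do_simulation turned off: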
mod, params = create_model(ishape=(1, 3, 4, 4))
with tvm.transform.PassContext(opt_level=3):
    with relay.quantize.qconfig(
        skip_conv_layers=[],
        partition_conversions="enabled",
        do_simulation=False
    ):
        print(f"当前量化配置:\n{relay.quantize.current_qconfig()}\n")
        qmod = relay.quantize.quantize(mod, params)
print(qmod)
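With partition_conversions="enabled", the printed module is no longer a single main: the input quantization, the quantized core, and the output dequantization live in separate global functions that main simply chains together (in the TVM sources these are typically named quantize_inputs, quantized_main and dequantize_outputs, but the listing above is authoritative). A quick way to enumerate what the pass generated, without assuming the exact names:

# List the global functions produced by the partitioning pass.
for gv in qmod.get_global_vars():
    print(gv.name_hint)

The partitioned module can then be compiled and executed like any other Relay module: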
dev = tvm.cpu()
data_np = np.random.uniform(low=-1, high=1, size=[1, 3, 4, 4]).astype("float32")
input_dict = {"data": data_np}

# Compile the partitioned, quantized module with the Relay VM backend for CPU
with tvm.transform.PassContext(opt_level=3):
    qvm_exec = relay.vm.compile(qmod, target="llvm", params=params)
qvm = VirtualMachine(qvm_exec, dev)
qvm.set_input("main", **input_dict)
tvm_qres = qvm.run()
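As a rough sanity check (a sketch only; the tolerance is deliberately loose because the quantized network is an 8-bit approximation of the float one), the same input can be pushed through the original float32 module and the two results compared:

# Compile and run the unquantized float32 module on the same input.
with tvm.transform.PassContext(opt_level=3):
    vm_exec = relay.vm.compile(mod, target="llvm", params=params)
vm = VirtualMachine(vm_exec, dev)
vm.set_input("main", **input_dict)
tvm_res = vm.run()

# Loose tolerance: 8-bit quantization introduces noticeable rounding error.
np.testing.assert_allclose(tvm_qres.numpy(), tvm_res.numpy(), rtol=0.1, atol=0.1)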