# 测试自定义的 TVM 量化

In [1]:
from configs import set_tool, set_env
from models import model_names, Calibrate, InputConfig, Model
from common.configs.frontend import from_frontend
from configs.create_config import Config
from tqdm import tqdm

In [2]:
import tvm
from tvm import relay
import toml
from d2py.utils.log_config import config_logging
# Configure logging: records go to a file located inside `log_dir`.
log_dir = ".temp"
config_logging(
    f"{log_dir}/test.log",
    'debug',
    filemode="w",
    # Modules whose log records do not need to be kept are filtered out.
    filter_mod_names={"vta", "te_compiler"},
)

In [3]:
# Load the per-model settings and cross-check each entry against InputConfig.
# TOML documents are UTF-8 by specification, so the encoding is given
# explicitly instead of depending on the platform's default locale encoding.
with open("configs/model.toml", encoding="utf-8") as fp:
    configs = toml.load(fp)
for model_name, config in configs.items():
    config = Config(**config)
    const = InputConfig(model_name)
    # The input name stored in the TOML entry must agree with the one that
    # InputConfig reports for the same model; name the offender on failure.
    assert const.input_name == config.name, (
        f"{model_name}: InputConfig.input_name={const.input_name!r} "
        f"does not match configured name={config.name!r}"
    )

2024-02-18 16:53:15.404892: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-18 16:53:15.404960: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-18 16:53:15.406146: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-18 16:53:15.416229: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
import numpy as np
import tvm
from tvm import relay
from tvm.relay import transform as _transform
from tvm.relay import expr as _expr
from tvm.relay import Call, Constant, Function
from tvm.ir.op import Op
from tvm.relay.dataflow_pattern import (
    is_constant, is_op, is_tuple, wildcard, 
    is_tuple_get_item
)
from tvm.relay.quantize.quantize import _bind_params
from tools.pattern.float import *
from tools.common import FuseTransform

In [5]:
# Prefix shared by the name of every composite pattern registered below.
compiler_name = "vta_special"

# (composite name, dataflow pattern) pairs handed to MergeComposite.
# The entries are kept in exactly this order.
pattern_table = [
    (
        f"{compiler_name}.conv2d_bias_relu_maxpool2d",
        make_conv2d_bias_relu_maxpool2d_pattern(),
    ),
    (
        f"{compiler_name}.conv2d_bias",
        make_conv2d_bias_pattern(),
    ),
    (
        f"{compiler_name}.dense_bias",
        make_dense_bias_pattern(),
    ),
    (
        f"{compiler_name}.elwise",
        make_elwise_pattern(),
    ),
]

# Pass pipeline: type inference, operator fusion via the pattern table,
# then type inference again on the rewritten module.
merge_passes = tvm.transform.Sequential(
    [
        _transform.InferType(),
        _transform.MergeComposite(pattern_table),  # fuse matched operators
        _transform.InferType(),
        # NOTE: _transform.PartitionGraph() is left disabled here.
    ]
)

In [6]:
# NOTE: the "person_chair" model requires its frontend to be rewritten.

In [7]:
# Models that are skipped outright in this run.
# ("resnet50_v2" and "mobilenet_v2_tf" are currently not part of the skip set.)
SKIPPED_MODELS = {
    "person", "new_person", "face_rec",
    "face_detection_580", "face_detection", "fd_quintina",
    "driver",  # The following operators are not supported in frontend Caffe: 'Upsample'
    "fr_karen", "fr_madeline",
}
# The only model that is actually imported and transformed below.
TARGET_MODEL = "resnet50_v2"

for model_name, config in tqdm(configs.items()):
    if model_name in SKIPPED_MODELS:
        continue
    config = Config(**config)
    if model_name != TARGET_MODEL:
        continue

    # Import the frontend model into Relay according to its declared type.
    if config.model_type == 'caffe':
        # Caffe frontend model
        caffe_model, prototxt = Model(config.model_type, model_name)()
        shape_dict = {config.name: config.shape}
        dtype_dict = {config.name: "float32"}
        mod, params = relay.frontend.from_caffe(
            caffe_model, prototxt, shape_dict, dtype_dict
        )
    elif config.model_type == 'torch':
        # PyTorch frontend model; torch is imported only when this branch runs.
        import torch
        torch_model = Model(config.model_type, model_name)().eval()
        trace_model = torch.jit.trace(torch_model, torch.randn(*config.shape))
        mod, params = relay.frontend.from_pytorch(
            trace_model.eval(), [(config.name, config.shape)]
        )
    elif config.model_type == 'onnx':
        # ONNX frontend model
        onnx_model = Model(config.model_type, model_name)()
        mod, params = relay.frontend.from_onnx(
            onnx_model,
            {config.name: config.shape},
            "float32", opset=15,
            freeze_params=True
        )
    else:
        raise TypeError(f"{config.model_type} 暂未支持")

    # Apply relay.quantize's prerequisite optimizations, merge the composite
    # patterns, run FuseTransform, and display the resulting "main" function.
    with tvm.transform.PassContext(opt_level=3):
        run_mod = relay.quantize.prerequisite_optimize(mod, params)
        run_mod_mg = merge_passes(run_mod)
        fuse_mod = FuseTransform()(run_mod_mg)
        tvm.IRModule.from_expr(fuse_mod["main"]).show()
    # Stop iterating once the target model has been processed.
    break

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:24<?, ?it/s]
