# TVM

In [1]:
from matplotlib import pyplot as plt
import torch

from mod import load_mod

plt.ion()
# 载入自定义模块
load_mod()

from xinet import CV

In [2]:
import torch
from torchvision.models import quantization as models
from torch import nn

def create_combined_model(model_fe):
 # 步骤1:分离特征提取器
 model_fe_features = nn.Sequential(
 model_fe.quant, # 量化 input
 model_fe.conv1,
 model_fe.bn1,
 model_fe.relu,
 model_fe.maxpool,
 model_fe.layer1,
 model_fe.layer2,
 model_fe.layer3,
 model_fe.layer4,
 model_fe.avgpool,
 model_fe.dequant, # 反量化 output
 )

 # 步骤2:创建一个新的“头”
 new_head = nn.Sequential(
 nn.Dropout(p=0.5),
 nn.Linear(num_ftrs, 10),
 )

 # 步骤3:合并,不要忘记量化 stubs
 new_model = nn.Sequential(
 model_fe_features,
 nn.Flatten(1),
 new_head,
 )
 return new_model

batch_size = 128
train_iter, test_iter = CV.load_data_cifar10(batch_size=batch_size)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
import numpy as np
import time
import torch

import sys
TVM_HOME = '/media/pc/data/4tb/xinet/tvm'
sys.path.extend([f'{TVM_HOME}/python', f'{TVM_HOME}/vta/python'])

import tvm
from tvm import relay


def find_topk(array, k, axis=-1, largest=True, sorted=True):
 if axis is None:
 axis_size = array.size
 else:
 axis_size = array.shape[axis]
 assert 1 <= k <= axis_size

 array = np.asanyarray(array)
 if largest:
 index_array = np.argpartition(array, axis_size-k, axis=axis)
 topk_indices = np.take(index_array, -np.arange(k)-1, axis=axis)
 else:
 index_array = np.argpartition(array, k-1, axis=axis)
 topk_indices = np.take(index_array, np.arange(k), axis=axis)
 topk_values = np.take_along_axis(array, topk_indices, axis=axis)
 if sorted:
 sorted_indices_in_topk = np.argsort(topk_values, axis=axis)
 if largest:
 sorted_indices_in_topk = np.flip(sorted_indices_in_topk, axis=axis)
 sorted_topk_values = np.take_along_axis(
 topk_values, sorted_indices_in_topk, axis=axis)
 sorted_topk_indices = np.take_along_axis(
 topk_indices, sorted_indices_in_topk, axis=axis)
 return sorted_topk_values, sorted_topk_indices
 return topk_values, topk_indices


def calibrate_dataset(val_loader, calibration_samples, batch_size):
 for i, (input, _) in enumerate(val_loader):
 if i * batch_size >= calibration_samples:
 break
 yield {"input": input.numpy()}


def quantize(mod, params, data_aware, val_loader, calibration_samples=500, batch_size=1):
 if data_aware:
 print("tvm calibration quantize begin---------------------------->>")
 with relay.quantize.qconfig(calibrate_mode="kl_divergence", weight_scale="max", skip_conv_layers=[0], skip_dense_layer=True):
 mod = relay.quantize.quantize(
 mod, params, dataset=calibrate_dataset(val_loader, calibration_samples, batch_size))
 #print(mod)
 print("tvm calibration quantize end---------------------------->>")
 else:
 print("tvm global scale quantize begin---------------------------->>")
 with relay.quantize.qconfig(calibrate_mode="global_scale", global_scale=8.0):
 mod = relay.quantize.quantize(mod, params)
 print("tvm global scale quantize end---------------------------->>")
 return mod


def run_tvm_model(mod, params, target="llvm"):
 with tvm.transform.PassContext(opt_level=3):
 lib = relay.build(mod, target=target, params=params)
 runtime = tvm.contrib.graph_executor.GraphModule(
 lib["default"](tvm.device(target, 0)))
 return runtime


def tvm_model(model, batch_size):
 input_shape = (batch_size, 3, 32, 32)
 shape_list = [("input", input_shape)]
 input_data = torch.randn(input_shape)
 scripted_model = torch.jit.trace(model, input_data).eval()
 mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
 return mod, params


def tvm_test(model, val_loader,
 batch_size, data_aware,
 calibration_samples=500,
 print_freq=100,
 pre_quantization=False):
 mod, params = tvm_model(model, batch_size)
 if not pre_quantization:
 mod = quantize(mod, params, data_aware, val_loader,
 calibration_samples=calibration_samples,
 batch_size=batch_size)
 runtime = run_tvm_model(mod, params)
 #print(runtime.benchmark(dev, number=1, repeat=100))

 test_nums = len(val_loader)
 top1_correct = 0
 top5_correct = 0
 print('llvm inference-------------->>')
 for i, (input, label) in enumerate(val_loader, 1):
 runtime.set_input('input', input)
 runtime.run()
 output = runtime.get_output(0).asnumpy()

 # find topk index
 _, preds = find_topk(output, 5)
 print(preds, label)
 if label.item() == preds[0][0]:
 top1_correct += 1

 if label.item() in preds[0]:
 top5_correct += 1

 if i % print_freq == 0:
 print('Test: [{}/{}] \t'
 'Acc@1 {:.4f} \t'
 'Acc@5 {:.4f}'.format(
 i, test_nums, top1_correct / i, top5_correct / i))

 top1 = top1_correct / test_nums
 top5 = top5_correct / test_nums
 print(' * Acc@1 {:.4f} Acc@5 {:.4f}'
 .format(top1, top5))

 time_start = time.time()
 repeat = 100
 for i, (input, label) in enumerate(val_loader, 1):
 for r in range(repeat):
 runtime.set_input('input', input)
 runtime.run()
 output = runtime.get_output(0).asnumpy()
 time_end = time.time()
 print("平均推理时间:", (time_end - time_start)/repeat)
 exit()


In [4]:
# 注意 `quantize=False`
model = models.resnet18(pretrained=True, progress=True, quantize=False)
num_ftrs = model.fc.in_features

# Step 1
model.train()
# model.fuse_model()
# Step 2
model_ft = create_combined_model(model)

for param in model_ft.parameters():
 param.requires_grad = True


tvm_test(model_ft, test_iter,
 batch_size=1, data_aware=True,
 calibration_samples=500,
 print_freq=100,
 pre_quantization=False)

ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 512, 1, 1])