# YOLO 简介

参考:ultralytics

import torch
import sys
sys.path.append("..")
import set_env
from d2py.utils.file import mkdir
# Root directory under which the log folder below is created.
root_dir = ".temp"
# Project helper from d2py; presumably creates the directory (and parents) if missing — confirm.
mkdir(f"{root_dir}/logs")
# from ultralytics import YOLO
# Ask PyTorch's CUDA caching allocator to release its unused cached memory.
torch.cuda.empty_cache()

## 测试 YOLOv5n PyTorch 前端

from PIL import Image
import numpy as np
from ultralytics import YOLO

# Sample image used by the rest of this notebook.
input_path = "images/vehicle-jaguar-f-type-car-red-cars-wallpaper.jpg"
im = Image.open(input_path) #.resize((384, 640))
# self = YOLO("yolov8n-p2.yaml")
# self.load("yolov8n.pt")
# NOTE: `self` is an ordinary module-level variable here (there is no enclosing
# class); it holds the YOLO model wrapper and is reused by later cells.
self = YOLO("yolov5n.pt")
# results = self.train(data='coco.yaml', epochs=2)

# self.export(format="torchscript")
# results = self(input_path)
# Run prediction on the image converted to a NumPy array, passing conf=0.25.
# NOTE(review): PIL yields RGB channel order, while Ultralytics documents BGR for
# ndarray sources — confirm the intended channel order.
results = self(np.array(im), conf=0.25)
# results = postprocess(preds, im, [np.ascontiguousarray(image_np)], self.model.names)
# Render the first result with its plot() method and wrap it as a PIL image for display.
Image.fromarray(results[0].plot())

导出 ONNX 模型:

# Export the loaded model in ONNX format. The notebook later loads
# 'yolov5nu.onnx' — presumably the file name this export produces; confirm.
self.export(format="onnx")

## YOLOv5n 输入预处理

from PIL import Image
import numpy as np
import torch
from tvm_book.data.augment import LetterBox
# Target size handed to LetterBox, and the model strides taken from the loaded model.
imgsz = 640, 640
strides = self.model.stride
# Normalisation constants: `mean` is subtracted first, `std` divides afterwards.
mean = (0,)
std = (255,)
# Project LetterBox transform (tvm_book); presumably resizes with padding to `imgsz` — confirm.
letterbox = LetterBox(imgsz, strides=strides, auto=False)

origin_image = np.asanyarray(Image.open(input_path))
letterbox_image = letterbox(image=origin_image)
# Stack the single image into a batch of one after subtracting the mean.
xs = np.stack([letterbox_image - mean])
# The subscripts inside the f-strings below use single quotes: reusing the
# enclosing double quote inside a replacement field is a SyntaxError before
# Python 3.12 (PEP 701). The printed text is unchanged.
print(f"数据内存的连续性:{xs.flags['C_CONTIGUOUS']}")
xs = xs.transpose((0, 3, 1, 2))  # BHWC to BCHW, (n, 3, h, w)
print(f"数据内存的连续性(transpose):{xs.flags['C_CONTIGUOUS']}")
xs = np.ascontiguousarray(xs)  # make the transposed view C-contiguous again
print(f"数据内存的连续性:{xs.flags['C_CONTIGUOUS']}")
xs = xs.astype("float32") / std  # scale values into the 0.0 - 1.0 range
# Show the letterboxed image next to the de-normalised network input as a visual round-trip check.
Image.fromarray(np.concatenate([letterbox_image, (xs[0]*std).astype("uint8").transpose((1, 2, 0))], axis=1))

## 测试 YOLOv5n ONNX Relay 前端

前端导入:

import onnx
from tvm import relay

# Graph input name: used as the key of the shape dict below and when feeding data at run time.
input_name = "images"
onnx_model = onnx.load('yolov5nu.onnx')
# Import into Relay with the input shape fixed to that of the preprocessed batch `xs`.
# freeze_params=True asks the importer to embed the ONNX initializers as constants.
mod, params = relay.frontend.from_onnx(onnx_model, {input_name: xs.shape}, freeze_params=True)

运行时推理:

import tvm

# Compile the Relay module for the "llvm" target at opt_level 3 with the
# AlterOpLayout pass disabled.
with tvm.transform.PassContext(opt_level=3, disabled_pass={"AlterOpLayout"}):
    lib = relay.build(mod, target="llvm", params=params)
# Look up the factory function by the library's module name and instantiate it on CPU 0.
func = lib[lib.libmod_name]
module = tvm.contrib.graph_executor.GraphModule(func(tvm.cpu(0)))
# Feed the preprocessed batch under the graph input name and execute.
module.run(**{input_name: xs})
num_outputs = module.get_num_outputs()
# Copy every output to NumPy; later cells compare against these float outputs.
float_outputs = [module.get_output(k).numpy() for k in range(num_outputs)]

后处理:

from ultralytics.utils import ops
from ultralytics.engine.results import Results

def postprocess(preds, img, orig_imgs, names, input_path, conf_thres=0.25, iou_thres=0.45,):
    """Run NMS on raw predictions and wrap each detection set as a ``Results``.

    Args:
        preds: Raw model predictions, passed unchanged to
            ``ops.non_max_suppression``.
        img: Preprocessed input batch; ``img.shape[2:]`` is given to
            ``ops.scale_boxes`` as the size the boxes are currently expressed in.
        orig_imgs: Original images, indexed in step with the NMS output.
        names: Class names forwarded to ``Results``.
        input_path: Path recorded on every ``Results`` object.
        conf_thres: Confidence threshold forwarded to NMS.
        iou_thres: IoU threshold forwarded to NMS.

    Returns:
        A list holding one ``Results`` object per entry of the NMS output.
    """
    # Only the two thresholds are forwarded; agnostic, max_det and classes are
    # not passed and therefore keep the library defaults.
    detections = ops.non_max_suppression(
        preds,
        conf_thres=conf_thres,
        iou_thres=iou_thres,
    )

    def _to_result(index, det):
        source = orig_imgs[index]
        # Overwrite the first four columns in place with boxes rescaled from
        # the network input size to the original image size.
        det[:, :4] = ops.scale_boxes(img.shape[2:], det[:, :4], source.shape)
        return Results(source, path=input_path, names=names, boxes=det)

    return [_to_result(index, det) for index, det in enumerate(detections)]
# Post-process the TVM float outputs (wrapped as torch tensors) against the
# original image, then display the annotated result.
results = postprocess(
    [torch.from_numpy(o) for o in float_outputs], 
    xs, [origin_image], self.model.names, 
    input_path, conf_thres=0.25, iou_thres=0.45,
)
Image.fromarray(results[0].plot())
from tvm.relay.dataflow_pattern import rewrite
from tvm_book.transforms.yolo import Dist2xywhSimplify
import tvm

# Rebuild the model after applying the Dist2xywhSimplify dataflow-pattern
# rewrite to the main function, using the same build settings as the float run.
with tvm.transform.PassContext(opt_level=3, disabled_pass={"AlterOpLayout"}):
    run_mod = tvm.IRModule.from_expr(rewrite(Dist2xywhSimplify(), mod["main"]))
    lib = relay.build(run_mod, target="llvm", params=params)

func = lib[lib.libmod_name]
module = tvm.contrib.graph_executor.GraphModule(func(tvm.cpu(0)))
module.run(**{input_name: xs})
num_outputs = module.get_num_outputs()
run_float_outputs = [module.get_output(k).numpy() for k in range(num_outputs)]
# Check that the rewritten graph reproduces the original outputs. A plain loop
# replaces the throw-away list comprehension, and strict=True turns a mismatch
# in the number of outputs into an error instead of silently skipping the
# unmatched ones.
for expected, actual in zip(float_outputs, run_float_outputs, strict=True):
    np.testing.assert_allclose(expected, actual, rtol=1e-07, atol=1e-3)
results = postprocess(
    [torch.from_numpy(o) for o in run_float_outputs],
    xs, [origin_image], self.model.names,
    input_path, conf_thres=0.25, iou_thres=0.45,
)
Image.fromarray(results[0].plot())
run_mod.show()

## 裁剪 YOLOv5n 以探索计算图分割

# from tvm.relay.dataflow_pattern import (
#     wildcard, is_constant, is_op, is_var, is_tuple, is_tuple_get_item
# )
from copy import deepcopy
from tvm.relay.analysis import extract_intermdeiate_expr
from tvm_book.compiler.utils import merge_compiler

# Work on a deep copy so the imported `mod` itself is not modified.
run_mod = deepcopy(mod) 
# run_mod = extract_intermdeiate_expr(run_mod, 110)
with tvm.transform.PassContext(opt_level=3):
    # Apply the Dist2xywhSimplify dataflow-pattern rewrite to the main function.
    run_mod["main"] = rewrite(Dist2xywhSimplify(), run_mod["main"])
    # Run TVM's quantization prerequisite optimizations with the model parameters.
    run_mod = relay.quantize.prerequisite_optimize(run_mod, params)
    # Project helper (tvm_book); presumably groups matched operators into
    # functions tagged for the "vta_special" compiler — confirm against its source.
    run_mod = merge_compiler(run_mod, compiler_name="vta_special")
print(run_mod["main"])
fn (%images: Tensor[(1, 3, 640, 640), float32] /* ty=Tensor[(1, 3, 640, 640), float32] span=/model.0/conv/Conv.images:0:0 */) -> Tensor[(1, 84, 8400), float32] {
  %0 = @vta_special.conv2d_0(%images, meta[relay.Constant][0] /* ty=Tensor[(16, 3, 6, 6), float32] span=/model.0/conv/Conv.model.0.conv.weight:0:0 */, meta[relay.Constant][1] /* ty=Tensor[(16, 1, 1), float32] */) /* ty=Tensor[(1, 16, 320, 320), float32] */;
  %1 = @vta_special.conv2d_1(%0, meta[relay.Constant][2] /* ty=Tensor[(32, 16, 3, 3), float32] span=/model.1/conv/Conv.model.1.conv.weight:0:0 */, meta[relay.Constant][3] /* ty=Tensor[(32, 1, 1), float32] */) /* ty=Tensor[(1, 32, 160, 160), float32] */;
  %2 = @vta_special.conv2d_2(%1, meta[relay.Constant][4] /* ty=Tensor[(16, 32, 1, 1), float32] span=/model.2/cv1/conv/Conv.model.2.cv1.conv.weight:0:0 */, meta[relay.Constant][5] /* ty=Tensor[(16, 1, 1), float32] */) /* ty=Tensor[(1, 16, 160, 160), float32] */;
  %3 = @vta_special.conv2d_3(%2, meta[relay.Constant][6] /* ty=Tensor[(16, 16, 1, 1), float32] span=/model.2/m/m.0/cv1/conv/Conv.model.2.m.0.cv1.conv.weight:0:0 */, meta[relay.Constant][7] /* ty=Tensor[(16, 1, 1), float32] */) /* ty=Tensor[(1, 16, 160, 160), float32] */;
  %4 = @vta_special.conv2d_4(%3, meta[relay.Constant][8] /* ty=Tensor[(16, 16, 3, 3), float32] span=/model.2/m/m.0/cv2/conv/Conv.model.2.m.0.cv2.conv.weight:0:0 */, meta[relay.Constant][9] /* ty=Tensor[(16, 1, 1), float32] */) /* ty=Tensor[(1, 16, 160, 160), float32] */;
  %5 = @vta_special.add_5(%2, %4) /* ty=Tensor[(1, 16, 160, 160), float32] */;
  %6 = @vta_special.conv2d_6(%1, meta[relay.Constant][10] /* ty=Tensor[(16, 32, 1, 1), float32] span=/model.2/cv2/conv/Conv.model.2.cv2.conv.weight:0:0 */, meta[relay.Constant][11] /* ty=Tensor[(16, 1, 1), float32] */) /* ty=Tensor[(1, 16, 160, 160), float32] */;
  %7 = @vta_special.concat_4dim_2tensor_7(%5, %6) /* ty=Tensor[(1, 32, 160, 160), float32] */;
  %8 = @vta_special.conv2d_8(%7, meta[relay.Constant][12] /* ty=Tensor[(32, 32, 1, 1), float32] span=/model.2/cv3/conv/Conv.model.2.cv3.conv.weight:0:0 */, meta[relay.Constant][13] /* ty=Tensor[(32, 1, 1), float32] */) /* ty=Tensor[(1, 32, 160, 160), float32] */;
  %9 = @vta_special.conv2d_9(%8, meta[relay.Constant][14] /* ty=Tensor[(64, 32, 3, 3), float32] span=/model.3/conv/Conv.model.3.conv.weight:0:0 */, meta[relay.Constant][15] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 80, 80), float32] */;
  %10 = @vta_special.conv2d_10(%9, meta[relay.Constant][16] /* ty=Tensor[(32, 64, 1, 1), float32] span=/model.4/cv1/conv/Conv.model.4.cv1.conv.weight:0:0 */, meta[relay.Constant][17] /* ty=Tensor[(32, 1, 1), float32] */) /* ty=Tensor[(1, 32, 80, 80), float32] */;
  %11 = @vta_special.conv2d_11(%10, meta[relay.Constant][18] /* ty=Tensor[(32, 32, 1, 1), float32] span=/model.4/m/m.0/cv1/conv/Conv.model.4.m.0.cv1.conv.weight:0:0 */, meta[relay.Constant][19] /* ty=Tensor[(32, 1, 1), float32] */) /* ty=Tensor[(1, 32, 80, 80), float32] */;
  %12 = @vta_special.conv2d_12(%11, meta[relay.Constant][20] /* ty=Tensor[(32, 32, 3, 3), float32] span=/model.4/m/m.0/cv2/conv/Conv.model.4.m.0.cv2.conv.weight:0:0 */, meta[relay.Constant][21] /* ty=Tensor[(32, 1, 1), float32] */) /* ty=Tensor[(1, 32, 80, 80), float32] */;
  %13 = @vta_special.add_13(%10, %12) /* ty=Tensor[(1, 32, 80, 80), float32] */;
  %14 = @vta_special.conv2d_14(%13, meta[relay.Constant][22] /* ty=Tensor[(32, 32, 1, 1), float32] span=/model.4/m/m.1/cv1/conv/Conv.model.4.m.1.cv1.conv.weight:0:0 */, meta[relay.Constant][23] /* ty=Tensor[(32, 1, 1), float32] */) /* ty=Tensor[(1, 32, 80, 80), float32] */;
  %15 = @vta_special.conv2d_15(%14, meta[relay.Constant][24] /* ty=Tensor[(32, 32, 3, 3), float32] span=/model.4/m/m.1/cv2/conv/Conv.model.4.m.1.cv2.conv.weight:0:0 */, meta[relay.Constant][25] /* ty=Tensor[(32, 1, 1), float32] */) /* ty=Tensor[(1, 32, 80, 80), float32] */;
  %16 = @vta_special.add_16(%13, %15) /* ty=Tensor[(1, 32, 80, 80), float32] */;
  %17 = @vta_special.conv2d_17(%9, meta[relay.Constant][26] /* ty=Tensor[(32, 64, 1, 1), float32] span=/model.4/cv2/conv/Conv.model.4.cv2.conv.weight:0:0 */, meta[relay.Constant][27] /* ty=Tensor[(32, 1, 1), float32] */) /* ty=Tensor[(1, 32, 80, 80), float32] */;
  %18 = @vta_special.concat_4dim_2tensor_18(%16, %17) /* ty=Tensor[(1, 64, 80, 80), float32] */;
  %19 = @vta_special.conv2d_19(%18, meta[relay.Constant][28] /* ty=Tensor[(64, 64, 1, 1), float32] span=/model.4/cv3/conv/Conv.model.4.cv3.conv.weight:0:0 */, meta[relay.Constant][29] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 80, 80), float32] */;
  %20 = @vta_special.conv2d_20(%19, meta[relay.Constant][30] /* ty=Tensor[(128, 64, 3, 3), float32] span=/model.5/conv/Conv.model.5.conv.weight:0:0 */, meta[relay.Constant][31] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 40, 40), float32] */;
  %21 = @vta_special.conv2d_21(%20, meta[relay.Constant][32] /* ty=Tensor[(64, 128, 1, 1), float32] span=/model.6/cv1/conv/Conv.model.6.cv1.conv.weight:0:0 */, meta[relay.Constant][33] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %22 = @vta_special.conv2d_22(%21, meta[relay.Constant][34] /* ty=Tensor[(64, 64, 1, 1), float32] span=/model.6/m/m.0/cv1/conv/Conv.model.6.m.0.cv1.conv.weight:0:0 */, meta[relay.Constant][35] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %23 = @vta_special.conv2d_23(%22, meta[relay.Constant][36] /* ty=Tensor[(64, 64, 3, 3), float32] span=/model.6/m/m.0/cv2/conv/Conv.model.6.m.0.cv2.conv.weight:0:0 */, meta[relay.Constant][37] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %24 = @vta_special.add_24(%21, %23) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %25 = @vta_special.conv2d_25(%24, meta[relay.Constant][38] /* ty=Tensor[(64, 64, 1, 1), float32] span=/model.6/m/m.1/cv1/conv/Conv.model.6.m.1.cv1.conv.weight:0:0 */, meta[relay.Constant][39] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %26 = @vta_special.conv2d_26(%25, meta[relay.Constant][40] /* ty=Tensor[(64, 64, 3, 3), float32] span=/model.6/m/m.1/cv2/conv/Conv.model.6.m.1.cv2.conv.weight:0:0 */, meta[relay.Constant][41] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %27 = @vta_special.add_27(%24, %26) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %28 = @vta_special.conv2d_28(%27, meta[relay.Constant][42] /* ty=Tensor[(64, 64, 1, 1), float32] span=/model.6/m/m.2/cv1/conv/Conv.model.6.m.2.cv1.conv.weight:0:0 */, meta[relay.Constant][43] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %29 = @vta_special.conv2d_29(%28, meta[relay.Constant][44] /* ty=Tensor[(64, 64, 3, 3), float32] span=/model.6/m/m.2/cv2/conv/Conv.model.6.m.2.cv2.conv.weight:0:0 */, meta[relay.Constant][45] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %30 = @vta_special.add_30(%27, %29) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %31 = @vta_special.conv2d_31(%20, meta[relay.Constant][46] /* ty=Tensor[(64, 128, 1, 1), float32] span=/model.6/cv2/conv/Conv.model.6.cv2.conv.weight:0:0 */, meta[relay.Constant][47] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %32 = @vta_special.concat_4dim_2tensor_32(%30, %31) /* ty=Tensor[(1, 128, 40, 40), float32] */;
  %33 = @vta_special.conv2d_33(%32, meta[relay.Constant][48] /* ty=Tensor[(128, 128, 1, 1), float32] span=/model.6/cv3/conv/Conv.model.6.cv3.conv.weight:0:0 */, meta[relay.Constant][49] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 40, 40), float32] */;
  %34 = @vta_special.conv2d_34(%33, meta[relay.Constant][50] /* ty=Tensor[(256, 128, 3, 3), float32] span=/model.7/conv/Conv.model.7.conv.weight:0:0 */, meta[relay.Constant][51] /* ty=Tensor[(256, 1, 1), float32] */) /* ty=Tensor[(1, 256, 20, 20), float32] */;
  %35 = @vta_special.conv2d_35(%34, meta[relay.Constant][52] /* ty=Tensor[(128, 256, 1, 1), float32] span=/model.8/cv1/conv/Conv.model.8.cv1.conv.weight:0:0 */, meta[relay.Constant][53] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %36 = @vta_special.conv2d_36(%35, meta[relay.Constant][54] /* ty=Tensor[(128, 128, 1, 1), float32] span=/model.8/m/m.0/cv1/conv/Conv.model.8.m.0.cv1.conv.weight:0:0 */, meta[relay.Constant][55] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %37 = @vta_special.conv2d_37(%36, meta[relay.Constant][56] /* ty=Tensor[(128, 128, 3, 3), float32] span=/model.8/m/m.0/cv2/conv/Conv.model.8.m.0.cv2.conv.weight:0:0 */, meta[relay.Constant][57] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %38 = @vta_special.add_38(%35, %37) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %39 = @vta_special.conv2d_39(%34, meta[relay.Constant][58] /* ty=Tensor[(128, 256, 1, 1), float32] span=/model.8/cv2/conv/Conv.model.8.cv2.conv.weight:0:0 */, meta[relay.Constant][59] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %40 = @vta_special.concat_4dim_2tensor_40(%38, %39) /* ty=Tensor[(1, 256, 20, 20), float32] */;
  %41 = @vta_special.conv2d_41(%40, meta[relay.Constant][60] /* ty=Tensor[(256, 256, 1, 1), float32] span=/model.8/cv3/conv/Conv.model.8.cv3.conv.weight:0:0 */, meta[relay.Constant][61] /* ty=Tensor[(256, 1, 1), float32] */) /* ty=Tensor[(1, 256, 20, 20), float32] */;
  %42 = @vta_special.conv2d_42(%41, meta[relay.Constant][62] /* ty=Tensor[(128, 256, 1, 1), float32] span=/model.9/cv1/conv/Conv.model.9.cv1.conv.weight:0:0 */, meta[relay.Constant][63] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %43 = @vta_special.max_pool2d_43(%42) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %44 = @vta_special.max_pool2d_44(%43) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %45 = @vta_special.max_pool2d_45(%44) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %46 = @vta_special.concat_4dim_4tensor_46(%42, %43, %44, %45) /* ty=Tensor[(1, 512, 20, 20), float32] */;
  %47 = @vta_special.conv2d_47(%46, meta[relay.Constant][64] /* ty=Tensor[(256, 512, 1, 1), float32] span=/model.9/cv2/conv/Conv.model.9.cv2.conv.weight:0:0 */, meta[relay.Constant][65] /* ty=Tensor[(256, 1, 1), float32] */) /* ty=Tensor[(1, 256, 20, 20), float32] */;
  %48 = @vta_special.conv2d_48(%47, meta[relay.Constant][66] /* ty=Tensor[(128, 256, 1, 1), float32] span=/model.10/conv/Conv.model.10.conv.weight:0:0 */, meta[relay.Constant][67] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %49 = @vta_special.resize2d_49(%48) /* ty=Tensor[(1, 128, 40, 40), float32] */;
  %50 = @vta_special.concat_4dim_2tensor_50(%49, %33) /* ty=Tensor[(1, 256, 40, 40), float32] */;
  %51 = @vta_special.conv2d_51(%50, meta[relay.Constant][68] /* ty=Tensor[(64, 256, 1, 1), float32] span=/model.13/cv1/conv/Conv.model.13.cv1.conv.weight:0:0 */, meta[relay.Constant][69] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %52 = @vta_special.conv2d_52(%51, meta[relay.Constant][70] /* ty=Tensor[(64, 64, 1, 1), float32] span=/model.13/m/m.0/cv1/conv/Conv.model.13.m.0.cv1.conv.weight:0:0 */, meta[relay.Constant][71] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %53 = @vta_special.conv2d_53(%52, meta[relay.Constant][72] /* ty=Tensor[(64, 64, 3, 3), float32] span=/model.13/m/m.0/cv2/conv/Conv.model.13.m.0.cv2.conv.weight:0:0 */, meta[relay.Constant][73] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %54 = @vta_special.conv2d_54(%50, meta[relay.Constant][74] /* ty=Tensor[(64, 256, 1, 1), float32] span=/model.13/cv2/conv/Conv.model.13.cv2.conv.weight:0:0 */, meta[relay.Constant][75] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %55 = @vta_special.concat_4dim_2tensor_55(%53, %54) /* ty=Tensor[(1, 128, 40, 40), float32] */;
  %56 = @vta_special.conv2d_56(%55, meta[relay.Constant][76] /* ty=Tensor[(128, 128, 1, 1), float32] span=/model.13/cv3/conv/Conv.model.13.cv3.conv.weight:0:0 */, meta[relay.Constant][77] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 40, 40), float32] */;
  %57 = @vta_special.conv2d_57(%56, meta[relay.Constant][78] /* ty=Tensor[(64, 128, 1, 1), float32] span=/model.14/conv/Conv.model.14.conv.weight:0:0 */, meta[relay.Constant][79] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %58 = @vta_special.resize2d_58(%57) /* ty=Tensor[(1, 64, 80, 80), float32] */;
  %59 = @vta_special.concat_4dim_2tensor_59(%58, %19) /* ty=Tensor[(1, 128, 80, 80), float32] */;
  %60 = @vta_special.conv2d_60(%59, meta[relay.Constant][80] /* ty=Tensor[(32, 128, 1, 1), float32] span=/model.17/cv1/conv/Conv.model.17.cv1.conv.weight:0:0 */, meta[relay.Constant][81] /* ty=Tensor[(32, 1, 1), float32] */) /* ty=Tensor[(1, 32, 80, 80), float32] */;
  %61 = @vta_special.conv2d_61(%60, meta[relay.Constant][82] /* ty=Tensor[(32, 32, 1, 1), float32] span=/model.17/m/m.0/cv1/conv/Conv.model.17.m.0.cv1.conv.weight:0:0 */, meta[relay.Constant][83] /* ty=Tensor[(32, 1, 1), float32] */) /* ty=Tensor[(1, 32, 80, 80), float32] */;
  %62 = @vta_special.conv2d_62(%61, meta[relay.Constant][84] /* ty=Tensor[(32, 32, 3, 3), float32] span=/model.17/m/m.0/cv2/conv/Conv.model.17.m.0.cv2.conv.weight:0:0 */, meta[relay.Constant][85] /* ty=Tensor[(32, 1, 1), float32] */) /* ty=Tensor[(1, 32, 80, 80), float32] */;
  %63 = @vta_special.conv2d_63(%59, meta[relay.Constant][86] /* ty=Tensor[(32, 128, 1, 1), float32] span=/model.17/cv2/conv/Conv.model.17.cv2.conv.weight:0:0 */, meta[relay.Constant][87] /* ty=Tensor[(32, 1, 1), float32] */) /* ty=Tensor[(1, 32, 80, 80), float32] */;
  %64 = @vta_special.concat_4dim_2tensor_64(%62, %63) /* ty=Tensor[(1, 64, 80, 80), float32] */;
  %65 = @vta_special.conv2d_65(%64, meta[relay.Constant][88] /* ty=Tensor[(64, 64, 1, 1), float32] span=/model.17/cv3/conv/Conv.model.17.cv3.conv.weight:0:0 */, meta[relay.Constant][89] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 80, 80), float32] */;
  %66 = @vta_special.conv2d_66(%65, meta[relay.Constant][90] /* ty=Tensor[(64, 64, 3, 3), float32] span=/model.24/cv2.0/cv2.0.0/conv/Conv.model.24.cv2.0.0.conv.weight:0:0 */, meta[relay.Constant][91] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 80, 80), float32] */;
  %67 = @vta_special.conv2d_67(%66, meta[relay.Constant][92] /* ty=Tensor[(64, 64, 3, 3), float32] span=/model.24/cv2.0/cv2.0.1/conv/Conv.model.24.cv2.0.1.conv.weight:0:0 */, meta[relay.Constant][93] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 80, 80), float32] */;
  %68 = @vta_special.conv2d_69(%65, meta[relay.Constant][96] /* ty=Tensor[(80, 64, 3, 3), float32] span=/model.24/cv3.0/cv3.0.0/conv/Conv.model.24.cv3.0.0.conv.weight:0:0 */, meta[relay.Constant][97] /* ty=Tensor[(80, 1, 1), float32] */) /* ty=Tensor[(1, 80, 80, 80), float32] */;
  %69 = @vta_special.conv2d_70(%68, meta[relay.Constant][98] /* ty=Tensor[(80, 80, 3, 3), float32] span=/model.24/cv3.0/cv3.0.1/conv/Conv.model.24.cv3.0.1.conv.weight:0:0 */, meta[relay.Constant][99] /* ty=Tensor[(80, 1, 1), float32] */) /* ty=Tensor[(1, 80, 80, 80), float32] */;
  %70 = @vta_special.conv2d_72(%65, meta[relay.Constant][102] /* ty=Tensor[(64, 64, 3, 3), float32] span=/model.18/conv/Conv.model.18.conv.weight:0:0 */, meta[relay.Constant][103] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %71 = @vta_special.concat_4dim_2tensor_73(%70, %57) /* ty=Tensor[(1, 128, 40, 40), float32] */;
  %72 = @vta_special.conv2d_74(%71, meta[relay.Constant][104] /* ty=Tensor[(64, 128, 1, 1), float32] span=/model.20/cv1/conv/Conv.model.20.cv1.conv.weight:0:0 */, meta[relay.Constant][105] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %73 = @vta_special.conv2d_75(%72, meta[relay.Constant][106] /* ty=Tensor[(64, 64, 1, 1), float32] span=/model.20/m/m.0/cv1/conv/Conv.model.20.m.0.cv1.conv.weight:0:0 */, meta[relay.Constant][107] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %74 = @vta_special.conv2d_76(%73, meta[relay.Constant][108] /* ty=Tensor[(64, 64, 3, 3), float32] span=/model.20/m/m.0/cv2/conv/Conv.model.20.m.0.cv2.conv.weight:0:0 */, meta[relay.Constant][109] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %75 = @vta_special.conv2d_77(%71, meta[relay.Constant][110] /* ty=Tensor[(64, 128, 1, 1), float32] span=/model.20/cv2/conv/Conv.model.20.cv2.conv.weight:0:0 */, meta[relay.Constant][111] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %76 = @vta_special.concat_4dim_2tensor_78(%74, %75) /* ty=Tensor[(1, 128, 40, 40), float32] */;
  %77 = @vta_special.conv2d_79(%76, meta[relay.Constant][112] /* ty=Tensor[(128, 128, 1, 1), float32] span=/model.20/cv3/conv/Conv.model.20.cv3.conv.weight:0:0 */, meta[relay.Constant][113] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 40, 40), float32] */;
  %78 = @vta_special.conv2d_80(%77, meta[relay.Constant][114] /* ty=Tensor[(64, 128, 3, 3), float32] span=/model.24/cv2.1/cv2.1.0/conv/Conv.model.24.cv2.1.0.conv.weight:0:0 */, meta[relay.Constant][115] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %79 = @vta_special.conv2d_81(%78, meta[relay.Constant][116] /* ty=Tensor[(64, 64, 3, 3), float32] span=/model.24/cv2.1/cv2.1.1/conv/Conv.model.24.cv2.1.1.conv.weight:0:0 */, meta[relay.Constant][117] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %80 = @vta_special.conv2d_83(%77, meta[relay.Constant][120] /* ty=Tensor[(80, 128, 3, 3), float32] span=/model.24/cv3.1/cv3.1.0/conv/Conv.model.24.cv3.1.0.conv.weight:0:0 */, meta[relay.Constant][121] /* ty=Tensor[(80, 1, 1), float32] */) /* ty=Tensor[(1, 80, 40, 40), float32] */;
  %81 = @vta_special.conv2d_84(%80, meta[relay.Constant][122] /* ty=Tensor[(80, 80, 3, 3), float32] span=/model.24/cv3.1/cv3.1.1/conv/Conv.model.24.cv3.1.1.conv.weight:0:0 */, meta[relay.Constant][123] /* ty=Tensor[(80, 1, 1), float32] */) /* ty=Tensor[(1, 80, 40, 40), float32] */;
  %82 = @vta_special.conv2d_86(%77, meta[relay.Constant][126] /* ty=Tensor[(128, 128, 3, 3), float32] span=/model.21/conv/Conv.model.21.conv.weight:0:0 */, meta[relay.Constant][127] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %83 = @vta_special.concat_4dim_2tensor_87(%82, %48) /* ty=Tensor[(1, 256, 20, 20), float32] */;
  %84 = @vta_special.conv2d_88(%83, meta[relay.Constant][128] /* ty=Tensor[(128, 256, 1, 1), float32] span=/model.23/cv1/conv/Conv.model.23.cv1.conv.weight:0:0 */, meta[relay.Constant][129] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %85 = @vta_special.conv2d_89(%84, meta[relay.Constant][130] /* ty=Tensor[(128, 128, 1, 1), float32] span=/model.23/m/m.0/cv1/conv/Conv.model.23.m.0.cv1.conv.weight:0:0 */, meta[relay.Constant][131] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %86 = @vta_special.conv2d_90(%85, meta[relay.Constant][132] /* ty=Tensor[(128, 128, 3, 3), float32] span=/model.23/m/m.0/cv2/conv/Conv.model.23.m.0.cv2.conv.weight:0:0 */, meta[relay.Constant][133] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %87 = @vta_special.conv2d_91(%83, meta[relay.Constant][134] /* ty=Tensor[(128, 256, 1, 1), float32] span=/model.23/cv2/conv/Conv.model.23.cv2.conv.weight:0:0 */, meta[relay.Constant][135] /* ty=Tensor[(128, 1, 1), float32] */) /* ty=Tensor[(1, 128, 20, 20), float32] */;
  %88 = @vta_special.concat_4dim_2tensor_92(%86, %87) /* ty=Tensor[(1, 256, 20, 20), float32] */;
  %89 = @vta_special.conv2d_93(%88, meta[relay.Constant][136] /* ty=Tensor[(256, 256, 1, 1), float32] span=/model.23/cv3/conv/Conv.model.23.cv3.conv.weight:0:0 */, meta[relay.Constant][137] /* ty=Tensor[(256, 1, 1), float32] */) /* ty=Tensor[(1, 256, 20, 20), float32] */;
  %90 = @vta_special.conv2d_94(%89, meta[relay.Constant][138] /* ty=Tensor[(64, 256, 3, 3), float32] span=/model.24/cv2.2/cv2.2.0/conv/Conv.model.24.cv2.2.0.conv.weight:0:0 */, meta[relay.Constant][139] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 20, 20), float32] */;
  %91 = @vta_special.conv2d_95(%90, meta[relay.Constant][140] /* ty=Tensor[(64, 64, 3, 3), float32] span=/model.24/cv2.2/cv2.2.1/conv/Conv.model.24.cv2.2.1.conv.weight:0:0 */, meta[relay.Constant][141] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 20, 20), float32] */;
  %92 = @vta_special.conv2d_97(%89, meta[relay.Constant][144] /* ty=Tensor[(80, 256, 3, 3), float32] span=/model.24/cv3.2/cv3.2.0/conv/Conv.model.24.cv3.2.0.conv.weight:0:0 */, meta[relay.Constant][145] /* ty=Tensor[(80, 1, 1), float32] */) /* ty=Tensor[(1, 80, 20, 20), float32] */;
  %93 = @vta_special.conv2d_98(%92, meta[relay.Constant][146] /* ty=Tensor[(80, 80, 3, 3), float32] span=/model.24/cv3.2/cv3.2.1/conv/Conv.model.24.cv3.2.1.conv.weight:0:0 */, meta[relay.Constant][147] /* ty=Tensor[(80, 1, 1), float32] */) /* ty=Tensor[(1, 80, 20, 20), float32] */;
  %94 = @vta_special.conv2d_68(%67, meta[relay.Constant][94] /* ty=Tensor[(64, 64, 1, 1), float32] span=/model.24/cv2.0/cv2.0.2/Conv.model.24.cv2.0.2.weight:0:0 */, meta[relay.Constant][95] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 80, 80), float32] */;
  %95 = @vta_special.conv2d_71(%69, meta[relay.Constant][100] /* ty=Tensor[(80, 80, 1, 1), float32] span=/model.24/cv3.0/cv3.0.2/Conv.model.24.cv3.0.2.weight:0:0 */, meta[relay.Constant][101] /* ty=Tensor[(80, 1, 1), float32] */) /* ty=Tensor[(1, 80, 80, 80), float32] */;
  %96 = @vta_special.conv2d_82(%79, meta[relay.Constant][118] /* ty=Tensor[(64, 64, 1, 1), float32] span=/model.24/cv2.1/cv2.1.2/Conv.model.24.cv2.1.2.weight:0:0 */, meta[relay.Constant][119] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 40, 40), float32] */;
  %97 = @vta_special.conv2d_85(%81, meta[relay.Constant][124] /* ty=Tensor[(80, 80, 1, 1), float32] span=/model.24/cv3.1/cv3.1.2/Conv.model.24.cv3.1.2.weight:0:0 */, meta[relay.Constant][125] /* ty=Tensor[(80, 1, 1), float32] */) /* ty=Tensor[(1, 80, 40, 40), float32] */;
  %98 = @vta_special.conv2d_96(%91, meta[relay.Constant][142] /* ty=Tensor[(64, 64, 1, 1), float32] span=/model.24/cv2.2/cv2.2.2/Conv.model.24.cv2.2.2.weight:0:0 */, meta[relay.Constant][143] /* ty=Tensor[(64, 1, 1), float32] */) /* ty=Tensor[(1, 64, 20, 20), float32] */;
  %99 = @vta_special.conv2d_99(%93, meta[relay.Constant][148] /* ty=Tensor[(80, 80, 1, 1), float32] span=/model.24/cv3.2/cv3.2.2/Conv.model.24.cv3.2.2.weight:0:0 */, meta[relay.Constant][149] /* ty=Tensor[(80, 1, 1), float32] */) /* ty=Tensor[(1, 80, 20, 20), float32] */;
  @vta_special.yolo_output_all_100(%94, %95, %96, %97, %98, %99) /* ty=Tensor[(1, 84, 8400), float32] */
} /* ty=fn (Tensor[(1, 3, 640, 640), float32]) -> Tensor[(1, 84, 8400), float32] */
from pathlib import Path
from tqdm import tqdm

# Preprocessing / calibration settings; calibrateset() expands this dict as
# keyword arguments into preprocessing() and reads "input_name" from it.
ENV = {
    "model_type": "onnx",
    "input_name": "images",
    "channel": 3,
    "height": 640, 
    "width": 640,
    "mode": "RGB", # input image format
    "mean": (0,),
    "std": (255,)
}

def letterbox_image(im: Image.Image, dst_width: int, dst_height: int) -> Image.Image:
    """Resize an image onto a padded canvas while keeping its aspect ratio.

    The image is scaled by the largest factor that still lets it fit inside
    ``dst_width`` x ``dst_height``, resampled with bicubic interpolation and
    pasted centred on a grey ``(114, 114, 114)`` RGB canvas.

    Args:
        im: Source PIL image.
        dst_width: Width of the returned canvas in pixels.
        dst_height: Height of the returned canvas in pixels.

    Returns:
        A new RGB image of size ``(dst_width, dst_height)``.
    """
    iw, ih = im.size  # original size
    scale = min(dst_width / iw, dst_height / ih)
    # int() truncation can produce 0 for extreme aspect ratios, and resize()
    # rejects a zero-sized target, so keep at least one pixel per side.
    nw = max(1, int(iw * scale))
    nh = max(1, int(ih * scale))
    im = im.resize((nw, nh), Image.BICUBIC)
    new_image = Image.new('RGB', (dst_width, dst_height), (114, 114, 114))
    # Centre the resized image; the uncovered border keeps the grey fill.
    new_image.paste(im, ((dst_width - nw) // 2, (dst_height - nh) // 2))
    return new_image

def preprocessing(path: str|None, **ENV: dict) -> tuple[np.ndarray, np.ndarray]:
    """Load an image and turn it into a normalised NCHW network input.

    Args:
        path: Image file to open. When falsy, a random 32x32 RGB image is
            generated instead.
        **ENV: Settings read by key: ``width`` and ``height`` (letterbox target
            size), ``mode`` (``"L"``, ``"RGB"`` or ``"BGR"``), ``mean`` and
            ``std`` (normalisation constants).

    Returns:
        A pair ``(image_np, data_inp)`` of C-contiguous arrays: the letterboxed
        image in NHWC layout before normalisation, and the float32 NCHW tensor
        computed as ``(image_np - mean) / std``.

    Raises:
        TypeError: If ``ENV["mode"]`` is not one of the supported modes.
    """
    if path:
        # letterbox_image() returns a freshly created image, so the file can be
        # closed as soon as it returns; this avoids leaking one open file
        # handle per image when called in a loop.
        with Image.open(path) as opened:
            im = letterbox_image(opened, ENV["width"], ENV["height"])
    else:
        noise = np.random.randint(0, 256, size=(32, 32, 3), dtype="uint8")
        im = letterbox_image(Image.fromarray(noise), ENV["width"], ENV["height"])
    # im = im.resize((ENV["width"], ENV["height"]), Image.BICUBIC)
    if ENV["mode"] == "L":  # grayscale: add a trailing channel axis to get HWC
        img = im.convert("L")
        img = np.expand_dims(img, axis=-1)
    elif ENV["mode"] == "RGB":
        img = np.array(im.convert("RGB"))  # HWC layout
    elif ENV["mode"] == "BGR":
        img = np.array(im.convert("RGB"))  # HWC layout
        img = img[..., ::-1]  # RGB -> BGR
    else:
        raise TypeError(f'暂未支持数据布局 {ENV["mode"]}')
    image_np = np.expand_dims(img, 0)  # add the batch axis: NHWC
    # Normalise, then move channels ahead of the spatial axes (NHWC -> NCHW).
    data_inp = ((image_np - ENV["mean"]) / ENV["std"]).astype(np.float32)
    data_inp = data_inp.transpose(0, 3, 1, 2)
    return np.ascontiguousarray(image_np), np.ascontiguousarray(data_inp)

def calibrateset(calibrate_num=2, data_dir="/media/pc/data/lxw/home/data/coco/train2017"):
    """Yield calibration samples for quantization.

    Walks the entries of ``data_dir`` in the order ``Path.iterdir`` returns
    them, preprocesses each one with the module-level ``ENV`` settings and
    yields a dict mapping ``ENV["input_name"]`` to the normalised input.
    Iteration stops once ``calibrate_num`` samples have been produced.

    Args:
        calibrate_num: Maximum number of samples to yield.
        data_dir: Directory whose entries are used as calibration images.
    """
    indexed_entries = enumerate(Path(data_dir).iterdir())
    progress = tqdm(indexed_entries, desc="Calibrate", unit="batch")
    for index, image_path in progress:
        if index < calibrate_num:
            # preprocessing() returns (raw image, network input); keep the latter.
            yield {ENV["input_name"]: preprocessing(image_path, **ENV)[1]}
        else:
            break
# Start again from a fresh copy of the imported module.
run_mod = deepcopy(mod)
with tvm.transform.PassContext(opt_level=3, disabled_pass={"AlterOpLayout"}):
    # Apply the Dist2xywhSimplify dataflow-pattern rewrite to the main function.
    run_mod["main"] = rewrite(Dist2xywhSimplify(), run_mod["main"])
    # Quantize with calibrate_mode="percentile" and weight_scale="max";
    # calibrateset() is called with its defaults to supply the calibration data.
    with relay.quantize.qconfig(
        calibrate_mode="percentile", weight_scale="max"):
        qmod = relay.quantize.quantize(run_mod, params, dataset=calibrateset())
qmod.show()
from tvm.relay.dataflow_pattern import rewrite
from tvm_book.transforms.yolo import Dist2xywhSimplify
import tvm

# Compile the quantized module with the same build settings as the float runs.
with tvm.transform.PassContext(opt_level=3, disabled_pass={"AlterOpLayout"}):
    lib = relay.build(qmod, target="llvm", params=params)

func = lib[lib.libmod_name]
module = tvm.contrib.graph_executor.GraphModule(func(tvm.cpu(0)))
# Run the quantized model on the same preprocessed batch.
module.run(**{input_name: xs})
num_outputs = module.get_num_outputs()
quant_outputs = [module.get_output(k).numpy() for k in range(num_outputs)]
# Post-process the quantized outputs and display the annotated image.
results = postprocess(
    [torch.from_numpy(o) for o in quant_outputs], 
    xs, [origin_image], self.model.names, 
    input_path, conf_thres=0.25, iou_thres=0.45,
)
Image.fromarray(results[0].plot())
# NOTE(review): `ww` is not defined anywhere in the visible source, so evaluating
# it raises NameError and halts execution at this point — presumably a deliberate
# stop marker before the experimental code below; confirm, or replace it with an
# explicit stop.
ww
from tvm.relay.analysis import _ffi_api

# Use TVM's private analysis FFI helpers to obtain the calibration output map
# and the calibration module for `run_mod`, then inline the latter.
output_map = _ffi_api.get_calibrate_output_map(run_mod)
calibrate_mod = _ffi_api.get_calibrate_module(run_mod)
calibrate_mod = relay.transform.Inline()(calibrate_mod)
# Evaluate the calibration module with the graph executor on CPU 0 for the preprocessed batch.
ref_res = relay.build_module.create_executor("graph", mod=calibrate_mod, device=tvm.cpu(0)).evaluate()(**{input_name: xs})

# For each entry of output_map, the three indices are read as (offset, number
# of inputs, number of outputs) and used to slice the flat `ref_res` sequence.
calib_data = {}
for gvar, indices in output_map.items():
    offset = int(indices[0])
    in_len = int(indices[1])
    out_len = int(indices[2])
    value = {
        "inputs": ref_res[offset : offset + in_len],
        "outputs": ref_res[offset + in_len : offset + in_len + out_len],
    }
    calib_data[gvar] = value
# Index the keys by the integer after the last underscore of their name_hint.
func_map = {int(kk.name_hint.split("_")[-1]): kk for kk in calib_data.keys()}
# Display the record stored under index len(func_map) - 1.
calib_data[func_map[len(func_map)-1]]