YOLO Output
import set_env
from d2py.utils.file import mkdir
root_dir = ".temp"
mkdir(f"{root_dir}/logs")
import torch
from torch.nn import functional as F
from torch import nn
from torch.onnx import OperatorExportTypes, utils
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Pad to 'same' shape outputs."""
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
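A quick check of what autopad returns (illustrative values, verified by hand):

print(autopad(3))       # 1  -> a 3x3 kernel keeps 'same' shape with padding 1
print(autopad(3, d=2))  # 2  -> dilation 2 makes the effective kernel 5, so padding 2
print(autopad((1, 3)))  # [0, 1] -> per-dimension padding for asymmetric kernels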
class Conv(nn.Module):
    """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""

    # default_act = nn.SiLU()  # YOLOv8 default activation
    default_act = nn.ReLU()  # ReLU used here in place of the SiLU default

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Initialize Conv layer with given arguments including activation."""
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Apply convolution, batch normalization and activation to input tensor."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Apply convolution and activation without batch normalization (after Conv+BN fusion)."""
        return self.act(self.conv(x))
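forward_fuse assumes the BatchNorm has already been folded into self.conv. A minimal sketch of that folding, using the standard BN-folding algebra (this helper is not part of the original snippet; the name is illustrative):

def fuse_conv_and_bn(conv, bn):
    """Fold a BatchNorm2d into the preceding Conv2d."""
    fused = nn.Conv2d(conv.in_channels, conv.out_channels,
                      kernel_size=conv.kernel_size, stride=conv.stride,
                      padding=conv.padding, dilation=conv.dilation,
                      groups=conv.groups, bias=True).requires_grad_(False)
    # new weight: W' = diag(gamma / sqrt(var + eps)) @ W
    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    fused.weight.data.copy_(torch.mm(w_bn, w_conv).view(fused.weight.shape))
    # new bias: b' = gamma * (b - mean) / sqrt(var + eps) + beta
    b_conv = torch.zeros(conv.out_channels) if conv.bias is None else conv.bias
    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fused.bias.data.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
    return fused

With do_constant_folding=True, the ONNX export below performs an equivalent folding, which is why the exported graph shows plain Conv nodes with bias inputs and no BatchNormalization nodes.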
class DFL(nn.Module):
    """
    Integral module of Distribution Focal Loss (DFL).

    Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
    """

    def __init__(self, c1=16):
        """Initialize a convolutional layer with a given number of input channels."""
        super().__init__()
        self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
        x = torch.arange(c1, dtype=torch.float)
        self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
        self.c1 = c1

    def forward(self, x):
        """Apply the DFL softmax-expectation to input tensor 'x' and return decoded box distances."""
        b, c, a = x.shape  # batch, channels, anchors
        # return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
        return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a)
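The fixed 1x1 convolution above just takes the expectation of a 16-bin softmax distribution per box side. A quick equivalence check (shapes are illustrative):

dfl = DFL(16)
t = torch.randn(2, 16 * 4, 100)  # batch=2, 4 sides x 16 bins, 100 anchors
ref = (t.view(2, 16, 4, 100).softmax(1)
       * torch.arange(16, dtype=torch.float).view(1, 16, 1, 1)).sum(1)
assert torch.allclose(dfl(t), ref, atol=1e-4)  # both have shape (2, 4, 100)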
def make_anchors(feats, strides, grid_cell_offset=0.5):
    """Generate anchors from features."""
    anchor_points, stride_tensor = [], []
    assert feats is not None
    # for ff in feats:
    #     print(f"make_anchors: {ff.shape}")
    dtype, device = feats[0].dtype, feats[0].device
    for i, stride in enumerate(strides):
        _, _, h, w = feats[i].shape
        print(f"stride, h, w: {stride, h, w}")
        sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset  # shift x
        sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset  # shift y
        sy, sx = torch.meshgrid(sy, sx, indexing='ij')
        anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2))
        stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device))
    return torch.cat(anchor_points), torch.cat(stride_tensor)
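For the 48x80 input used below, the three levels contribute 48*80 + 24*40 + 12*20 = 3840 + 960 + 240 = 5040 anchor points. A quick check:

feats = [torch.zeros(1, 16, 48, 80), torch.zeros(1, 16, 24, 40), torch.zeros(1, 16, 12, 20)]
pts, st = make_anchors(feats, [8., 16., 32.])
print(pts.shape, st.shape)  # torch.Size([5040, 2]) torch.Size([5040, 1])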
def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
    """Transform distance(ltrb) to box(xywh or xyxy)."""
    lt, rb = distance.chunk(2, dim)
    x1y1 = anchor_points - lt
    x2y2 = anchor_points + rb
    if xywh:
        c_xy = (x1y1 + x2y2) / 2
        wh = x2y2 - x1y1
        return torch.cat((c_xy, wh), dim)  # xywh bbox
    return torch.cat((x1y1, x2y2), dim)  # xyxy bbox
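A one-anchor worked example: an anchor at (10, 10) with distances l=2, t=3, r=4, b=5 decodes as follows.

d = torch.tensor([[2., 3., 4., 5.]])  # ltrb distances
a = torch.tensor([[10., 10.]])        # anchor point
print(dist2bbox(d, a, xywh=False))    # tensor([[ 8.,  7., 14., 15.]])  x1y1x2y2
print(dist2bbox(d, a, xywh=True))     # tensor([[11., 11.,  6.,  8.]])  cx,cy,w,h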
class Detect(nn.Module):
    """YOLOv8 Detect head for detection models."""

    dynamic = False  # force grid reconstruction
    export = False  # export mode
    format = None  # export format (e.g. 'tflite'); only consulted when export is True
    shape = None
    anchors = torch.empty(0)  # init
    strides = torch.empty(0)  # init

    def __init__(self, nc=80, ch=()):  # detection layer
        super().__init__()
        self.nc = nc  # number of classes
        self.nl = len(ch)  # number of detection layers
        self.reg_max = 16  # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
        self.no = nc + self.reg_max * 4  # number of outputs per anchor
        self.stride = torch.tensor([8., 16., 32.])  # fixed here for export; normally torch.zeros(self.nl), computed during build
        c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], min(self.nc, 100))  # channels
        self.cv2 = nn.ModuleList(
            nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
        self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
        self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()

    def forward(self, x):
        """Concatenate and return predicted bounding boxes and class probabilities."""
        shape = x[0].shape  # BCHW
        for i in range(self.nl):
            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
        if self.training:
            return x
        elif self.dynamic or self.shape != shape:
            print(f"Detect: {self.stride}")
            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
            self.shape = shape
        x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
        if self.export and self.format in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'):  # avoid TF FlexSplitV ops
            box = x_cat[:, :self.reg_max * 4]
            cls = x_cat[:, self.reg_max * 4:]
        else:
            box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
        dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
        if self.export and self.format in ('tflite', 'edgetpu'):
            # Normalize xywh with image size to mitigate quantization error of TFLite integer models as done in YOLOv5:
            # https://github.com/ultralytics/yolov5/blob/0c8de3fca4a702f8ff5c435e67f378d1fce70243/models/tf.py#L307-L309
            # See this PR for details: https://github.com/ultralytics/ultralytics/pull/1695
            img_h = shape[2] * self.stride[0]
            img_w = shape[3] * self.stride[0]
            img_size = torch.tensor([img_w, img_h, img_w, img_h], device=dbox.device).reshape(1, 4, 1)
            dbox /= img_size
        y = torch.cat((dbox, cls.sigmoid()), 1)
        return y
        # return y if self.export else (y, x)
class M(nn.Module):
    def __init__(self, nc=80, ch=(16, 256, 1024)):  # detection layer
        super().__init__()
        # A toy backbone: three 1x1 convs producing feature maps at 1x, 1/2 and 1/4
        # of the input resolution, feeding the three Detect input scales.
        self.conv0 = nn.Conv2d(3, 16, 1, 1, groups=1, bias=False)
        self.conv1 = nn.Conv2d(16, ch[0], 1, 1, groups=1, bias=False)
        self.conv2 = nn.Conv2d(16, ch[1], 1, 2, groups=1, bias=False)
        self.conv3 = nn.Conv2d(16, ch[2], 1, 4, groups=1, bias=False)
        self.det = Detect(nc=nc, ch=ch)

    def forward(self, x):
        x = self.conv0(x)
        x1 = self.conv1(x)
        x2 = self.conv2(x)
        x3 = self.conv3(x)
        x = self.det([x1, x2, x3])
        return x
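A quick forward pass confirms the output layout of the head: 84 channels = 4 box coordinates + 80 class scores, over the 5040 anchors counted above (illustrative check with a throwaway model instance):

with torch.no_grad():
    out = M().eval()(torch.rand(1, 3, 48, 80))
print(out.shape)  # torch.Size([1, 84, 5040])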
model = M().eval()
shape = 1, 3, 48, 80
xx = torch.rand(*shape, dtype=torch.float32, requires_grad=False)
# model = torch.jit.trace(model, xx)
# Export the model
input_name = "data"
output_name = "yolo"
utils.export(
    model,  # torch model
    xx,  # model input (or a tuple for multiple inputs)
    f"{root_dir}/{output_name}.onnx",  # where to save the model (file or file-like object)
    export_params=True,  # store the trained parameter weights inside the model file
    opset_version=17,  # the ONNX opset version to export to
    do_constant_folding=True,  # whether to run constant folding for optimization
    input_names=[input_name],  # the model's input names
    output_names=['output'],  # the model's output names
    keep_initializers_as_inputs=True,
    # export_modules_as_functions=True,
    verbose=True,
    operator_export_type=OperatorExportTypes.ONNX_FALLTHROUGH,
    # dynamic_axes={'data': {0: 'batch_size'},    # variable-length axes
    #               'output': {0: 'batch_size'}}
)
Detect: tensor([ 8., 16., 32.])
stride, h, w: (tensor(8.), tensor(48), tensor(80))
stride, h, w: (tensor(16.), tensor(24), tensor(40))
stride, h, w: (tensor(32.), tensor(12), tensor(20))
Exported graph: graph(%data : Float(1, 3, 48, 80, strides=[11520, 3840, 80, 1], requires_grad=0, device=cpu),
%conv0.weight : Float(16, 3, 1, 1, strides=[3, 1, 1, 1], requires_grad=1, device=cpu),
%conv1.weight : Float(16, 16, 1, 1, strides=[16, 1, 1, 1], requires_grad=1, device=cpu),
%conv2.weight : Float(256, 16, 1, 1, strides=[16, 1, 1, 1], requires_grad=1, device=cpu),
%conv3.weight : Float(1024, 16, 1, 1, strides=[16, 1, 1, 1], requires_grad=1, device=cpu),
%det.cv2.0.2.weight : Float(64, 64, 1, 1, strides=[64, 1, 1, 1], requires_grad=1, device=cpu),
%det.cv2.0.2.bias : Float(64, strides=[1], requires_grad=1, device=cpu),
%det.cv2.1.2.weight : Float(64, 64, 1, 1, strides=[64, 1, 1, 1], requires_grad=1, device=cpu),
%det.cv2.1.2.bias : Float(64, strides=[1], requires_grad=1, device=cpu),
%det.cv2.2.2.weight : Float(64, 64, 1, 1, strides=[64, 1, 1, 1], requires_grad=1, device=cpu),
%det.cv2.2.2.bias : Float(64, strides=[1], requires_grad=1, device=cpu),
%det.cv3.0.2.weight : Float(80, 80, 1, 1, strides=[80, 1, 1, 1], requires_grad=1, device=cpu),
%det.cv3.0.2.bias : Float(80, strides=[1], requires_grad=1, device=cpu),
%det.cv3.1.2.weight : Float(80, 80, 1, 1, strides=[80, 1, 1, 1], requires_grad=1, device=cpu),
%det.cv3.1.2.bias : Float(80, strides=[1], requires_grad=1, device=cpu),
%det.cv3.2.2.weight : Float(80, 80, 1, 1, strides=[80, 1, 1, 1], requires_grad=1, device=cpu),
%det.cv3.2.2.bias : Float(80, strides=[1], requires_grad=1, device=cpu),
%det.dfl.conv.weight : Float(1, 16, 1, 1, strides=[16, 1, 1, 1], requires_grad=0, device=cpu),
%onnx::Conv_296 : Float(64, 16, 3, 3, strides=[144, 9, 3, 1], requires_grad=0, device=cpu),
%onnx::Conv_297 : Float(64, strides=[1], requires_grad=0, device=cpu),
%onnx::Conv_299 : Float(64, 64, 3, 3, strides=[576, 9, 3, 1], requires_grad=0, device=cpu),
%onnx::Conv_300 : Float(64, strides=[1], requires_grad=0, device=cpu),
%onnx::Conv_302 : Float(80, 16, 3, 3, strides=[144, 9, 3, 1], requires_grad=0, device=cpu),
%onnx::Conv_303 : Float(80, strides=[1], requires_grad=0, device=cpu),
%onnx::Conv_305 : Float(80, 80, 3, 3, strides=[720, 9, 3, 1], requires_grad=0, device=cpu),
%onnx::Conv_306 : Float(80, strides=[1], requires_grad=0, device=cpu),
%onnx::Conv_308 : Float(64, 256, 3, 3, strides=[2304, 9, 3, 1], requires_grad=0, device=cpu),
%onnx::Conv_309 : Float(64, strides=[1], requires_grad=0, device=cpu),
%onnx::Conv_311 : Float(64, 64, 3, 3, strides=[576, 9, 3, 1], requires_grad=0, device=cpu),
%onnx::Conv_312 : Float(64, strides=[1], requires_grad=0, device=cpu),
%onnx::Conv_314 : Float(80, 256, 3, 3, strides=[2304, 9, 3, 1], requires_grad=0, device=cpu),
%onnx::Conv_315 : Float(80, strides=[1], requires_grad=0, device=cpu),
%onnx::Conv_317 : Float(80, 80, 3, 3, strides=[720, 9, 3, 1], requires_grad=0, device=cpu),
%onnx::Conv_318 : Float(80, strides=[1], requires_grad=0, device=cpu),
%onnx::Conv_320 : Float(64, 1024, 3, 3, strides=[9216, 9, 3, 1], requires_grad=0, device=cpu),
%onnx::Conv_321 : Float(64, strides=[1], requires_grad=0, device=cpu),
%onnx::Conv_323 : Float(64, 64, 3, 3, strides=[576, 9, 3, 1], requires_grad=0, device=cpu),
%onnx::Conv_324 : Float(64, strides=[1], requires_grad=0, device=cpu),
%onnx::Conv_326 : Float(80, 1024, 3, 3, strides=[9216, 9, 3, 1], requires_grad=0, device=cpu),
%onnx::Conv_327 : Float(80, strides=[1], requires_grad=0, device=cpu),
%onnx::Conv_329 : Float(80, 80, 3, 3, strides=[720, 9, 3, 1], requires_grad=0, device=cpu),
%onnx::Conv_330 : Float(80, strides=[1], requires_grad=0, device=cpu)):
%/conv0/Conv_output_0 : Float(1, 16, 48, 80, strides=[61440, 3840, 80, 1], requires_grad=0, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv0/Conv"](%data, %conv0.weight), scope: __main__.M::/torch.nn.modules.conv.Conv2d::conv0 # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/conv1/Conv_output_0 : Float(1, 16, 48, 80, strides=[61440, 3840, 80, 1], requires_grad=0, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv1/Conv"](%/conv0/Conv_output_0, %conv1.weight), scope: __main__.M::/torch.nn.modules.conv.Conv2d::conv1 # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/conv2/Conv_output_0 : Float(1, 256, 24, 40, strides=[245760, 960, 40, 1], requires_grad=0, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[2, 2], onnx_name="/conv2/Conv"](%/conv0/Conv_output_0, %conv2.weight), scope: __main__.M::/torch.nn.modules.conv.Conv2d::conv2 # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/conv3/Conv_output_0 : Float(1, 1024, 12, 20, strides=[245760, 240, 20, 1], requires_grad=0, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[4, 4], onnx_name="/conv3/Conv"](%/conv0/Conv_output_0, %conv3.weight), scope: __main__.M::/torch.nn.modules.conv.Conv2d::conv3 # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv2.0/cv2.0.0/conv/Conv_output_0 : Float(1, 64, 48, 80, strides=[245760, 3840, 80, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/det/cv2.0/cv2.0.0/conv/Conv"](%/conv1/Conv_output_0, %onnx::Conv_296, %onnx::Conv_297), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.0/__main__.Conv::cv2.0.0/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv2.0/cv2.0.0/act/Relu_output_0 : Float(1, 64, 48, 80, strides=[245760, 3840, 80, 1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="/det/cv2.0/cv2.0.0/act/Relu"](%/det/cv2.0/cv2.0.0/conv/Conv_output_0), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.0/__main__.Conv::cv2.0.0/torch.nn.modules.activation.ReLU::act # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/functional.py:1500:0
%/det/cv2.0/cv2.0.1/conv/Conv_output_0 : Float(1, 64, 48, 80, strides=[245760, 3840, 80, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/det/cv2.0/cv2.0.1/conv/Conv"](%/det/cv2.0/cv2.0.0/act/Relu_output_0, %onnx::Conv_299, %onnx::Conv_300), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.0/__main__.Conv::cv2.0.1/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv2.0/cv2.0.1/act/Relu_output_0 : Float(1, 64, 48, 80, strides=[245760, 3840, 80, 1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="/det/cv2.0/cv2.0.1/act/Relu"](%/det/cv2.0/cv2.0.1/conv/Conv_output_0), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.0/__main__.Conv::cv2.0.1/torch.nn.modules.activation.ReLU::act # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/functional.py:1500:0
%/det/cv2.0/cv2.0.2/Conv_output_0 : Float(1, 64, 48, 80, strides=[245760, 3840, 80, 1], requires_grad=0, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/det/cv2.0/cv2.0.2/Conv"](%/det/cv2.0/cv2.0.1/act/Relu_output_0, %det.cv2.0.2.weight, %det.cv2.0.2.bias), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.0/torch.nn.modules.conv.Conv2d::cv2.0.2 # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv3.0/cv3.0.0/conv/Conv_output_0 : Float(1, 80, 48, 80, strides=[307200, 3840, 80, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/det/cv3.0/cv3.0.0/conv/Conv"](%/conv1/Conv_output_0, %onnx::Conv_302, %onnx::Conv_303), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.0/__main__.Conv::cv3.0.0/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv3.0/cv3.0.0/act/Relu_output_0 : Float(1, 80, 48, 80, strides=[307200, 3840, 80, 1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="/det/cv3.0/cv3.0.0/act/Relu"](%/det/cv3.0/cv3.0.0/conv/Conv_output_0), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.0/__main__.Conv::cv3.0.0/torch.nn.modules.activation.ReLU::act # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/functional.py:1500:0
%/det/cv3.0/cv3.0.1/conv/Conv_output_0 : Float(1, 80, 48, 80, strides=[307200, 3840, 80, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/det/cv3.0/cv3.0.1/conv/Conv"](%/det/cv3.0/cv3.0.0/act/Relu_output_0, %onnx::Conv_305, %onnx::Conv_306), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.0/__main__.Conv::cv3.0.1/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv3.0/cv3.0.1/act/Relu_output_0 : Float(1, 80, 48, 80, strides=[307200, 3840, 80, 1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="/det/cv3.0/cv3.0.1/act/Relu"](%/det/cv3.0/cv3.0.1/conv/Conv_output_0), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.0/__main__.Conv::cv3.0.1/torch.nn.modules.activation.ReLU::act # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/functional.py:1500:0
%/det/cv3.0/cv3.0.2/Conv_output_0 : Float(1, 80, 48, 80, strides=[307200, 3840, 80, 1], requires_grad=0, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/det/cv3.0/cv3.0.2/Conv"](%/det/cv3.0/cv3.0.1/act/Relu_output_0, %det.cv3.0.2.weight, %det.cv3.0.2.bias), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.0/torch.nn.modules.conv.Conv2d::cv3.0.2 # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/Concat_output_0 : Float(1, 144, 48, 80, strides=[552960, 3840, 80, 1], requires_grad=1, device=cpu) = onnx::Concat[axis=1, onnx_name="/det/Concat"](%/det/cv2.0/cv2.0.2/Conv_output_0, %/det/cv3.0/cv3.0.2/Conv_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:26:0
%/det/cv2.1/cv2.1.0/conv/Conv_output_0 : Float(1, 64, 24, 40, strides=[61440, 960, 40, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/det/cv2.1/cv2.1.0/conv/Conv"](%/conv2/Conv_output_0, %onnx::Conv_308, %onnx::Conv_309), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.1/__main__.Conv::cv2.1.0/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv2.1/cv2.1.0/act/Relu_output_0 : Float(1, 64, 24, 40, strides=[61440, 960, 40, 1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="/det/cv2.1/cv2.1.0/act/Relu"](%/det/cv2.1/cv2.1.0/conv/Conv_output_0), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.1/__main__.Conv::cv2.1.0/torch.nn.modules.activation.ReLU::act # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/functional.py:1500:0
%/det/cv2.1/cv2.1.1/conv/Conv_output_0 : Float(1, 64, 24, 40, strides=[61440, 960, 40, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/det/cv2.1/cv2.1.1/conv/Conv"](%/det/cv2.1/cv2.1.0/act/Relu_output_0, %onnx::Conv_311, %onnx::Conv_312), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.1/__main__.Conv::cv2.1.1/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv2.1/cv2.1.1/act/Relu_output_0 : Float(1, 64, 24, 40, strides=[61440, 960, 40, 1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="/det/cv2.1/cv2.1.1/act/Relu"](%/det/cv2.1/cv2.1.1/conv/Conv_output_0), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.1/__main__.Conv::cv2.1.1/torch.nn.modules.activation.ReLU::act # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/functional.py:1500:0
%/det/cv2.1/cv2.1.2/Conv_output_0 : Float(1, 64, 24, 40, strides=[61440, 960, 40, 1], requires_grad=0, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/det/cv2.1/cv2.1.2/Conv"](%/det/cv2.1/cv2.1.1/act/Relu_output_0, %det.cv2.1.2.weight, %det.cv2.1.2.bias), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.1/torch.nn.modules.conv.Conv2d::cv2.1.2 # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv3.1/cv3.1.0/conv/Conv_output_0 : Float(1, 80, 24, 40, strides=[76800, 960, 40, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/det/cv3.1/cv3.1.0/conv/Conv"](%/conv2/Conv_output_0, %onnx::Conv_314, %onnx::Conv_315), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.1/__main__.Conv::cv3.1.0/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv3.1/cv3.1.0/act/Relu_output_0 : Float(1, 80, 24, 40, strides=[76800, 960, 40, 1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="/det/cv3.1/cv3.1.0/act/Relu"](%/det/cv3.1/cv3.1.0/conv/Conv_output_0), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.1/__main__.Conv::cv3.1.0/torch.nn.modules.activation.ReLU::act # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/functional.py:1500:0
%/det/cv3.1/cv3.1.1/conv/Conv_output_0 : Float(1, 80, 24, 40, strides=[76800, 960, 40, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/det/cv3.1/cv3.1.1/conv/Conv"](%/det/cv3.1/cv3.1.0/act/Relu_output_0, %onnx::Conv_317, %onnx::Conv_318), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.1/__main__.Conv::cv3.1.1/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv3.1/cv3.1.1/act/Relu_output_0 : Float(1, 80, 24, 40, strides=[76800, 960, 40, 1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="/det/cv3.1/cv3.1.1/act/Relu"](%/det/cv3.1/cv3.1.1/conv/Conv_output_0), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.1/__main__.Conv::cv3.1.1/torch.nn.modules.activation.ReLU::act # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/functional.py:1500:0
%/det/cv3.1/cv3.1.2/Conv_output_0 : Float(1, 80, 24, 40, strides=[76800, 960, 40, 1], requires_grad=0, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/det/cv3.1/cv3.1.2/Conv"](%/det/cv3.1/cv3.1.1/act/Relu_output_0, %det.cv3.1.2.weight, %det.cv3.1.2.bias), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.1/torch.nn.modules.conv.Conv2d::cv3.1.2 # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/Concat_1_output_0 : Float(1, 144, 24, 40, strides=[138240, 960, 40, 1], requires_grad=1, device=cpu) = onnx::Concat[axis=1, onnx_name="/det/Concat_1"](%/det/cv2.1/cv2.1.2/Conv_output_0, %/det/cv3.1/cv3.1.2/Conv_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:26:0
%/det/cv2.2/cv2.2.0/conv/Conv_output_0 : Float(1, 64, 12, 20, strides=[15360, 240, 20, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/det/cv2.2/cv2.2.0/conv/Conv"](%/conv3/Conv_output_0, %onnx::Conv_320, %onnx::Conv_321), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.2/__main__.Conv::cv2.2.0/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv2.2/cv2.2.0/act/Relu_output_0 : Float(1, 64, 12, 20, strides=[15360, 240, 20, 1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="/det/cv2.2/cv2.2.0/act/Relu"](%/det/cv2.2/cv2.2.0/conv/Conv_output_0), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.2/__main__.Conv::cv2.2.0/torch.nn.modules.activation.ReLU::act # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/functional.py:1500:0
%/det/cv2.2/cv2.2.1/conv/Conv_output_0 : Float(1, 64, 12, 20, strides=[15360, 240, 20, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/det/cv2.2/cv2.2.1/conv/Conv"](%/det/cv2.2/cv2.2.0/act/Relu_output_0, %onnx::Conv_323, %onnx::Conv_324), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.2/__main__.Conv::cv2.2.1/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv2.2/cv2.2.1/act/Relu_output_0 : Float(1, 64, 12, 20, strides=[15360, 240, 20, 1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="/det/cv2.2/cv2.2.1/act/Relu"](%/det/cv2.2/cv2.2.1/conv/Conv_output_0), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.2/__main__.Conv::cv2.2.1/torch.nn.modules.activation.ReLU::act # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/functional.py:1500:0
%/det/cv2.2/cv2.2.2/Conv_output_0 : Float(1, 64, 12, 20, strides=[15360, 240, 20, 1], requires_grad=0, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/det/cv2.2/cv2.2.2/Conv"](%/det/cv2.2/cv2.2.1/act/Relu_output_0, %det.cv2.2.2.weight, %det.cv2.2.2.bias), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv2.2/torch.nn.modules.conv.Conv2d::cv2.2.2 # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv3.2/cv3.2.0/conv/Conv_output_0 : Float(1, 80, 12, 20, strides=[19200, 240, 20, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/det/cv3.2/cv3.2.0/conv/Conv"](%/conv3/Conv_output_0, %onnx::Conv_326, %onnx::Conv_327), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.2/__main__.Conv::cv3.2.0/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv3.2/cv3.2.0/act/Relu_output_0 : Float(1, 80, 12, 20, strides=[19200, 240, 20, 1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="/det/cv3.2/cv3.2.0/act/Relu"](%/det/cv3.2/cv3.2.0/conv/Conv_output_0), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.2/__main__.Conv::cv3.2.0/torch.nn.modules.activation.ReLU::act # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/functional.py:1500:0
%/det/cv3.2/cv3.2.1/conv/Conv_output_0 : Float(1, 80, 12, 20, strides=[19200, 240, 20, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name="/det/cv3.2/cv3.2.1/conv/Conv"](%/det/cv3.2/cv3.2.0/act/Relu_output_0, %onnx::Conv_329, %onnx::Conv_330), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.2/__main__.Conv::cv3.2.1/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/cv3.2/cv3.2.1/act/Relu_output_0 : Float(1, 80, 12, 20, strides=[19200, 240, 20, 1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="/det/cv3.2/cv3.2.1/act/Relu"](%/det/cv3.2/cv3.2.1/conv/Conv_output_0), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.2/__main__.Conv::cv3.2.1/torch.nn.modules.activation.ReLU::act # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/functional.py:1500:0
%/det/cv3.2/cv3.2.2/Conv_output_0 : Float(1, 80, 12, 20, strides=[19200, 240, 20, 1], requires_grad=0, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/det/cv3.2/cv3.2.2/Conv"](%/det/cv3.2/cv3.2.1/act/Relu_output_0, %det.cv3.2.2.weight, %det.cv3.2.2.bias), scope: __main__.M::/__main__.Detect::det/torch.nn.modules.container.Sequential::cv3.2/torch.nn.modules.conv.Conv2d::cv3.2.2 # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/Concat_2_output_0 : Float(1, 144, 12, 20, strides=[34560, 240, 20, 1], requires_grad=1, device=cpu) = onnx::Concat[axis=1, onnx_name="/det/Concat_2"](%/det/cv2.2/cv2.2.2/Conv_output_0, %/det/cv3.2/cv3.2.2/Conv_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:26:0
%/det/Constant_output_0 : Long(2, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value= 48 80 [ CPULongType{2} ], onnx_name="/det/Constant"](), scope: __main__.M::/__main__.Detect::det # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/functional.py:512:0
%/det/Constant_1_output_0 : Float(48, 1, strides=[1, 1], requires_grad=0, device=cpu) = onnx::Constant[value=<Tensor>, onnx_name="/det/Constant_1"](), scope: __main__.M::/__main__.Detect::det # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/functional.py:512:0
%/det/Expand_output_0 : Float(48, 80, strides=[1, 0], requires_grad=0, device=cpu) = onnx::Expand[onnx_name="/det/Expand"](%/det/Constant_1_output_0, %/det/Constant_output_0), scope: __main__.M::/__main__.Detect::det
%/det/Constant_2_output_0 : Float(1, 80, strides=[80, 1], requires_grad=0, device=cpu) = onnx::Constant[value=<Tensor>, onnx_name="/det/Constant_2"](), scope: __main__.M::/__main__.Detect::det # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/functional.py:512:0
%/det/Expand_1_output_0 : Float(48, 80, strides=[0, 1], requires_grad=0, device=cpu) = onnx::Expand[onnx_name="/det/Expand_1"](%/det/Constant_2_output_0, %/det/Constant_output_0), scope: __main__.M::/__main__.Detect::det
%/det/Constant_3_output_0 : Long(1, strides=[1], device=cpu) = onnx::Constant[value={-1}, onnx_name="/det/Constant_3"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Unsqueeze_output_0 : Float(48, 80, 1, strides=[80, 1, 1], device=cpu) = onnx::Unsqueeze[onnx_name="/det/Unsqueeze"](%/det/Expand_1_output_0, %/det/Constant_3_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Constant_4_output_0 : Long(1, strides=[1], device=cpu) = onnx::Constant[value={-1}, onnx_name="/det/Constant_4"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Unsqueeze_1_output_0 : Float(48, 80, 1, strides=[80, 1, 1], device=cpu) = onnx::Unsqueeze[onnx_name="/det/Unsqueeze_1"](%/det/Expand_output_0, %/det/Constant_4_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Concat_3_output_0 : Float(48, 80, 2, strides=[160, 2, 1], requires_grad=0, device=cpu) = onnx::Concat[axis=-1, onnx_name="/det/Concat_3"](%/det/Unsqueeze_output_0, %/det/Unsqueeze_1_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Constant_5_output_0 : Long(2, strides=[1], device=cpu) = onnx::Constant[value=-1 2 [ CPULongType{2} ], onnx_name="/det/Constant_5"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Reshape_output_0 : Float(3840, 2, strides=[2, 1], requires_grad=0, device=cpu) = onnx::Reshape[allowzero=0, onnx_name="/det/Reshape"](%/det/Concat_3_output_0, %/det/Constant_5_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Constant_6_output_0 : Long(2, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value= 24 40 [ CPULongType{2} ], onnx_name="/det/Constant_6"](), scope: __main__.M::/__main__.Detect::det # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/functional.py:512:0
%/det/Constant_7_output_0 : Float(24, 1, strides=[1, 1], requires_grad=0, device=cpu) = onnx::Constant[value=<Tensor>, onnx_name="/det/Constant_7"](), scope: __main__.M::/__main__.Detect::det # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/functional.py:512:0
%/det/Expand_2_output_0 : Float(24, 40, strides=[1, 0], requires_grad=0, device=cpu) = onnx::Expand[onnx_name="/det/Expand_2"](%/det/Constant_7_output_0, %/det/Constant_6_output_0), scope: __main__.M::/__main__.Detect::det
%/det/Constant_8_output_0 : Float(1, 40, strides=[40, 1], requires_grad=0, device=cpu) = onnx::Constant[value=<Tensor>, onnx_name="/det/Constant_8"](), scope: __main__.M::/__main__.Detect::det # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/functional.py:512:0
%/det/Expand_3_output_0 : Float(24, 40, strides=[0, 1], requires_grad=0, device=cpu) = onnx::Expand[onnx_name="/det/Expand_3"](%/det/Constant_8_output_0, %/det/Constant_6_output_0), scope: __main__.M::/__main__.Detect::det
%/det/Constant_9_output_0 : Long(1, strides=[1], device=cpu) = onnx::Constant[value={-1}, onnx_name="/det/Constant_9"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Unsqueeze_2_output_0 : Float(24, 40, 1, strides=[40, 1, 1], device=cpu) = onnx::Unsqueeze[onnx_name="/det/Unsqueeze_2"](%/det/Expand_3_output_0, %/det/Constant_9_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Constant_10_output_0 : Long(1, strides=[1], device=cpu) = onnx::Constant[value={-1}, onnx_name="/det/Constant_10"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Unsqueeze_3_output_0 : Float(24, 40, 1, strides=[40, 1, 1], device=cpu) = onnx::Unsqueeze[onnx_name="/det/Unsqueeze_3"](%/det/Expand_2_output_0, %/det/Constant_10_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Concat_4_output_0 : Float(24, 40, 2, strides=[80, 2, 1], requires_grad=0, device=cpu) = onnx::Concat[axis=-1, onnx_name="/det/Concat_4"](%/det/Unsqueeze_2_output_0, %/det/Unsqueeze_3_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Constant_11_output_0 : Long(2, strides=[1], device=cpu) = onnx::Constant[value=-1 2 [ CPULongType{2} ], onnx_name="/det/Constant_11"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Reshape_1_output_0 : Float(960, 2, strides=[2, 1], requires_grad=0, device=cpu) = onnx::Reshape[allowzero=0, onnx_name="/det/Reshape_1"](%/det/Concat_4_output_0, %/det/Constant_11_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Constant_12_output_0 : Long(2, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value= 12 20 [ CPULongType{2} ], onnx_name="/det/Constant_12"](), scope: __main__.M::/__main__.Detect::det # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/functional.py:512:0
%/det/Constant_13_output_0 : Float(12, 1, strides=[1, 1], requires_grad=0, device=cpu) = onnx::Constant[value=<Tensor>, onnx_name="/det/Constant_13"](), scope: __main__.M::/__main__.Detect::det # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/functional.py:512:0
%/det/Expand_4_output_0 : Float(12, 20, strides=[1, 0], requires_grad=0, device=cpu) = onnx::Expand[onnx_name="/det/Expand_4"](%/det/Constant_13_output_0, %/det/Constant_12_output_0), scope: __main__.M::/__main__.Detect::det
%/det/Constant_14_output_0 : Float(1, 20, strides=[20, 1], requires_grad=0, device=cpu) = onnx::Constant[value=<Tensor>, onnx_name="/det/Constant_14"](), scope: __main__.M::/__main__.Detect::det # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/functional.py:512:0
%/det/Expand_5_output_0 : Float(12, 20, strides=[0, 1], requires_grad=0, device=cpu) = onnx::Expand[onnx_name="/det/Expand_5"](%/det/Constant_14_output_0, %/det/Constant_12_output_0), scope: __main__.M::/__main__.Detect::det
%/det/Constant_15_output_0 : Long(1, strides=[1], device=cpu) = onnx::Constant[value={-1}, onnx_name="/det/Constant_15"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Unsqueeze_4_output_0 : Float(12, 20, 1, strides=[20, 1, 1], device=cpu) = onnx::Unsqueeze[onnx_name="/det/Unsqueeze_4"](%/det/Expand_5_output_0, %/det/Constant_15_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Constant_16_output_0 : Long(1, strides=[1], device=cpu) = onnx::Constant[value={-1}, onnx_name="/det/Constant_16"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Unsqueeze_5_output_0 : Float(12, 20, 1, strides=[20, 1, 1], device=cpu) = onnx::Unsqueeze[onnx_name="/det/Unsqueeze_5"](%/det/Expand_4_output_0, %/det/Constant_16_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Concat_5_output_0 : Float(12, 20, 2, strides=[40, 2, 1], requires_grad=0, device=cpu) = onnx::Concat[axis=-1, onnx_name="/det/Concat_5"](%/det/Unsqueeze_4_output_0, %/det/Unsqueeze_5_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Constant_17_output_0 : Long(2, strides=[1], device=cpu) = onnx::Constant[value=-1 2 [ CPULongType{2} ], onnx_name="/det/Constant_17"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Reshape_2_output_0 : Float(240, 2, strides=[2, 1], requires_grad=0, device=cpu) = onnx::Reshape[allowzero=0, onnx_name="/det/Reshape_2"](%/det/Concat_5_output_0, %/det/Constant_17_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:14:0
%/det/Concat_6_output_0 : Float(5040, 2, strides=[2, 1], requires_grad=0, device=cpu) = onnx::Concat[axis=0, onnx_name="/det/Concat_6"](%/det/Reshape_output_0, %/det/Reshape_1_output_0, %/det/Reshape_2_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/2190579285.py:16:0
%/det/Transpose_output_0 : Float(2, 5040, strides=[1, 2], requires_grad=0, device=cpu) = onnx::Transpose[perm=[1, 0], onnx_name="/det/Transpose"](%/det/Concat_6_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:31:0
%/det/Constant_18_output_0 : Long(3, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value= 1 144 -1 [ CPULongType{3} ], onnx_name="/det/Constant_18"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:34:0
%/det/Constant_19_output_0 : Long(3, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value= 1 144 -1 [ CPULongType{3} ], onnx_name="/det/Constant_19"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:34:0
%/det/Constant_20_output_0 : Long(3, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value= 1 144 -1 [ CPULongType{3} ], onnx_name="/det/Constant_20"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:34:0
%/det/Reshape_3_output_0 : Float(1, 144, 3840, strides=[552960, 3840, 1], requires_grad=1, device=cpu) = onnx::Reshape[allowzero=0, onnx_name="/det/Reshape_3"](%/det/Concat_output_0, %/det/Constant_18_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:34:0
%/det/Reshape_4_output_0 : Float(1, 144, 960, strides=[138240, 960, 1], requires_grad=1, device=cpu) = onnx::Reshape[allowzero=0, onnx_name="/det/Reshape_4"](%/det/Concat_1_output_0, %/det/Constant_19_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:34:0
%/det/Reshape_5_output_0 : Float(1, 144, 240, strides=[34560, 240, 1], requires_grad=1, device=cpu) = onnx::Reshape[allowzero=0, onnx_name="/det/Reshape_5"](%/det/Concat_2_output_0, %/det/Constant_20_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:34:0
%/det/Concat_7_output_0 : Float(1, 144, 5040, strides=[725760, 5040, 1], requires_grad=1, device=cpu) = onnx::Concat[axis=2, onnx_name="/det/Concat_7"](%/det/Reshape_3_output_0, %/det/Reshape_4_output_0, %/det/Reshape_5_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:34:0
%onnx::Split_241 : Long(2, strides=[1], device=cpu) = onnx::Constant[value= 64 80 [ CPULongType{2} ]]()
%/det/Split_output_0 : Float(1, 64, 5040, strides=[725760, 5040, 1], requires_grad=1, device=cpu), %/det/Split_output_1 : Float(1, 80, 5040, strides=[725760, 5040, 1], requires_grad=1, device=cpu) = onnx::Split[axis=1, onnx_name="/det/Split"](%/det/Concat_7_output_0, %onnx::Split_241), scope: __main__.M::/__main__.Detect::det # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/_tensor.py:921:0
%/det/dfl/Constant_output_0 : Long(4, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value= 1 16 4 5040 [ CPULongType{4} ], onnx_name="/det/dfl/Constant"](), scope: __main__.M::/__main__.Detect::det/__main__.DFL::dfl # /tmp/ipykernel_2640477/467822237.py:19:0
%/det/dfl/Reshape_output_0 : Float(1, 16, 4, 5040, strides=[322560, 20160, 5040, 1], requires_grad=1, device=cpu) = onnx::Reshape[allowzero=0, onnx_name="/det/dfl/Reshape"](%/det/Split_output_0, %/det/dfl/Constant_output_0), scope: __main__.M::/__main__.Detect::det/__main__.DFL::dfl # /tmp/ipykernel_2640477/467822237.py:19:0
%/det/dfl/Softmax_output_0 : Float(1, 16, 4, 5040, strides=[322560, 20160, 5040, 1], requires_grad=1, device=cpu) = onnx::Softmax[axis=1, onnx_name="/det/dfl/Softmax"](%/det/dfl/Reshape_output_0), scope: __main__.M::/__main__.Detect::det/__main__.DFL::dfl # /tmp/ipykernel_2640477/467822237.py:19:0
%/det/dfl/conv/Conv_output_0 : Float(1, 1, 4, 5040, strides=[20160, 20160, 5040, 1], requires_grad=0, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/det/dfl/conv/Conv"](%/det/dfl/Softmax_output_0, %det.dfl.conv.weight), scope: __main__.M::/__main__.Detect::det/__main__.DFL::dfl/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/py312x/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
%/det/dfl/Constant_1_output_0 : Long(3, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value= 1 4 5040 [ CPULongType{3} ], onnx_name="/det/dfl/Constant_1"](), scope: __main__.M::/__main__.Detect::det/__main__.DFL::dfl # /tmp/ipykernel_2640477/467822237.py:19:0
%/det/dfl/Reshape_1_output_0 : Float(1, 4, 5040, strides=[20160, 5040, 1], requires_grad=1, device=cpu) = onnx::Reshape[allowzero=0, onnx_name="/det/dfl/Reshape_1"](%/det/dfl/conv/Conv_output_0, %/det/dfl/Constant_1_output_0), scope: __main__.M::/__main__.Detect::det/__main__.DFL::dfl # /tmp/ipykernel_2640477/467822237.py:19:0
%/det/Constant_21_output_0 : Long(1, strides=[1], device=cpu) = onnx::Constant[value={0}, onnx_name="/det/Constant_21"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:40:0
%/det/Unsqueeze_6_output_0 : Float(1, 2, 5040, strides=[2, 1, 2], requires_grad=0, device=cpu) = onnx::Unsqueeze[onnx_name="/det/Unsqueeze_6"](%/det/Transpose_output_0, %/det/Constant_21_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:40:0
%/det/Shape_output_0 : Long(3, strides=[1], device=cpu) = onnx::Shape[onnx_name="/det/Shape"](%/det/dfl/Reshape_1_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Constant_22_output_0 : Long(1, strides=[1], device=cpu) = onnx::Constant[value={1}, onnx_name="/det/Constant_22"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Gather_output_0 : Long(1, strides=[1], device=cpu) = onnx::Gather[axis=0, onnx_name="/det/Gather"](%/det/Shape_output_0, %/det/Constant_22_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Constant_23_output_0 : Long(1, strides=[1], device=cpu) = onnx::Constant[value={0}, onnx_name="/det/Constant_23"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Constant_24_output_0 : Long(1, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value={1}, onnx_name="/det/Constant_24"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Add_output_0 : Long(1, strides=[1], device=cpu) = onnx::Add[onnx_name="/det/Add"](%/det/Gather_output_0, %/det/Constant_24_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Constant_25_output_0 : Long(1, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value={2}, onnx_name="/det/Constant_25"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Div_output_0 : Long(1, strides=[1], device=cpu) = onnx::Div[onnx_name="/det/Div"](%/det/Add_output_0, %/det/Constant_25_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Constant_26_output_0 : Long(1, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value={1}, onnx_name="/det/Constant_26"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Mul_output_0 : Long(1, strides=[1], device=cpu) = onnx::Mul[onnx_name="/det/Mul"](%/det/Div_output_0, %/det/Constant_26_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Slice_output_0 : Float(1, 2, 5040, strides=[20160, 5040, 1], requires_grad=1, device=cpu) = onnx::Slice[onnx_name="/det/Slice"](%/det/dfl/Reshape_1_output_0, %/det/Constant_23_output_0, %/det/Mul_output_0, %/det/Constant_22_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Constant_27_output_0 : Long(1, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value={2}, onnx_name="/det/Constant_27"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Mul_1_output_0 : Long(1, strides=[1], device=cpu) = onnx::Mul[onnx_name="/det/Mul_1"](%/det/Div_output_0, %/det/Constant_27_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Slice_1_output_0 : Float(1, 2, 5040, strides=[20160, 5040, 1], requires_grad=1, device=cpu) = onnx::Slice[onnx_name="/det/Slice_1"](%/det/dfl/Reshape_1_output_0, %/det/Mul_output_0, %/det/Mul_1_output_0, %/det/Constant_22_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:3:0
%/det/Sub_output_0 : Float(1, 2, 5040, strides=[2, 1, 2], requires_grad=1, device=cpu) = onnx::Sub[onnx_name="/det/Sub"](%/det/Unsqueeze_6_output_0, %/det/Slice_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:4:0
%/det/Add_1_output_0 : Float(1, 2, 5040, strides=[2, 1, 2], requires_grad=1, device=cpu) = onnx::Add[onnx_name="/det/Add_1"](%/det/Unsqueeze_6_output_0, %/det/Slice_1_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:5:0
%/det/Add_2_output_0 : Float(1, 2, 5040, strides=[2, 1, 2], requires_grad=1, device=cpu) = onnx::Add[onnx_name="/det/Add_2"](%/det/Sub_output_0, %/det/Add_1_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:7:0
%/det/Constant_28_output_0 : Float(requires_grad=0, device=cpu) = onnx::Constant[value={2}, onnx_name="/det/Constant_28"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:7:0
%/det/Div_1_output_0 : Float(1, 2, 5040, strides=[2, 1, 2], requires_grad=1, device=cpu) = onnx::Div[onnx_name="/det/Div_1"](%/det/Add_2_output_0, %/det/Constant_28_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:7:0
%/det/Sub_1_output_0 : Float(1, 2, 5040, strides=[2, 1, 2], requires_grad=1, device=cpu) = onnx::Sub[onnx_name="/det/Sub_1"](%/det/Add_1_output_0, %/det/Sub_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:8:0
%/det/Concat_8_output_0 : Float(1, 4, 5040, strides=[20160, 5040, 1], requires_grad=1, device=cpu) = onnx::Concat[axis=1, onnx_name="/det/Concat_8"](%/det/Div_1_output_0, %/det/Sub_1_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/1512103560.py:9:0
%/det/Constant_29_output_0 : Float(1, 5040, strides=[5040, 1], requires_grad=0, device=cpu) = onnx::Constant[value=<Tensor>, onnx_name="/det/Constant_29"](), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:40:0
%/det/Mul_2_output_0 : Float(1, 4, 5040, strides=[20160, 5040, 1], requires_grad=1, device=cpu) = onnx::Mul[onnx_name="/det/Mul_2"](%/det/Concat_8_output_0, %/det/Constant_29_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:40:0
%/det/Sigmoid_output_0 : Float(1, 80, 5040, strides=[403200, 5040, 1], requires_grad=1, device=cpu) = onnx::Sigmoid[onnx_name="/det/Sigmoid"](%/det/Split_output_1), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:51:0
%output : Float(1, 84, 5040, strides=[423360, 5040, 1], requires_grad=1, device=cpu) = onnx::Concat[axis=1, onnx_name="/det/Concat_9"](%/det/Mul_2_output_0, %/det/Sigmoid_output_0), scope: __main__.M::/__main__.Detect::det # /tmp/ipykernel_2640477/4104326077.py:51:0
return (%output)
/tmp/ipykernel_2640477/2190579285.py:8: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results).
for i, stride in enumerate(strides):
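Before importing into TVM, it is worth verifying that the exported graph matches the PyTorch module numerically. A minimal sketch with onnxruntime (assuming the package is installed; this check is not part of the original export flow):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession(f"{root_dir}/{output_name}.onnx", providers=["CPUExecutionProvider"])
with torch.no_grad():
    torch_out = model(xx).numpy()
(ort_out,) = sess.run(["output"], {input_name: xx.numpy()})
np.testing.assert_allclose(torch_out, ort_out, rtol=1e-4, atol=1e-5)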
import onnx
import tvm
from tvm import relay
onnx_model = onnx.load(f"{root_dir}/{output_name}.onnx")
mod, params = relay.frontend.from_onnx(onnx_model, {"data": shape}, freeze_params=True)
mod = relay.transform.InferType()(mod)
# with tvm.transform.PassContext(opt_level=3):
# mod = relay.quantize.prerequisite_optimize(mod, params)
# mod.show()
print(mod)
def @main(%data: Tensor[(1, 3, 48, 80), float32] /* ty=Tensor[(1, 3, 48, 80), float32] span=/conv0/Conv.data:0:0 */) -> Tensor[(1, 84, 5040), float32] {
%0 = nn.conv2d(%data, meta[relay.Constant][1] /* ty=Tensor[(16, 3, 1, 1), float32] span=/conv0/Conv.conv0.weight:0:0 */, padding=[0, 0, 0, 0], channels=16, kernel_size=[1, 1]) /* ty=Tensor[(1, 16, 48, 80), float32] span=/conv0/Conv:0:0 */;
%1 = nn.conv2d(%0, meta[relay.Constant][2] /* ty=Tensor[(16, 16, 1, 1), float32] span=/conv1/Conv.conv1.weight:0:0 */, padding=[0, 0, 0, 0], channels=16, kernel_size=[1, 1]) /* ty=Tensor[(1, 16, 48, 80), float32] span=/conv1/Conv:0:0 */;
%2 = nn.conv2d(%1, meta[relay.Constant][3] /* ty=Tensor[(64, 16, 3, 3), float32] span=/det/cv2.0/cv2.0.0/conv/Conv.onnx::Conv_296:0:0 */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.0/conv/Conv:0:0 */;
%3 = nn.bias_add(%2, meta[relay.Constant][4] /* ty=Tensor[(64), float32] span=/det/cv2.0/cv2.0.0/conv/Conv.onnx::Conv_297:0:0 */) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.0/conv/Conv:0:0 */;
%4 = nn.relu(%3) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.0/act/Relu:0:0 */;
%5 = nn.conv2d(%4, meta[relay.Constant][5] /* ty=Tensor[(64, 64, 3, 3), float32] span=/det/cv2.0/cv2.0.1/conv/Conv.onnx::Conv_299:0:0 */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.1/conv/Conv:0:0 */;
%6 = nn.bias_add(%5, meta[relay.Constant][6] /* ty=Tensor[(64), float32] span=/det/cv2.0/cv2.0.1/conv/Conv.onnx::Conv_300:0:0 */) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.1/conv/Conv:0:0 */;
%7 = nn.relu(%6) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.1/act/Relu:0:0 */;
%8 = nn.conv2d(%7, meta[relay.Constant][7] /* ty=Tensor[(64, 64, 1, 1), float32] span=/det/cv2.0/cv2.0.2/Conv.det.cv2.0.2.weight:0:0 */, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1]) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.2/Conv:0:0 */;
%9 = nn.conv2d(%1, meta[relay.Constant][9] /* ty=Tensor[(80, 16, 3, 3), float32] span=/det/cv3.0/cv3.0.0/conv/Conv.onnx::Conv_302:0:0 */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3]) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.0/conv/Conv:0:0 */;
%10 = nn.bias_add(%9, meta[relay.Constant][10] /* ty=Tensor[(80), float32] span=/det/cv3.0/cv3.0.0/conv/Conv.onnx::Conv_303:0:0 */) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.0/conv/Conv:0:0 */;
%11 = nn.relu(%10) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.0/act/Relu:0:0 */;
%12 = nn.conv2d(%11, meta[relay.Constant][11] /* ty=Tensor[(80, 80, 3, 3), float32] span=/det/cv3.0/cv3.0.1/conv/Conv.onnx::Conv_305:0:0 */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3]) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.1/conv/Conv:0:0 */;
%13 = nn.bias_add(%12, meta[relay.Constant][12] /* ty=Tensor[(80), float32] span=/det/cv3.0/cv3.0.1/conv/Conv.onnx::Conv_306:0:0 */) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.1/conv/Conv:0:0 */;
%14 = nn.relu(%13) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.1/act/Relu:0:0 */;
%15 = nn.conv2d(%14, meta[relay.Constant][13] /* ty=Tensor[(80, 80, 1, 1), float32] span=/det/cv3.0/cv3.0.2/Conv.det.cv3.0.2.weight:0:0 */, padding=[0, 0, 0, 0], channels=80, kernel_size=[1, 1]) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.2/Conv:0:0 */;
%16 = nn.bias_add(%8, meta[relay.Constant][8] /* ty=Tensor[(64), float32] span=/det/cv2.0/cv2.0.2/Conv.det.cv2.0.2.bias:0:0 */) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.2/Conv:0:0 */;
%17 = nn.bias_add(%15, meta[relay.Constant][14] /* ty=Tensor[(80), float32] span=/det/cv3.0/cv3.0.2/Conv.det.cv3.0.2.bias:0:0 */) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.2/Conv:0:0 */;
%18 = (%16, %17) /* ty=(Tensor[(1, 64, 48, 80), float32], Tensor[(1, 80, 48, 80), float32]) span=/det/Concat:0:0 */;
%19 = concatenate(%18, axis=1) /* ty=Tensor[(1, 144, 48, 80), float32] span=/det/Concat:0:0 */;
%20 = nn.conv2d(%0, meta[relay.Constant][15] /* ty=Tensor[(256, 16, 1, 1), float32] span=/conv2/Conv.conv2.weight:0:0 */, strides=[2, 2], padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1]) /* ty=Tensor[(1, 256, 24, 40), float32] span=/conv2/Conv:0:0 */;
%21 = nn.conv2d(%20, meta[relay.Constant][16] /* ty=Tensor[(64, 256, 3, 3), float32] span=/det/cv2.1/cv2.1.0/conv/Conv.onnx::Conv_308:0:0 */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.0/conv/Conv:0:0 */;
%22 = nn.bias_add(%21, meta[relay.Constant][17] /* ty=Tensor[(64), float32] span=/det/cv2.1/cv2.1.0/conv/Conv.onnx::Conv_309:0:0 */) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.0/conv/Conv:0:0 */;
%23 = nn.relu(%22) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.0/act/Relu:0:0 */;
%24 = nn.conv2d(%23, meta[relay.Constant][18] /* ty=Tensor[(64, 64, 3, 3), float32] span=/det/cv2.1/cv2.1.1/conv/Conv.onnx::Conv_311:0:0 */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.1/conv/Conv:0:0 */;
%25 = nn.bias_add(%24, meta[relay.Constant][19] /* ty=Tensor[(64), float32] span=/det/cv2.1/cv2.1.1/conv/Conv.onnx::Conv_312:0:0 */) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.1/conv/Conv:0:0 */;
%26 = nn.relu(%25) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.1/act/Relu:0:0 */;
%27 = nn.conv2d(%26, meta[relay.Constant][20] /* ty=Tensor[(64, 64, 1, 1), float32] span=/det/cv2.1/cv2.1.2/Conv.det.cv2.1.2.weight:0:0 */, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1]) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.2/Conv:0:0 */;
%28 = nn.conv2d(%20, meta[relay.Constant][22] /* ty=Tensor[(80, 256, 3, 3), float32] span=/det/cv3.1/cv3.1.0/conv/Conv.onnx::Conv_314:0:0 */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3]) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.0/conv/Conv:0:0 */;
%29 = nn.bias_add(%28, meta[relay.Constant][23] /* ty=Tensor[(80), float32] span=/det/cv3.1/cv3.1.0/conv/Conv.onnx::Conv_315:0:0 */) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.0/conv/Conv:0:0 */;
%30 = nn.relu(%29) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.0/act/Relu:0:0 */;
%31 = nn.conv2d(%30, meta[relay.Constant][24] /* ty=Tensor[(80, 80, 3, 3), float32] span=/det/cv3.1/cv3.1.1/conv/Conv.onnx::Conv_317:0:0 */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3]) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.1/conv/Conv:0:0 */;
%32 = nn.bias_add(%31, meta[relay.Constant][25] /* ty=Tensor[(80), float32] span=/det/cv3.1/cv3.1.1/conv/Conv.onnx::Conv_318:0:0 */) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.1/conv/Conv:0:0 */;
%33 = nn.relu(%32) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.1/act/Relu:0:0 */;
%34 = nn.conv2d(%33, meta[relay.Constant][26] /* ty=Tensor[(80, 80, 1, 1), float32] span=/det/cv3.1/cv3.1.2/Conv.det.cv3.1.2.weight:0:0 */, padding=[0, 0, 0, 0], channels=80, kernel_size=[1, 1]) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.2/Conv:0:0 */;
%35 = nn.bias_add(%27, meta[relay.Constant][21] /* ty=Tensor[(64), float32] span=/det/cv2.1/cv2.1.2/Conv.det.cv2.1.2.bias:0:0 */) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.2/Conv:0:0 */;
%36 = nn.bias_add(%34, meta[relay.Constant][27] /* ty=Tensor[(80), float32] span=/det/cv3.1/cv3.1.2/Conv.det.cv3.1.2.bias:0:0 */) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.2/Conv:0:0 */;
%37 = (%35, %36) /* ty=(Tensor[(1, 64, 24, 40), float32], Tensor[(1, 80, 24, 40), float32]) span=/det/Concat_1:0:0 */;
%38 = concatenate(%37, axis=1) /* ty=Tensor[(1, 144, 24, 40), float32] span=/det/Concat_1:0:0 */;
%39 = nn.conv2d(%0, meta[relay.Constant][28] /* ty=Tensor[(1024, 16, 1, 1), float32] span=/conv3/Conv.conv3.weight:0:0 */, strides=[4, 4], padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1]) /* ty=Tensor[(1, 1024, 12, 20), float32] span=/conv3/Conv:0:0 */;
%40 = nn.conv2d(%39, meta[relay.Constant][29] /* ty=Tensor[(64, 1024, 3, 3), float32] span=/det/cv2.2/cv2.2.0/conv/Conv.onnx::Conv_320:0:0 */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.0/conv/Conv:0:0 */;
%41 = nn.bias_add(%40, meta[relay.Constant][30] /* ty=Tensor[(64), float32] span=/det/cv2.2/cv2.2.0/conv/Conv.onnx::Conv_321:0:0 */) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.0/conv/Conv:0:0 */;
%42 = nn.relu(%41) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.0/act/Relu:0:0 */;
%43 = nn.conv2d(%42, meta[relay.Constant][31] /* ty=Tensor[(64, 64, 3, 3), float32] span=/det/cv2.2/cv2.2.1/conv/Conv.onnx::Conv_323:0:0 */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.1/conv/Conv:0:0 */;
%44 = nn.bias_add(%43, meta[relay.Constant][32] /* ty=Tensor[(64), float32] span=/det/cv2.2/cv2.2.1/conv/Conv.onnx::Conv_324:0:0 */) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.1/conv/Conv:0:0 */;
%45 = nn.relu(%44) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.1/act/Relu:0:0 */;
%46 = nn.conv2d(%45, meta[relay.Constant][33] /* ty=Tensor[(64, 64, 1, 1), float32] span=/det/cv2.2/cv2.2.2/Conv.det.cv2.2.2.weight:0:0 */, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1]) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.2/Conv:0:0 */;
%47 = nn.conv2d(%39, meta[relay.Constant][35] /* ty=Tensor[(80, 1024, 3, 3), float32] span=/det/cv3.2/cv3.2.0/conv/Conv.onnx::Conv_326:0:0 */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3]) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.0/conv/Conv:0:0 */;
%48 = nn.bias_add(%47, meta[relay.Constant][36] /* ty=Tensor[(80), float32] span=/det/cv3.2/cv3.2.0/conv/Conv.onnx::Conv_327:0:0 */) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.0/conv/Conv:0:0 */;
%49 = nn.relu(%48) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.0/act/Relu:0:0 */;
%50 = nn.conv2d(%49, meta[relay.Constant][37] /* ty=Tensor[(80, 80, 3, 3), float32] span=/det/cv3.2/cv3.2.1/conv/Conv.onnx::Conv_329:0:0 */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3]) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.1/conv/Conv:0:0 */;
%51 = nn.bias_add(%50, meta[relay.Constant][38] /* ty=Tensor[(80), float32] span=/det/cv3.2/cv3.2.1/conv/Conv.onnx::Conv_330:0:0 */) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.1/conv/Conv:0:0 */;
%52 = nn.relu(%51) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.1/act/Relu:0:0 */;
%53 = nn.conv2d(%52, meta[relay.Constant][39] /* ty=Tensor[(80, 80, 1, 1), float32] span=/det/cv3.2/cv3.2.2/Conv.det.cv3.2.2.weight:0:0 */, padding=[0, 0, 0, 0], channels=80, kernel_size=[1, 1]) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.2/Conv:0:0 */;
%54 = nn.bias_add(%46, meta[relay.Constant][34] /* ty=Tensor[(64), float32] span=/det/cv2.2/cv2.2.2/Conv.det.cv2.2.2.bias:0:0 */) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.2/Conv:0:0 */;
%55 = nn.bias_add(%53, meta[relay.Constant][40] /* ty=Tensor[(80), float32] span=/det/cv3.2/cv3.2.2/Conv.det.cv3.2.2.bias:0:0 */) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.2/Conv:0:0 */;
%56 = (%54, %55) /* ty=(Tensor[(1, 64, 12, 20), float32], Tensor[(1, 80, 12, 20), float32]) span=/det/Concat_2:0:0 */;
%57 = concatenate(%56, axis=1) /* ty=Tensor[(1, 144, 12, 20), float32] span=/det/Concat_2:0:0 */;
%58 = reshape(%19, newshape=[1, 144, -1]) /* ty=Tensor[(1, 144, 3840), float32] span=/det/Reshape_3:0:0 */;
%59 = reshape(%38, newshape=[1, 144, -1]) /* ty=Tensor[(1, 144, 960), float32] span=/det/Reshape_4:0:0 */;
%60 = reshape(%57, newshape=[1, 144, -1]) /* ty=Tensor[(1, 144, 240), float32] span=/det/Reshape_5:0:0 */;
%61 = (%58, %59, %60) /* ty=(Tensor[(1, 144, 3840), float32], Tensor[(1, 144, 960), float32], Tensor[(1, 144, 240), float32]) span=/det/Concat_7:0:0 */;
%62 = concatenate(%61, axis=2) /* ty=Tensor[(1, 144, 5040), float32] span=/det/Concat_7:0:0 */;
%63 = split(%62, indices_or_sections=[64i64], axis=1) /* ty=(Tensor[(1, 64, 5040), float32], Tensor[(1, 80, 5040), float32]) span=/det/Split:0:0 */;
%64 = %63.0 /* ty=Tensor[(1, 64, 5040), float32] span=/det/Split:0:0 */;
%65 = reshape(%64, newshape=[1, 16, 4, 5040]) /* ty=Tensor[(1, 16, 4, 5040), float32] span=/det/dfl/Reshape:0:0 */;
%66 = nn.softmax(%65, axis=1) /* ty=Tensor[(1, 16, 4, 5040), float32] span=/det/dfl/Softmax:0:0 */;
%67 = nn.conv2d(%66, meta[relay.Constant][41] /* ty=Tensor[(1, 16, 1, 1), float32] span=/det/dfl/conv/Conv.det.dfl.conv.weight:0:0 */, padding=[0, 0, 0, 0], channels=1, kernel_size=[1, 1]) /* ty=Tensor[(1, 1, 4, 5040), float32] span=/det/dfl/conv/Conv:0:0 */;
%68 = reshape(%67, newshape=[1, 4, 5040]) /* ty=Tensor[(1, 4, 5040), float32] span=/det/dfl/Reshape_1:0:0 */;
%69 = strided_slice(%68, begin=[0i64], end=[2i64], strides=[1i64], axes=[1i64]) /* ty=Tensor[(1, 2, 5040), float32] span=/det/Slice:0:0 */;
%70 = strided_slice(%68, begin=[2i64], end=[4i64], strides=[1i64], axes=[1i64]) /* ty=Tensor[(1, 2, 5040), float32] span=/det/Slice_1:0:0 */;
%71 = subtract(meta[relay.Constant][0] /* ty=Tensor[(1, 2, 5040), float32] span=/det/Unsqueeze_6:0:0 */, %69) /* ty=Tensor[(1, 2, 5040), float32] span=/det/Sub:0:0 */;
%72 = add(meta[relay.Constant][0] /* ty=Tensor[(1, 2, 5040), float32] span=/det/Unsqueeze_6:0:0 */, %70) /* ty=Tensor[(1, 2, 5040), float32] span=/det/Add_1:0:0 */;
%73 = add(%71, %72) /* ty=Tensor[(1, 2, 5040), float32] span=/det/Add_2:0:0 */;
%74 = divide(%73, 2f /* ty=float32 span=/det/Constant_28:0:0 */) /* ty=Tensor[(1, 2, 5040), float32] span=/det/Div_1:0:0 */;
%75 = subtract(%72, %71) /* ty=Tensor[(1, 2, 5040), float32] span=/det/Sub_1:0:0 */;
%76 = (%74, %75) /* ty=(Tensor[(1, 2, 5040), float32], Tensor[(1, 2, 5040), float32]) span=/det/Concat_8:0:0 */;
%77 = concatenate(%76, axis=1) /* ty=Tensor[(1, 4, 5040), float32] span=/det/Concat_8:0:0 */;
%78 = %63.1 /* ty=Tensor[(1, 80, 5040), float32] span=/det/Split:0:0 */;
%79 = multiply(%77, meta[relay.Constant][42] /* ty=Tensor[(1, 5040), float32] span=/det/Constant_29:0:0 */) /* ty=Tensor[(1, 4, 5040), float32] span=/det/Mul_2:0:0 */;
%80 = sigmoid(%78) /* ty=Tensor[(1, 80, 5040), float32] span=/det/Sigmoid:0:0 */;
%81 = (%79, %80) /* ty=(Tensor[(1, 4, 5040), float32], Tensor[(1, 80, 5040), float32]) span=/det/Concat_9:0:0 */;
concatenate(%81, axis=1) /* ty=Tensor[(1, 84, 5040), float32] span=/det/Concat_9:0:0 */
}
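The rewrites applied in the next cell are `tvm.relay.dataflow_pattern` callbacks defined in the local yolo_* modules (their sources are not shown here). For orientation, a minimal sketch of such a callback is given below: it matches the DFL block visible above (%65–%68: reshape → softmax → 1×1 conv2d → reshape) and simply returns the matched subgraph unchanged, where a real rewrite would emit the backend's fused operator. The class name DFLSketchRewrite is illustrative and not part of the project.
from tvm.relay.dataflow_pattern import DFPatternCallback, is_op, is_constant, wildcard

class DFLSketchRewrite(DFPatternCallback):
    """Illustrative pattern for the DFL block: reshape -> softmax -> conv2d -> reshape."""
    def __init__(self):
        super().__init__()
        self.x = wildcard()          # the (1, 64, 5040) box branch
        self.w = is_constant()       # the fixed arange(16) DFL kernel
        r1 = is_op("reshape")(self.x)
        sm = is_op("nn.softmax")(r1)
        conv = is_op("nn.conv2d")(sm, self.w)
        self.pattern = is_op("reshape")(conv)

    def callback(self, pre, post, node_map):
        # A real rewrite would build and return the fused op here;
        # returning `post` leaves the graph unchanged.
        return post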
import onnx
import tvm
from tvm import relay
from tvm.relay.dataflow_pattern import rewrite
from tvm.relay.analysis import extract_intermdeiate_expr  # note: spelling matches TVM's own API
from yolo_dfl import DFLV1Rewrite, DFLV2Rewrite, DFLV3Rewrite
from yolo_dist2bbox import Dist2BBoxRewrite
from yolo_concat_split import VTAYoloOutputConcatSplitRewrite
from yolo_concat_split_concat import VTAYoloOutputConcatSplitConcatRewrite
onnx_model = onnx.load(f"{root_dir}/{output_name}.onnx")
mod, params = relay.frontend.from_onnx(onnx_model, {"data": shape}, freeze_params=True)
mod = relay.transform.InferType()(mod)
with tvm.transform.PassContext(opt_level=3):
    # Fuse concatenate+reshape+concatenate+split
    mod["main"] = rewrite(VTAYoloOutputConcatSplitRewrite(), mod["main"])
    mod = relay.transform.InferType()(mod)
    # Simplify the DFL structure
    mod["main"] = rewrite(DFLV1Rewrite(), mod["main"])
    mod = relay.transform.InferType()(mod)
    mod["main"] = rewrite(DFLV2Rewrite(), mod["main"])
    mod = relay.transform.InferType()(mod)
    mod["main"] = rewrite(DFLV3Rewrite(), mod["main"])
    mod = relay.transform.InferType()(mod)
    # Fuse Dist2BBox
    mod["main"] = rewrite(Dist2BBoxRewrite(), mod["main"])
    mod = relay.transform.InferType()(mod)
    # Fuse the YOLO output concat+split+concat nodes
    mod["main"] = rewrite(VTAYoloOutputConcatSplitConcatRewrite(), mod["main"])
    mod = relay.transform.InferType()(mod)
mod.show()
def @main(%data: Tensor[(1, 3, 48, 80), float32] /* ty=Tensor[(1, 3, 48, 80), float32] span=/conv0/Conv.data:0:0 */) -> Tensor[(1, 84, 5040), float32] {
%0 = nn.conv2d(%data, meta[relay.Constant][0] /* ty=Tensor[(16, 3, 1, 1), float32] span=/conv0/Conv.conv0.weight:0:0 */, padding=[0, 0, 0, 0], channels=16, kernel_size=[1, 1]) /* ty=Tensor[(1, 16, 48, 80), float32] span=/conv0/Conv:0:0 */;
%1 = nn.conv2d(%0, meta[relay.Constant][1] /* ty=Tensor[(16, 16, 1, 1), float32] span=/conv1/Conv.conv1.weight:0:0 */, padding=[0, 0, 0, 0], channels=16, kernel_size=[1, 1]) /* ty=Tensor[(1, 16, 48, 80), float32] span=/conv1/Conv:0:0 */;
%2 = nn.conv2d(%1, meta[relay.Constant][2] /* ty=Tensor[(64, 16, 3, 3), float32] span=/det/cv2.0/cv2.0.0/conv/Conv.onnx::Conv_296:0:0 */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.0/conv/Conv:0:0 */;
%3 = nn.bias_add(%2, meta[relay.Constant][3] /* ty=Tensor[(64), float32] span=/det/cv2.0/cv2.0.0/conv/Conv.onnx::Conv_297:0:0 */) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.0/conv/Conv:0:0 */;
%4 = nn.relu(%3) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.0/act/Relu:0:0 */;
%5 = nn.conv2d(%4, meta[relay.Constant][4] /* ty=Tensor[(64, 64, 3, 3), float32] span=/det/cv2.0/cv2.0.1/conv/Conv.onnx::Conv_299:0:0 */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.1/conv/Conv:0:0 */;
%6 = nn.bias_add(%5, meta[relay.Constant][5] /* ty=Tensor[(64), float32] span=/det/cv2.0/cv2.0.1/conv/Conv.onnx::Conv_300:0:0 */) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.1/conv/Conv:0:0 */;
%7 = nn.relu(%6) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.1/act/Relu:0:0 */;
%8 = nn.conv2d(%7, meta[relay.Constant][6] /* ty=Tensor[(64, 64, 1, 1), float32] span=/det/cv2.0/cv2.0.2/Conv.det.cv2.0.2.weight:0:0 */, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1]) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.2/Conv:0:0 */;
%9 = nn.conv2d(%1, meta[relay.Constant][8] /* ty=Tensor[(80, 16, 3, 3), float32] span=/det/cv3.0/cv3.0.0/conv/Conv.onnx::Conv_302:0:0 */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3]) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.0/conv/Conv:0:0 */;
%10 = nn.bias_add(%9, meta[relay.Constant][9] /* ty=Tensor[(80), float32] span=/det/cv3.0/cv3.0.0/conv/Conv.onnx::Conv_303:0:0 */) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.0/conv/Conv:0:0 */;
%11 = nn.relu(%10) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.0/act/Relu:0:0 */;
%12 = nn.conv2d(%11, meta[relay.Constant][10] /* ty=Tensor[(80, 80, 3, 3), float32] span=/det/cv3.0/cv3.0.1/conv/Conv.onnx::Conv_305:0:0 */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3]) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.1/conv/Conv:0:0 */;
%13 = nn.bias_add(%12, meta[relay.Constant][11] /* ty=Tensor[(80), float32] span=/det/cv3.0/cv3.0.1/conv/Conv.onnx::Conv_306:0:0 */) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.1/conv/Conv:0:0 */;
%14 = nn.relu(%13) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.1/act/Relu:0:0 */;
%15 = nn.conv2d(%14, meta[relay.Constant][12] /* ty=Tensor[(80, 80, 1, 1), float32] span=/det/cv3.0/cv3.0.2/Conv.det.cv3.0.2.weight:0:0 */, padding=[0, 0, 0, 0], channels=80, kernel_size=[1, 1]) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.2/Conv:0:0 */;
%16 = nn.conv2d(%0, meta[relay.Constant][14] /* ty=Tensor[(256, 16, 1, 1), float32] span=/conv2/Conv.conv2.weight:0:0 */, strides=[2, 2], padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1]) /* ty=Tensor[(1, 256, 24, 40), float32] span=/conv2/Conv:0:0 */;
%17 = nn.conv2d(%16, meta[relay.Constant][15] /* ty=Tensor[(64, 256, 3, 3), float32] span=/det/cv2.1/cv2.1.0/conv/Conv.onnx::Conv_308:0:0 */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.0/conv/Conv:0:0 */;
%18 = nn.bias_add(%17, meta[relay.Constant][16] /* ty=Tensor[(64), float32] span=/det/cv2.1/cv2.1.0/conv/Conv.onnx::Conv_309:0:0 */) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.0/conv/Conv:0:0 */;
%19 = nn.relu(%18) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.0/act/Relu:0:0 */;
%20 = nn.conv2d(%19, meta[relay.Constant][17] /* ty=Tensor[(64, 64, 3, 3), float32] span=/det/cv2.1/cv2.1.1/conv/Conv.onnx::Conv_311:0:0 */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.1/conv/Conv:0:0 */;
%21 = nn.bias_add(%20, meta[relay.Constant][18] /* ty=Tensor[(64), float32] span=/det/cv2.1/cv2.1.1/conv/Conv.onnx::Conv_312:0:0 */) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.1/conv/Conv:0:0 */;
%22 = nn.relu(%21) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.1/act/Relu:0:0 */;
%23 = nn.conv2d(%22, meta[relay.Constant][19] /* ty=Tensor[(64, 64, 1, 1), float32] span=/det/cv2.1/cv2.1.2/Conv.det.cv2.1.2.weight:0:0 */, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1]) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.2/Conv:0:0 */;
%24 = nn.conv2d(%16, meta[relay.Constant][21] /* ty=Tensor[(80, 256, 3, 3), float32] span=/det/cv3.1/cv3.1.0/conv/Conv.onnx::Conv_314:0:0 */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3]) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.0/conv/Conv:0:0 */;
%25 = nn.bias_add(%24, meta[relay.Constant][22] /* ty=Tensor[(80), float32] span=/det/cv3.1/cv3.1.0/conv/Conv.onnx::Conv_315:0:0 */) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.0/conv/Conv:0:0 */;
%26 = nn.relu(%25) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.0/act/Relu:0:0 */;
%27 = nn.conv2d(%26, meta[relay.Constant][23] /* ty=Tensor[(80, 80, 3, 3), float32] span=/det/cv3.1/cv3.1.1/conv/Conv.onnx::Conv_317:0:0 */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3]) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.1/conv/Conv:0:0 */;
%28 = nn.bias_add(%27, meta[relay.Constant][24] /* ty=Tensor[(80), float32] span=/det/cv3.1/cv3.1.1/conv/Conv.onnx::Conv_318:0:0 */) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.1/conv/Conv:0:0 */;
%29 = nn.relu(%28) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.1/act/Relu:0:0 */;
%30 = nn.conv2d(%29, meta[relay.Constant][25] /* ty=Tensor[(80, 80, 1, 1), float32] span=/det/cv3.1/cv3.1.2/Conv.det.cv3.1.2.weight:0:0 */, padding=[0, 0, 0, 0], channels=80, kernel_size=[1, 1]) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.2/Conv:0:0 */;
%31 = nn.conv2d(%0, meta[relay.Constant][27] /* ty=Tensor[(1024, 16, 1, 1), float32] span=/conv3/Conv.conv3.weight:0:0 */, strides=[4, 4], padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1]) /* ty=Tensor[(1, 1024, 12, 20), float32] span=/conv3/Conv:0:0 */;
%32 = nn.conv2d(%31, meta[relay.Constant][28] /* ty=Tensor[(64, 1024, 3, 3), float32] span=/det/cv2.2/cv2.2.0/conv/Conv.onnx::Conv_320:0:0 */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.0/conv/Conv:0:0 */;
%33 = nn.bias_add(%32, meta[relay.Constant][29] /* ty=Tensor[(64), float32] span=/det/cv2.2/cv2.2.0/conv/Conv.onnx::Conv_321:0:0 */) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.0/conv/Conv:0:0 */;
%34 = nn.relu(%33) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.0/act/Relu:0:0 */;
%35 = nn.conv2d(%34, meta[relay.Constant][30] /* ty=Tensor[(64, 64, 3, 3), float32] span=/det/cv2.2/cv2.2.1/conv/Conv.onnx::Conv_323:0:0 */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.1/conv/Conv:0:0 */;
%36 = nn.bias_add(%35, meta[relay.Constant][31] /* ty=Tensor[(64), float32] span=/det/cv2.2/cv2.2.1/conv/Conv.onnx::Conv_324:0:0 */) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.1/conv/Conv:0:0 */;
%37 = nn.relu(%36) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.1/act/Relu:0:0 */;
%38 = nn.conv2d(%37, meta[relay.Constant][32] /* ty=Tensor[(64, 64, 1, 1), float32] span=/det/cv2.2/cv2.2.2/Conv.det.cv2.2.2.weight:0:0 */, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1]) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.2/Conv:0:0 */;
%39 = nn.conv2d(%31, meta[relay.Constant][34] /* ty=Tensor[(80, 1024, 3, 3), float32] span=/det/cv3.2/cv3.2.0/conv/Conv.onnx::Conv_326:0:0 */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3]) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.0/conv/Conv:0:0 */;
%40 = nn.bias_add(%39, meta[relay.Constant][35] /* ty=Tensor[(80), float32] span=/det/cv3.2/cv3.2.0/conv/Conv.onnx::Conv_327:0:0 */) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.0/conv/Conv:0:0 */;
%41 = nn.relu(%40) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.0/act/Relu:0:0 */;
%42 = nn.conv2d(%41, meta[relay.Constant][36] /* ty=Tensor[(80, 80, 3, 3), float32] span=/det/cv3.2/cv3.2.1/conv/Conv.onnx::Conv_329:0:0 */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3]) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.1/conv/Conv:0:0 */;
%43 = nn.bias_add(%42, meta[relay.Constant][37] /* ty=Tensor[(80), float32] span=/det/cv3.2/cv3.2.1/conv/Conv.onnx::Conv_330:0:0 */) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.1/conv/Conv:0:0 */;
%44 = nn.relu(%43) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.1/act/Relu:0:0 */;
%45 = nn.conv2d(%44, meta[relay.Constant][38] /* ty=Tensor[(80, 80, 1, 1), float32] span=/det/cv3.2/cv3.2.2/Conv.det.cv3.2.2.weight:0:0 */, padding=[0, 0, 0, 0], channels=80, kernel_size=[1, 1]) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.2/Conv:0:0 */;
%46 = nn.bias_add(%8, meta[relay.Constant][7] /* ty=Tensor[(64), float32] span=/det/cv2.0/cv2.0.2/Conv.det.cv2.0.2.bias:0:0 */) /* ty=Tensor[(1, 64, 48, 80), float32] span=/det/cv2.0/cv2.0.2/Conv:0:0 */;
%47 = nn.bias_add(%15, meta[relay.Constant][13] /* ty=Tensor[(80), float32] span=/det/cv3.0/cv3.0.2/Conv.det.cv3.0.2.bias:0:0 */) /* ty=Tensor[(1, 80, 48, 80), float32] span=/det/cv3.0/cv3.0.2/Conv:0:0 */;
%48 = nn.bias_add(%23, meta[relay.Constant][20] /* ty=Tensor[(64), float32] span=/det/cv2.1/cv2.1.2/Conv.det.cv2.1.2.bias:0:0 */) /* ty=Tensor[(1, 64, 24, 40), float32] span=/det/cv2.1/cv2.1.2/Conv:0:0 */;
%49 = nn.bias_add(%30, meta[relay.Constant][26] /* ty=Tensor[(80), float32] span=/det/cv3.1/cv3.1.2/Conv.det.cv3.1.2.bias:0:0 */) /* ty=Tensor[(1, 80, 24, 40), float32] span=/det/cv3.1/cv3.1.2/Conv:0:0 */;
%50 = nn.bias_add(%38, meta[relay.Constant][33] /* ty=Tensor[(64), float32] span=/det/cv2.2/cv2.2.2/Conv.det.cv2.2.2.bias:0:0 */) /* ty=Tensor[(1, 64, 12, 20), float32] span=/det/cv2.2/cv2.2.2/Conv:0:0 */;
%51 = nn.bias_add(%45, meta[relay.Constant][39] /* ty=Tensor[(80), float32] span=/det/cv3.2/cv3.2.2/Conv.det.cv3.2.2.bias:0:0 */) /* ty=Tensor[(1, 80, 12, 20), float32] span=/det/cv3.2/cv3.2.2/Conv:0:0 */;
%52 = (%46, %47, %48, %49, %50, %51) /* ty=(Tensor[(1, 64, 48, 80), float32], Tensor[(1, 80, 48, 80), float32], Tensor[(1, 64, 24, 40), float32], Tensor[(1, 80, 24, 40), float32], Tensor[(1, 64, 12, 20), float32], Tensor[(1, 80, 12, 20), float32]) */;
vta_special.yolo_concat_split_concat(%52, __dict__={"x0_split"=-1, "x5_split"=-1, "x4_scale"=-1, "strides"=[8, 16, 32], "x0_scale"=-1, "x5_scale"=-1, "x1_split"=-1, "x2_split"=-1, "x1_scale"=-1, "y_split"=64, "x3_split"=-1, "x2_scale"=-1, "grid_cell_offset"=0.5f, "x4_split"=-1, "x3_scale"=-1}) /* ty=Tensor[(1, 84, 5040), float32] */
}
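After the rewrites, the entire post-processing tail collapses into a single `vta_special.yolo_concat_split_concat` call whose attributes carry the Detect-head parameters: strides=[8, 16, 32], grid_cell_offset=0.5, and y_split=64 for the 64/80 box/class split. A small sketch for inspecting those attributes from Python, assuming the custom op stores them as dictionary attributes (as the `__dict__=` printout suggests):
call = mod["main"].body          # the fused vta_special call at the output
print(call.op)                   # vta_special.yolo_concat_split_concat
print(call.attrs["strides"])     # [8, 16, 32]
print(call.attrs["y_split"])     # 64: box distribution (4*16) vs. 80 class scores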
from PIL import Image
import numpy as np
# Dummy input frame standing in for a real image (H=48, W=80, RGB).
image = np.random.normal(0, 1, size=(48, 80, 3)).astype("uint8")
# Normalize: subtract mean 128, divide by 256.
mean = (128,)
std = (256,)
data = (image - mean)/std
data = data.transpose((2, 0, 1))  # HWC -> CHW
data = np.expand_dims(data, 0).astype("float32")  # add batch dim -> NCHW
images = np.expand_dims(image, 0)
images.tofile(f"{root_dir}/input.bin")  # raw uint8 dump
Image.fromarray(image).resize((112, 112))
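Before quantizing, it can be useful to produce a float reference output for this dummy frame. The sketch below (an addition, not part of the original flow) re-imports the ONNX model so the module contains only stock Relay ops — the rewritten `mod` above ends in the custom vta_special op, which has no stock CPU implementation — and runs it on LLVM.
ref_mod, _ = relay.frontend.from_onnx(onnx_model, {"data": shape}, freeze_params=True)
ex = relay.create_executor("graph", mod=ref_mod, target="llvm")
ref_out = ex.evaluate()(data)
print(ref_out.shape)  # expected: (1, 84, 5040)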
from dataclasses import dataclass

@dataclass
class Dataset:
    input_name: str
    shape: tuple

    def __iter__(self):
        for _ in range(2):
            yield {self.input_name: data}
        # for _ in range(50):
        #     yield {self.input_name: np.random.normal(0, 1, size=self.shape).astype("float32")}

dataset = Dataset(input_name, shape)
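The two-sample dataset above is only a smoke test; KL-divergence calibration normally wants a few dozen representative frames. A hedged sketch of an image-backed iterator (directory, file glob, and sample count are illustrative):
from pathlib import Path

def calib_samples(image_dir, input_name, limit=32):
    """Yield normalized NCHW frames from a directory of images."""
    for path in sorted(Path(image_dir).glob("*.jpg"))[:limit]:
        # PIL resize takes (width, height); array comes back as (48, 80, 3)
        img = np.asarray(Image.open(path).convert("RGB").resize((80, 48)))
        x = ((img - 128) / 256).transpose(2, 0, 1)[None].astype("float32")
        yield {input_name: x}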
with tvm.transform.PassContext(opt_level=3):
    with relay.quantize.qconfig(
        skip_conv_layers=[],
        calibrate_mode="kl_divergence",
        weight_scale="max",
        # round_for_shift=True,
        # rounding="TONEAREST",  # "UPWARD" or "TONEAREST"
        skip_dense_layer=False,
    ):
        qmod = relay.quantize.quantize(mod, params=params, dataset=dataset)
WARNING:autotvm:One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details.
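The autotvm warning is benign here: it only reports that no tuned schedules exist for these ops, which affects calibration speed, not correctness. For quick experiments there is also a calibration-free qconfig variant (a sketch; the global_scale value is illustrative):
with tvm.transform.PassContext(opt_level=3):
    with relay.quantize.qconfig(
        skip_conv_layers=[],
        skip_dense_layer=False,
        calibrate_mode="global_scale",
        global_scale=8.0,  # one scale for all activations
    ):
        qmod_gs = relay.quantize.quantize(mod, params=params)  # no dataset needed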
qmod.show()
def @main(%data: Tensor[(1, 3, 48, 80), float32] /* ty=Tensor[(1, 3, 48, 80), float32] span=/conv0/Conv.data:0:0 */) -> Tensor[(1, 84, 5040), float32] {
%0 = multiply(%data, 257.222f /* ty=float32 */) /* ty=Tensor[(1, 3, 48, 80), float32] */;
%1 = round(%0) /* ty=Tensor[(1, 3, 48, 80), float32] */;
%2 = clip(%1, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 3, 48, 80), float32] */;
%3 = cast(%2, dtype="int8") /* ty=Tensor[(1, 3, 48, 80), int8] */;
%4 = nn.conv2d(%3, meta[relay.Constant][0] /* ty=Tensor[(16, 3, 1, 1), int8] */, padding=[0, 0, 0, 0], channels=16, kernel_size=[1, 1], out_dtype="int32") /* ty=Tensor[(1, 16, 48, 80), int32] */;
%5 = cast(%4, dtype="int64") /* ty=Tensor[(1, 16, 48, 80), int64] */;
%6 = fixed_point_multiply(%5, multiplier=1729292288, shift=-8) /* ty=Tensor[(1, 16, 48, 80), int64] */;
%7 = clip(%6, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 16, 48, 80), int64] */;
%8 = cast(%7, dtype="int32") /* ty=Tensor[(1, 16, 48, 80), int32] */;
%9 = cast(%8, dtype="int8") /* ty=Tensor[(1, 16, 48, 80), int8] */;
%10 = annotation.stop_fusion(%9) /* ty=Tensor[(1, 16, 48, 80), int8] */;
%11 = nn.conv2d(%10, meta[relay.Constant][1] /* ty=Tensor[(16, 16, 1, 1), int8] */, padding=[0, 0, 0, 0], channels=16, kernel_size=[1, 1], out_dtype="int32") /* ty=Tensor[(1, 16, 48, 80), int32] */;
%12 = cast(%11, dtype="int64") /* ty=Tensor[(1, 16, 48, 80), int64] */;
%13 = fixed_point_multiply(%12, multiplier=2084930560, shift=-8) /* ty=Tensor[(1, 16, 48, 80), int64] */;
%14 = clip(%13, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 16, 48, 80), int64] */;
%15 = cast(%14, dtype="int32") /* ty=Tensor[(1, 16, 48, 80), int32] */;
%16 = cast(%15, dtype="int8") /* ty=Tensor[(1, 16, 48, 80), int8] */;
%17 = annotation.stop_fusion(%16) /* ty=Tensor[(1, 16, 48, 80), int8] */;
%18 = nn.conv2d(%17, meta[relay.Constant][2] /* ty=Tensor[(64, 16, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 64, 48, 80), int32] */;
%19 = fixed_point_multiply(%18, multiplier=0, shift=0) /* ty=Tensor[(1, 64, 48, 80), int32] */;
%20 = cast(%19, dtype="int32") /* ty=Tensor[(1, 64, 48, 80), int32] */;
%21 = add(%20, meta[relay.Constant][3] /* ty=Tensor[(64, 1, 1), int32] */) /* ty=Tensor[(1, 64, 48, 80), int32] */;
%22 = nn.relu(%21) /* ty=Tensor[(1, 64, 48, 80), int32] */;
%23 = cast(%22, dtype="int64") /* ty=Tensor[(1, 64, 48, 80), int64] */;
%24 = fixed_point_multiply(%23, multiplier=0, shift=0) /* ty=Tensor[(1, 64, 48, 80), int64] */;
%25 = clip(%24, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 64, 48, 80), int64] */;
%26 = cast(%25, dtype="int32") /* ty=Tensor[(1, 64, 48, 80), int32] */;
%27 = cast(%26, dtype="int8") /* ty=Tensor[(1, 64, 48, 80), int8] */;
%28 = annotation.stop_fusion(%27) /* ty=Tensor[(1, 64, 48, 80), int8] */;
%29 = nn.conv2d(%28, meta[relay.Constant][4] /* ty=Tensor[(64, 64, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 64, 48, 80), int32] */;
%30 = fixed_point_multiply(%29, multiplier=0, shift=0) /* ty=Tensor[(1, 64, 48, 80), int32] */;
%31 = cast(%30, dtype="int32") /* ty=Tensor[(1, 64, 48, 80), int32] */;
%32 = add(%31, meta[relay.Constant][5] /* ty=Tensor[(64, 1, 1), int32] */) /* ty=Tensor[(1, 64, 48, 80), int32] */;
%33 = nn.relu(%32) /* ty=Tensor[(1, 64, 48, 80), int32] */;
%34 = cast(%33, dtype="int64") /* ty=Tensor[(1, 64, 48, 80), int64] */;
%35 = fixed_point_multiply(%34, multiplier=0, shift=0) /* ty=Tensor[(1, 64, 48, 80), int64] */;
%36 = clip(%35, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 64, 48, 80), int64] */;
%37 = cast(%36, dtype="int32") /* ty=Tensor[(1, 64, 48, 80), int32] */;
%38 = cast(%37, dtype="int8") /* ty=Tensor[(1, 64, 48, 80), int8] */;
%39 = annotation.stop_fusion(%38) /* ty=Tensor[(1, 64, 48, 80), int8] */;
%40 = nn.conv2d(%39, meta[relay.Constant][6] /* ty=Tensor[(64, 64, 1, 1), int8] */, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1], out_dtype="int32") /* ty=Tensor[(1, 64, 48, 80), int32] */;
%41 = add(%40, meta[relay.Constant][7] /* ty=Tensor[(64, 1, 1), int32] */) /* ty=Tensor[(1, 64, 48, 80), int32] */;
%42 = cast(%41, dtype="int64") /* ty=Tensor[(1, 64, 48, 80), int64] */;
%43 = fixed_point_multiply(%42, multiplier=1769092864, shift=-10) /* ty=Tensor[(1, 64, 48, 80), int64] */;
%44 = clip(%43, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 64, 48, 80), int64] */;
%45 = cast(%44, dtype="int32") /* ty=Tensor[(1, 64, 48, 80), int32] */;
%46 = cast(%45, dtype="int8") /* ty=Tensor[(1, 64, 48, 80), int8] */;
%47 = annotation.stop_fusion(%46) /* ty=Tensor[(1, 64, 48, 80), int8] */;
%48 = cast(%47, dtype="float32") /* ty=Tensor[(1, 64, 48, 80), float32] */;
%49 = cast(%15, dtype="int8") /* ty=Tensor[(1, 16, 48, 80), int8] */;
%50 = annotation.stop_fusion(%49) /* ty=Tensor[(1, 16, 48, 80), int8] */;
%51 = nn.conv2d(%50, meta[relay.Constant][8] /* ty=Tensor[(80, 16, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 80, 48, 80), int32] */;
%52 = fixed_point_multiply(%51, multiplier=0, shift=0) /* ty=Tensor[(1, 80, 48, 80), int32] */;
%53 = cast(%52, dtype="int32") /* ty=Tensor[(1, 80, 48, 80), int32] */;
%54 = add(%53, meta[relay.Constant][9] /* ty=Tensor[(80, 1, 1), int32] */) /* ty=Tensor[(1, 80, 48, 80), int32] */;
%55 = nn.relu(%54) /* ty=Tensor[(1, 80, 48, 80), int32] */;
%56 = cast(%55, dtype="int64") /* ty=Tensor[(1, 80, 48, 80), int64] */;
%57 = fixed_point_multiply(%56, multiplier=0, shift=0) /* ty=Tensor[(1, 80, 48, 80), int64] */;
%58 = clip(%57, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 80, 48, 80), int64] */;
%59 = cast(%58, dtype="int32") /* ty=Tensor[(1, 80, 48, 80), int32] */;
%60 = cast(%59, dtype="int8") /* ty=Tensor[(1, 80, 48, 80), int8] */;
%61 = annotation.stop_fusion(%60) /* ty=Tensor[(1, 80, 48, 80), int8] */;
%62 = nn.conv2d(%61, meta[relay.Constant][10] /* ty=Tensor[(80, 80, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 80, 48, 80), int32] */;
%63 = fixed_point_multiply(%62, multiplier=0, shift=0) /* ty=Tensor[(1, 80, 48, 80), int32] */;
%64 = cast(%63, dtype="int32") /* ty=Tensor[(1, 80, 48, 80), int32] */;
%65 = add(%64, meta[relay.Constant][11] /* ty=Tensor[(80, 1, 1), int32] */) /* ty=Tensor[(1, 80, 48, 80), int32] */;
%66 = nn.relu(%65) /* ty=Tensor[(1, 80, 48, 80), int32] */;
%67 = cast(%66, dtype="int64") /* ty=Tensor[(1, 80, 48, 80), int64] */;
%68 = fixed_point_multiply(%67, multiplier=0, shift=0) /* ty=Tensor[(1, 80, 48, 80), int64] */;
%69 = clip(%68, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 80, 48, 80), int64] */;
%70 = cast(%69, dtype="int32") /* ty=Tensor[(1, 80, 48, 80), int32] */;
%71 = cast(%70, dtype="int8") /* ty=Tensor[(1, 80, 48, 80), int8] */;
%72 = annotation.stop_fusion(%71) /* ty=Tensor[(1, 80, 48, 80), int8] */;
%73 = nn.conv2d(%72, meta[relay.Constant][12] /* ty=Tensor[(80, 80, 1, 1), int8] */, padding=[0, 0, 0, 0], channels=80, kernel_size=[1, 1], out_dtype="int32") /* ty=Tensor[(1, 80, 48, 80), int32] */;
%74 = add(%73, meta[relay.Constant][13] /* ty=Tensor[(80, 1, 1), int32] */) /* ty=Tensor[(1, 80, 48, 80), int32] */;
%75 = cast(%74, dtype="int64") /* ty=Tensor[(1, 80, 48, 80), int64] */;
%76 = fixed_point_multiply(%75, multiplier=1173832960, shift=-9) /* ty=Tensor[(1, 80, 48, 80), int64] */;
%77 = clip(%76, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 80, 48, 80), int64] */;
%78 = cast(%77, dtype="int32") /* ty=Tensor[(1, 80, 48, 80), int32] */;
%79 = cast(%78, dtype="int8") /* ty=Tensor[(1, 80, 48, 80), int8] */;
%80 = annotation.stop_fusion(%79) /* ty=Tensor[(1, 80, 48, 80), int8] */;
%81 = cast(%80, dtype="float32") /* ty=Tensor[(1, 80, 48, 80), float32] */;
%82 = cast(%8, dtype="int8") /* ty=Tensor[(1, 16, 48, 80), int8] */;
%83 = annotation.stop_fusion(%82) /* ty=Tensor[(1, 16, 48, 80), int8] */;
%84 = nn.conv2d(%83, meta[relay.Constant][14] /* ty=Tensor[(256, 16, 1, 1), int8] */, strides=[2, 2], padding=[0, 0, 0, 0], channels=256, kernel_size=[1, 1], out_dtype="int32") /* ty=Tensor[(1, 256, 24, 40), int32] */;
%85 = cast(%84, dtype="int64") /* ty=Tensor[(1, 256, 24, 40), int64] */;
%86 = fixed_point_multiply(%85, multiplier=1727989888, shift=-8) /* ty=Tensor[(1, 256, 24, 40), int64] */;
%87 = clip(%86, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 256, 24, 40), int64] */;
%88 = cast(%87, dtype="int32") /* ty=Tensor[(1, 256, 24, 40), int32] */;
%89 = cast(%88, dtype="int8") /* ty=Tensor[(1, 256, 24, 40), int8] */;
%90 = annotation.stop_fusion(%89) /* ty=Tensor[(1, 256, 24, 40), int8] */;
%91 = nn.conv2d(%90, meta[relay.Constant][15] /* ty=Tensor[(64, 256, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 64, 24, 40), int32] */;
%92 = fixed_point_multiply(%91, multiplier=0, shift=0) /* ty=Tensor[(1, 64, 24, 40), int32] */;
%93 = cast(%92, dtype="int32") /* ty=Tensor[(1, 64, 24, 40), int32] */;
%94 = add(%93, meta[relay.Constant][16] /* ty=Tensor[(64, 1, 1), int32] */) /* ty=Tensor[(1, 64, 24, 40), int32] */;
%95 = nn.relu(%94) /* ty=Tensor[(1, 64, 24, 40), int32] */;
%96 = cast(%95, dtype="int64") /* ty=Tensor[(1, 64, 24, 40), int64] */;
%97 = fixed_point_multiply(%96, multiplier=0, shift=0) /* ty=Tensor[(1, 64, 24, 40), int64] */;
%98 = clip(%97, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 64, 24, 40), int64] */;
%99 = cast(%98, dtype="int32") /* ty=Tensor[(1, 64, 24, 40), int32] */;
%100 = cast(%99, dtype="int8") /* ty=Tensor[(1, 64, 24, 40), int8] */;
%101 = annotation.stop_fusion(%100) /* ty=Tensor[(1, 64, 24, 40), int8] */;
%102 = nn.conv2d(%101, meta[relay.Constant][17] /* ty=Tensor[(64, 64, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 64, 24, 40), int32] */;
%103 = fixed_point_multiply(%102, multiplier=0, shift=0) /* ty=Tensor[(1, 64, 24, 40), int32] */;
%104 = cast(%103, dtype="int32") /* ty=Tensor[(1, 64, 24, 40), int32] */;
%105 = add(%104, meta[relay.Constant][18] /* ty=Tensor[(64, 1, 1), int32] */) /* ty=Tensor[(1, 64, 24, 40), int32] */;
%106 = nn.relu(%105) /* ty=Tensor[(1, 64, 24, 40), int32] */;
%107 = cast(%106, dtype="int64") /* ty=Tensor[(1, 64, 24, 40), int64] */;
%108 = fixed_point_multiply(%107, multiplier=0, shift=0) /* ty=Tensor[(1, 64, 24, 40), int64] */;
%109 = clip(%108, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 64, 24, 40), int64] */;
%110 = cast(%109, dtype="int32") /* ty=Tensor[(1, 64, 24, 40), int32] */;
%111 = cast(%110, dtype="int8") /* ty=Tensor[(1, 64, 24, 40), int8] */;
%112 = annotation.stop_fusion(%111) /* ty=Tensor[(1, 64, 24, 40), int8] */;
%113 = nn.conv2d(%112, meta[relay.Constant][19] /* ty=Tensor[(64, 64, 1, 1), int8] */, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1], out_dtype="int32") /* ty=Tensor[(1, 64, 24, 40), int32] */;
%114 = add(%113, meta[relay.Constant][20] /* ty=Tensor[(64, 1, 1), int32] */) /* ty=Tensor[(1, 64, 24, 40), int32] */;
%115 = cast(%114, dtype="int64") /* ty=Tensor[(1, 64, 24, 40), int64] */;
%116 = fixed_point_multiply(%115, multiplier=1694233088, shift=-10) /* ty=Tensor[(1, 64, 24, 40), int64] */;
%117 = clip(%116, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 64, 24, 40), int64] */;
%118 = cast(%117, dtype="int32") /* ty=Tensor[(1, 64, 24, 40), int32] */;
%119 = cast(%118, dtype="int8") /* ty=Tensor[(1, 64, 24, 40), int8] */;
%120 = annotation.stop_fusion(%119) /* ty=Tensor[(1, 64, 24, 40), int8] */;
%121 = cast(%120, dtype="float32") /* ty=Tensor[(1, 64, 24, 40), float32] */;
%122 = cast(%88, dtype="int8") /* ty=Tensor[(1, 256, 24, 40), int8] */;
%123 = annotation.stop_fusion(%122) /* ty=Tensor[(1, 256, 24, 40), int8] */;
%124 = nn.conv2d(%123, meta[relay.Constant][21] /* ty=Tensor[(80, 256, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 80, 24, 40), int32] */;
%125 = fixed_point_multiply(%124, multiplier=0, shift=0) /* ty=Tensor[(1, 80, 24, 40), int32] */;
%126 = cast(%125, dtype="int32") /* ty=Tensor[(1, 80, 24, 40), int32] */;
%127 = add(%126, meta[relay.Constant][22] /* ty=Tensor[(80, 1, 1), int32] */) /* ty=Tensor[(1, 80, 24, 40), int32] */;
%128 = nn.relu(%127) /* ty=Tensor[(1, 80, 24, 40), int32] */;
%129 = cast(%128, dtype="int64") /* ty=Tensor[(1, 80, 24, 40), int64] */;
%130 = fixed_point_multiply(%129, multiplier=0, shift=0) /* ty=Tensor[(1, 80, 24, 40), int64] */;
%131 = clip(%130, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 80, 24, 40), int64] */;
%132 = cast(%131, dtype="int32") /* ty=Tensor[(1, 80, 24, 40), int32] */;
%133 = cast(%132, dtype="int8") /* ty=Tensor[(1, 80, 24, 40), int8] */;
%134 = annotation.stop_fusion(%133) /* ty=Tensor[(1, 80, 24, 40), int8] */;
%135 = nn.conv2d(%134, meta[relay.Constant][23] /* ty=Tensor[(80, 80, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 80, 24, 40), int32] */;
%136 = fixed_point_multiply(%135, multiplier=0, shift=0) /* ty=Tensor[(1, 80, 24, 40), int32] */;
%137 = cast(%136, dtype="int32") /* ty=Tensor[(1, 80, 24, 40), int32] */;
%138 = add(%137, meta[relay.Constant][24] /* ty=Tensor[(80, 1, 1), int32] */) /* ty=Tensor[(1, 80, 24, 40), int32] */;
%139 = nn.relu(%138) /* ty=Tensor[(1, 80, 24, 40), int32] */;
%140 = cast(%139, dtype="int64") /* ty=Tensor[(1, 80, 24, 40), int64] */;
%141 = fixed_point_multiply(%140, multiplier=0, shift=0) /* ty=Tensor[(1, 80, 24, 40), int64] */;
%142 = clip(%141, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 80, 24, 40), int64] */;
%143 = cast(%142, dtype="int32") /* ty=Tensor[(1, 80, 24, 40), int32] */;
%144 = cast(%143, dtype="int8") /* ty=Tensor[(1, 80, 24, 40), int8] */;
%145 = annotation.stop_fusion(%144) /* ty=Tensor[(1, 80, 24, 40), int8] */;
%146 = nn.conv2d(%145, meta[relay.Constant][25] /* ty=Tensor[(80, 80, 1, 1), int8] */, padding=[0, 0, 0, 0], channels=80, kernel_size=[1, 1], out_dtype="int32") /* ty=Tensor[(1, 80, 24, 40), int32] */;
%147 = add(%146, meta[relay.Constant][26] /* ty=Tensor[(80, 1, 1), int32] */) /* ty=Tensor[(1, 80, 24, 40), int32] */;
%148 = cast(%147, dtype="int64") /* ty=Tensor[(1, 80, 24, 40), int64] */;
%149 = fixed_point_multiply(%148, multiplier=1958950016, shift=-10) /* ty=Tensor[(1, 80, 24, 40), int64] */;
%150 = clip(%149, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 80, 24, 40), int64] */;
%151 = cast(%150, dtype="int32") /* ty=Tensor[(1, 80, 24, 40), int32] */;
%152 = cast(%151, dtype="int8") /* ty=Tensor[(1, 80, 24, 40), int8] */;
%153 = annotation.stop_fusion(%152) /* ty=Tensor[(1, 80, 24, 40), int8] */;
%154 = cast(%153, dtype="float32") /* ty=Tensor[(1, 80, 24, 40), float32] */;
%155 = cast(%8, dtype="int8") /* ty=Tensor[(1, 16, 48, 80), int8] */;
%156 = annotation.stop_fusion(%155) /* ty=Tensor[(1, 16, 48, 80), int8] */;
%157 = nn.conv2d(%156, meta[relay.Constant][27] /* ty=Tensor[(1024, 16, 1, 1), int8] */, strides=[4, 4], padding=[0, 0, 0, 0], channels=1024, kernel_size=[1, 1], out_dtype="int32") /* ty=Tensor[(1, 1024, 12, 20), int32] */;
%158 = cast(%157, dtype="int64") /* ty=Tensor[(1, 1024, 12, 20), int64] */;
%159 = fixed_point_multiply(%158, multiplier=1483881728, shift=-8) /* ty=Tensor[(1, 1024, 12, 20), int64] */;
%160 = clip(%159, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 1024, 12, 20), int64] */;
%161 = cast(%160, dtype="int32") /* ty=Tensor[(1, 1024, 12, 20), int32] */;
%162 = cast(%161, dtype="int8") /* ty=Tensor[(1, 1024, 12, 20), int8] */;
%163 = annotation.stop_fusion(%162) /* ty=Tensor[(1, 1024, 12, 20), int8] */;
%164 = nn.conv2d(%163, meta[relay.Constant][28] /* ty=Tensor[(64, 1024, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 64, 12, 20), int32] */;
%165 = fixed_point_multiply(%164, multiplier=0, shift=0) /* ty=Tensor[(1, 64, 12, 20), int32] */;
%166 = cast(%165, dtype="int32") /* ty=Tensor[(1, 64, 12, 20), int32] */;
%167 = add(%166, meta[relay.Constant][29] /* ty=Tensor[(64, 1, 1), int32] */) /* ty=Tensor[(1, 64, 12, 20), int32] */;
%168 = nn.relu(%167) /* ty=Tensor[(1, 64, 12, 20), int32] */;
%169 = cast(%168, dtype="int64") /* ty=Tensor[(1, 64, 12, 20), int64] */;
%170 = fixed_point_multiply(%169, multiplier=0, shift=0) /* ty=Tensor[(1, 64, 12, 20), int64] */;
%171 = clip(%170, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 64, 12, 20), int64] */;
%172 = cast(%171, dtype="int32") /* ty=Tensor[(1, 64, 12, 20), int32] */;
%173 = cast(%172, dtype="int8") /* ty=Tensor[(1, 64, 12, 20), int8] */;
%174 = annotation.stop_fusion(%173) /* ty=Tensor[(1, 64, 12, 20), int8] */;
%175 = nn.conv2d(%174, meta[relay.Constant][30] /* ty=Tensor[(64, 64, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 64, 12, 20), int32] */;
%176 = fixed_point_multiply(%175, multiplier=0, shift=0) /* ty=Tensor[(1, 64, 12, 20), int32] */;
%177 = cast(%176, dtype="int32") /* ty=Tensor[(1, 64, 12, 20), int32] */;
%178 = add(%177, meta[relay.Constant][31] /* ty=Tensor[(64, 1, 1), int32] */) /* ty=Tensor[(1, 64, 12, 20), int32] */;
%179 = nn.relu(%178) /* ty=Tensor[(1, 64, 12, 20), int32] */;
%180 = cast(%179, dtype="int64") /* ty=Tensor[(1, 64, 12, 20), int64] */;
%181 = fixed_point_multiply(%180, multiplier=0, shift=0) /* ty=Tensor[(1, 64, 12, 20), int64] */;
%182 = clip(%181, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 64, 12, 20), int64] */;
%183 = cast(%182, dtype="int32") /* ty=Tensor[(1, 64, 12, 20), int32] */;
%184 = cast(%183, dtype="int8") /* ty=Tensor[(1, 64, 12, 20), int8] */;
%185 = annotation.stop_fusion(%184) /* ty=Tensor[(1, 64, 12, 20), int8] */;
%186 = nn.conv2d(%185, meta[relay.Constant][32] /* ty=Tensor[(64, 64, 1, 1), int8] */, padding=[0, 0, 0, 0], channels=64, kernel_size=[1, 1], out_dtype="int32") /* ty=Tensor[(1, 64, 12, 20), int32] */;
%187 = add(%186, meta[relay.Constant][33] /* ty=Tensor[(64, 1, 1), int32] */) /* ty=Tensor[(1, 64, 12, 20), int32] */;
%188 = cast(%187, dtype="int64") /* ty=Tensor[(1, 64, 12, 20), int64] */;
%189 = fixed_point_multiply(%188, multiplier=2024631552, shift=-10) /* ty=Tensor[(1, 64, 12, 20), int64] */;
%190 = clip(%189, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 64, 12, 20), int64] */;
%191 = cast(%190, dtype="int32") /* ty=Tensor[(1, 64, 12, 20), int32] */;
%192 = cast(%191, dtype="int8") /* ty=Tensor[(1, 64, 12, 20), int8] */;
%193 = annotation.stop_fusion(%192) /* ty=Tensor[(1, 64, 12, 20), int8] */;
%194 = cast(%193, dtype="float32") /* ty=Tensor[(1, 64, 12, 20), float32] */;
%195 = cast(%161, dtype="int8") /* ty=Tensor[(1, 1024, 12, 20), int8] */;
%196 = annotation.stop_fusion(%195) /* ty=Tensor[(1, 1024, 12, 20), int8] */;
%197 = nn.conv2d(%196, meta[relay.Constant][34] /* ty=Tensor[(80, 1024, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 80, 12, 20), int32] */;
%198 = fixed_point_multiply(%197, multiplier=0, shift=0) /* ty=Tensor[(1, 80, 12, 20), int32] */;
%199 = cast(%198, dtype="int32") /* ty=Tensor[(1, 80, 12, 20), int32] */;
%200 = add(%199, meta[relay.Constant][35] /* ty=Tensor[(80, 1, 1), int32] */) /* ty=Tensor[(1, 80, 12, 20), int32] */;
%201 = nn.relu(%200) /* ty=Tensor[(1, 80, 12, 20), int32] */;
%202 = cast(%201, dtype="int64") /* ty=Tensor[(1, 80, 12, 20), int64] */;
%203 = fixed_point_multiply(%202, multiplier=0, shift=0) /* ty=Tensor[(1, 80, 12, 20), int64] */;
%204 = clip(%203, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 80, 12, 20), int64] */;
%205 = cast(%204, dtype="int32") /* ty=Tensor[(1, 80, 12, 20), int32] */;
%206 = cast(%205, dtype="int8") /* ty=Tensor[(1, 80, 12, 20), int8] */;
%207 = annotation.stop_fusion(%206) /* ty=Tensor[(1, 80, 12, 20), int8] */;
%208 = nn.conv2d(%207, meta[relay.Constant][36] /* ty=Tensor[(80, 80, 3, 3), int8] */, padding=[1, 1, 1, 1], channels=80, kernel_size=[3, 3], out_dtype="int32") /* ty=Tensor[(1, 80, 12, 20), int32] */;
%209 = fixed_point_multiply(%208, multiplier=0, shift=0) /* ty=Tensor[(1, 80, 12, 20), int32] */;
%210 = cast(%209, dtype="int32") /* ty=Tensor[(1, 80, 12, 20), int32] */;
%211 = add(%210, meta[relay.Constant][37] /* ty=Tensor[(80, 1, 1), int32] */) /* ty=Tensor[(1, 80, 12, 20), int32] */;
%212 = nn.relu(%211) /* ty=Tensor[(1, 80, 12, 20), int32] */;
%213 = cast(%212, dtype="int64") /* ty=Tensor[(1, 80, 12, 20), int64] */;
%214 = fixed_point_multiply(%213, multiplier=0, shift=0) /* ty=Tensor[(1, 80, 12, 20), int64] */;
%215 = clip(%214, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 80, 12, 20), int64] */;
%216 = cast(%215, dtype="int32") /* ty=Tensor[(1, 80, 12, 20), int32] */;
%217 = cast(%216, dtype="int8") /* ty=Tensor[(1, 80, 12, 20), int8] */;
%218 = annotation.stop_fusion(%217) /* ty=Tensor[(1, 80, 12, 20), int8] */;
%219 = nn.conv2d(%218, meta[relay.Constant][38] /* ty=Tensor[(80, 80, 1, 1), int8] */, padding=[0, 0, 0, 0], channels=80, kernel_size=[1, 1], out_dtype="int32") /* ty=Tensor[(1, 80, 12, 20), int32] */;
%220 = add(%219, meta[relay.Constant][39] /* ty=Tensor[(80, 1, 1), int32] */) /* ty=Tensor[(1, 80, 12, 20), int32] */;
%221 = cast(%220, dtype="int64") /* ty=Tensor[(1, 80, 12, 20), int64] */;
%222 = fixed_point_multiply(%221, multiplier=1704013184, shift=-10) /* ty=Tensor[(1, 80, 12, 20), int64] */;
%223 = clip(%222, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 80, 12, 20), int64] */;
%224 = cast(%223, dtype="int32") /* ty=Tensor[(1, 80, 12, 20), int32] */;
%225 = cast(%224, dtype="int8") /* ty=Tensor[(1, 80, 12, 20), int8] */;
%226 = annotation.stop_fusion(%225) /* ty=Tensor[(1, 80, 12, 20), int8] */;
%227 = cast(%226, dtype="float32") /* ty=Tensor[(1, 80, 12, 20), float32] */;
%228 = multiply(%48, 0.00132893f /* ty=float32 */) /* ty=Tensor[(1, 64, 48, 80), float32] */;
%229 = multiply(%81, 0.00124585f /* ty=float32 */) /* ty=Tensor[(1, 80, 48, 80), float32] */;
%230 = multiply(%121, 0.00138369f /* ty=float32 */) /* ty=Tensor[(1, 64, 24, 40), float32] */;
%231 = multiply(%154, 0.00112746f /* ty=float32 */) /* ty=Tensor[(1, 80, 24, 40), float32] */;
%232 = multiply(%194, 0.00126926f /* ty=float32 */) /* ty=Tensor[(1, 64, 12, 20), float32] */;
%233 = multiply(%227, 0.00112651f /* ty=float32 */) /* ty=Tensor[(1, 80, 12, 20), float32] */;
%234 = (%228, %229, %230, %231, %232, %233) /* ty=(Tensor[(1, 64, 48, 80), float32], Tensor[(1, 80, 48, 80), float32], Tensor[(1, 64, 24, 40), float32], Tensor[(1, 80, 24, 40), float32], Tensor[(1, 64, 12, 20), float32], Tensor[(1, 80, 12, 20), float32]) */;
vta_special.yolo_concat_split_concat(%234, __dict__={"x0_split"=-1, "x5_split"=-1, "x4_scale"=-1, "strides"=[8, 16, 32], "x0_scale"=-1, "x5_scale"=-1, "x1_split"=-1, "x2_split"=-1, "x1_scale"=-1, "y_split"=64, "x3_split"=-1, "x2_scale"=-1, "grid_cell_offset"=0.5f, "x4_split"=-1, "x3_scale"=-1}) /* ty=Tensor[(1, 84, 5040), float32] */
}
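A quick structural check that quantization did what we expect: count operator frequencies in `qmod` and confirm the graph is dominated by int8 `nn.conv2d`, `fixed_point_multiply`, `clip`, and `cast`. A sketch using Relay's ExprVisitor (class and variable names are illustrative):
from collections import Counter
from tvm.relay.expr_functor import ExprVisitor

class OpCounter(ExprVisitor):
    """Count how often each Relay operator appears in an expression."""
    def __init__(self):
        super().__init__()
        self.counts = Counter()

    def visit_call(self, call):
        if hasattr(call.op, "name"):  # skip calls to local functions
            self.counts[call.op.name] += 1
        super().visit_call(call)      # recurse into arguments

counter = OpCounter()
counter.visit(qmod["main"])
print(counter.counts.most_common(8))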
# from tvm.relay.testing import run_infer_type
# from common.configs.vta_utils import vta_compile
# from vta import export_forXM
# # mean, std = params_revise(config.mean, config.std)
# target = "VTA2.0"
# if target == "VTA2.0":
#     board_target = "xmfpga_680v200"
# elif target == "sim":
#     board_target = "xmfpga_v500"
# else:
#     raise NotImplementedError(f"target {target} is not yet supported")
# # Compile the model
# lib = vta_compile(
#     qmod=qmod,
#     params=params,
#     target=board_target,
#     mean=(0,),
#     std=(1,),
#     input_name="data",
#     debug_flag=0,
#     prepare_flag=False,
# )
# graph_pack = lib.ir_mod["main"]
# graph_pack = run_infer_type(graph_pack)