Rewriting DFL

import sys
from pathlib import Path
ROOT = Path(".").resolve().parents[5]
# print(ROOT)
sys.path.extend([f"{ROOT}/tests"])
# from tools.tag_span import _create_span, _set_span, _verify_structural_equal_with_span
import tools
from d2py.utils.file import mkdir
root_dir = ".temp"
mkdir(root_dir)
import logging
from configs.set_env import ROOT

# Configure logging (config_logging is assumed to come from the project's own tooling)
logger_name = "compile"
config_logging(f'{root_dir}/logs/{logger_name}.log', logger_name, maxBytes=5000000, backupCount=7)
logger = logging.getLogger(logger_name)
import torch
from torch.nn import functional as F
from torch import nn
from torch.onnx import OperatorExportTypes, utils


class DFL(nn.Module):
    """
    Integral module of Distribution Focal Loss (DFL).
    Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
    """

    def __init__(self, c1=16):
        """Initialize a convolutional layer with a given number of input channels."""
        super().__init__()
        self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
        x = torch.arange(c1, dtype=torch.float)
        self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
        self.c1 = c1

    def forward(self, x):
        """Applies a transformer layer on input tensor 'x' and returns a tensor."""
        b, c, a = x.shape  # batch, channels, anchors
        return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
        # return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a)
    # def forward(self, x):
    #     """Applies a transformer layer on input tensor 'x' and returns a tensor."""
    #     b, c, a = x.shape  # batch, channels, anchors
    #     x = x.view(b, 4, self.c1, a)
    #     x = x.transpose(3, 1).transpose(2, 3)
    #     x = x.softmax(3)
    #     x = x.transpose(3, 1)
    #     x = self.conv(x)
    #     return x
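
As a quick sanity check that the rewritten `forward` really computes the DFL expectation (a softmax over the `c1` bins followed by a weighted sum with weights `0..c1-1`), the conv-based path can be compared against a direct reduction. The helper name `check_dfl` below is illustrative, not part of the original code.

```python
def check_dfl(c1: int = 16, a: int = 8):
    """Illustrative check: DFL.forward should equal the expectation over the c1 bins."""
    m = DFL(c1).eval()
    x = torch.rand(1, 4 * c1, a)
    with torch.no_grad():
        y = m(x)                                  # (1, 4, a) via the 1x1 conv
        p = x.view(1, 4, c1, a).softmax(2)        # per-coordinate distribution over c1 bins
        bins = torch.arange(c1, dtype=torch.float).view(1, 1, c1, 1)
        ref = (p * bins).sum(2)                   # expectation: sum_i i * p_i
    assert torch.allclose(y, ref, atol=1e-5)

check_dfl()
```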

model = DFL()
model.eval()

shape = 1, 64, 5040
xx = torch.rand(*shape, dtype=torch.float32, requires_grad=False)
# model = torch.jit.trace(model, xx)
# Export the model
output_name = "test"
utils.export(
    model,                     # torch model
    xx,                        # model input (or a tuple for multiple inputs)
    f"{root_dir}/{output_name}.onnx",  # where to save the model (file or file-like object)
    export_params=True,        # store the trained parameter weights inside the model file
    opset_version=17,          # ONNX opset version to export to
    do_constant_folding=True,  # whether to run constant folding for optimization
    input_names=['data'],      # the model's input names
    output_names=['output'],   # the model's output names
    keep_initializers_as_inputs=True,
    # export_modules_as_functions=True,
    verbose=True,
    operator_export_type=OperatorExportTypes.ONNX_FALLTHROUGH,
    # dynamic_axes={'data': {0: 'batch_size'},    # variable-length axes
    #               'output': {0: 'batch_size'}}
)
Exported graph: graph(%data : Float(1, 64, 5040, strides=[322560, 5040, 1], requires_grad=0, device=cpu),
      %conv.weight : Float(1, 16, 1, 1, strides=[16, 1, 1, 1], requires_grad=0, device=cpu)):
  %/Constant_output_0 : Long(4, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value=    1     4    16  5040 [ CPULongType{4} ], onnx_name="/Constant"](), scope: __main__.DFL:: # /tmp/ipykernel_1370504/3377343143.py:24:0
  %/Reshape_output_0 : Float(1, 4, 16, 5040, strides=[322560, 80640, 5040, 1], requires_grad=0, device=cpu) = onnx::Reshape[allowzero=0, onnx_name="/Reshape"](%data, %/Constant_output_0), scope: __main__.DFL:: # /tmp/ipykernel_1370504/3377343143.py:24:0
  %/Transpose_output_0 : Float(1, 16, 4, 5040, strides=[322560, 5040, 80640, 1], requires_grad=0, device=cpu) = onnx::Transpose[perm=[0, 2, 1, 3], onnx_name="/Transpose"](%/Reshape_output_0), scope: __main__.DFL:: # /tmp/ipykernel_1370504/3377343143.py:24:0
  %/Softmax_output_0 : Float(1, 16, 4, 5040, strides=[322560, 20160, 5040, 1], requires_grad=0, device=cpu) = onnx::Softmax[axis=1, onnx_name="/Softmax"](%/Transpose_output_0), scope: __main__.DFL:: # /tmp/ipykernel_1370504/3377343143.py:24:0
  %/conv/Conv_output_0 : Float(1, 1, 4, 5040, strides=[20160, 20160, 5040, 1], requires_grad=0, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv/Conv"](%/Softmax_output_0, %conv.weight), scope: __main__.DFL::/torch.nn.modules.conv.Conv2d::conv # /media/pc/data/tmp/cache/conda/envs/xin/lib/python3.12/site-packages/torch/nn/modules/conv.py:456:0
  %/Constant_1_output_0 : Long(3, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value=    1     4  5040 [ CPULongType{3} ], onnx_name="/Constant_1"](), scope: __main__.DFL:: # /tmp/ipykernel_1370504/3377343143.py:24:0
  %output : Float(1, 4, 5040, strides=[20160, 5040, 1], requires_grad=0, device=cpu) = onnx::Reshape[allowzero=0, onnx_name="/Reshape_1"](%/conv/Conv_output_0, %/Constant_1_output_0), scope: __main__.DFL:: # /tmp/ipykernel_1370504/3377343143.py:24:0
  return (%output)
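
Before handing the exported file to TVM, it is worth a quick numerical check of the ONNX model against the PyTorch module. A minimal sketch, assuming `onnxruntime` is installed in the environment:

```python
import numpy as np
import onnxruntime as ort  # assumption: onnxruntime is available

sess = ort.InferenceSession(f"{root_dir}/{output_name}.onnx")
onnx_out = sess.run(["output"], {"data": xx.numpy()})[0]
with torch.no_grad():
    torch_out = model(xx).numpy()
np.testing.assert_allclose(onnx_out, torch_out, rtol=1e-5, atol=1e-5)
```
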
import onnx
import tvm
from tvm import relay
onnx_model = onnx.load(f"{root_dir}/{output_name}.onnx")
mod, params = relay.frontend.from_onnx(onnx_model, {"data": shape}, freeze_params=True)
mod = relay.transform.InferType()(mod)
# with tvm.transform.PassContext(opt_level=3):
#     mod = relay.quantize.prerequisite_optimize(mod, params)
# mod.show()
INFO|2024-05-17 16:50:38,401|compile.onnx| -> op_name: ('Reshape', {'allowzero': 0, 'tvm_custom': {'name': '/Reshape', 'num_outputs': 1}}, 17)
INFO|2024-05-17 16:50:39,151|compile.onnx| -> op_name: ('Transpose', {'perm': (0, 2, 1, 3), 'tvm_custom': {'name': '/Transpose', 'num_outputs': 1}}, 17)
INFO|2024-05-17 16:50:39,154|compile.onnx| -> op_name: ('Softmax', {'axis': 1, 'tvm_custom': {'name': '/Softmax', 'num_outputs': 1}}, 17)
INFO|2024-05-17 16:50:39,157|compile.onnx| -> op_name: ('Conv', {'dilations': (1, 1), 'group': 1, 'kernel_shape': (1, 1), 'pads': (0, 0, 0, 0), 'strides': (1, 1), 'tvm_custom': {'name': '/conv/Conv', 'num_outputs': 1}}, 17)
INFO|2024-05-17 16:50:39,162|compile.onnx| -> op_name: ('Reshape', {'allowzero': 0, 'tvm_custom': {'name': '/Reshape_1', 'num_outputs': 1}}, 17)
print(mod)
def @main(%data: Tensor[(1, 64, 5040), float32]) -> Tensor[(1, 4, 5040), float32] {
  %0 = reshape(%data, newshape=[1, 4, 16, 5040]) /* ty=Tensor[(1, 4, 16, 5040), float32] */;
  %1 = transpose(%0, axes=[0, 2, 1, 3]) /* ty=Tensor[(1, 16, 4, 5040), float32] */;
  %2 = nn.softmax(%1, axis=1) /* ty=Tensor[(1, 16, 4, 5040), float32] */;
  %3 = nn.conv2d(%2, meta[relay.Constant][0] /* ty=Tensor[(1, 16, 1, 1), float32] */, padding=[0, 0, 0, 0], channels=1, kernel_size=[1, 1]) /* ty=Tensor[(1, 1, 4, 5040), float32] */;
  reshape(%3, newshape=[1, 4, 5040]) /* ty=Tensor[(1, 4, 5040), float32] */
}
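
Finally, the Relay module can be built for a plain CPU target and executed to confirm that the compiled result matches PyTorch. This is a minimal sketch assuming an `llvm`-enabled TVM build; the target and tolerances are illustrative.

```python
import numpy as np
from tvm.contrib import graph_executor

with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="llvm", params=params)

dev = tvm.cpu()
rt = graph_executor.GraphModule(lib["default"](dev))
rt.set_input("data", xx.numpy())
rt.run()
tvm_out = rt.get_output(0).numpy()

with torch.no_grad():
    torch_out = model(xx).numpy()
np.testing.assert_allclose(tvm_out, torch_out, rtol=1e-5, atol=1e-5)
```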