torch.Tensor.expand() 转换

torch.Tensor.expand() 转换

# Move to the repository root so the local helper modules below are importable.
%cd ../../..
import set_env
from d2py.utils.file import mkdir
# Scratch directory that will hold the exported ONNX files.
temp_dir = ".temp"
mkdir(temp_dir)
/media/pc/data/lxw/ai/tvm-book/doc/tutorials/frontend
import torch
from torch import nn

class Model(nn.Module):
    """Toy module whose forward pass broadcasts its input to shape (1, 3, 4).

    ``Tensor.expand`` returns a read-only broadcast view: singleton
    dimensions of ``x`` are repeated to the target size without copying.
    """

    def forward(self, x):
        target_shape = (1, 3, 4)
        return x.expand(*target_shape)

# Input with singleton leading dims so expand() can broadcast it to (1, 3, 4).
shape = (1, 1, 4)
x = torch.rand(shape)

# Instantiate the module under test.
torch_model = Model()
# Export the model to ONNX.
output_name = "expand"
torch.onnx.export(
    torch_model,               # the torch model to export
    x,                         # model input (use a tuple for multiple inputs)
    f"{temp_dir}/{output_name}.onnx",               # where to save the model (a file or file-like object)
    export_params=True,        # store the trained parameter weights inside the model file
    opset_version=17,          # the ONNX opset version to export with
    verbose=True,
    do_constant_folding=True,  # whether to apply constant folding for optimization
    input_names = ['data'],    # the model's input names
    output_names = ['output'], # the model's output names
    dynamic_axes={'data' : {0 : 'batch_size'},    # variable-length axes
                  'output' : {0 : 'batch_size'}}
)
Exported graph: graph(%data : Float(*, 7, strides=[7, 1], requires_grad=0, device=cpu)):
  %/Constant_output_0 : Long(4, strides=[1], device=cpu) = onnx::Constant[value= 1  3 -1  7 [ CPULongType{4} ], onnx_name="/Constant"](), scope: __main__.Model:: # /tmp/ipykernel_393401/573755285.py:6:0
  %/Constant_1_output_0 : Long(1, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value={4}, onnx_name="/Constant_1"](), scope: __main__.Model:: # /tmp/ipykernel_393401/573755285.py:6:0
  %/ConstantOfShape_output_0 : Long(4, strides=[1], device=cpu) = onnx::ConstantOfShape[value={1}, onnx_name="/ConstantOfShape"](%/Constant_1_output_0), scope: __main__.Model:: # /tmp/ipykernel_393401/573755285.py:6:0
  %/Constant_2_output_0 : Long(requires_grad=0, device=cpu) = onnx::Constant[value={-1}, onnx_name="/Constant_2"](), scope: __main__.Model:: # /tmp/ipykernel_393401/573755285.py:6:0
  %/Mul_output_0 : Long(4, strides=[1], device=cpu) = onnx::Mul[onnx_name="/Mul"](%/ConstantOfShape_output_0, %/Constant_2_output_0), scope: __main__.Model:: # /tmp/ipykernel_393401/573755285.py:6:0
  %/Constant_3_output_0 : Long(4, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value= 1  3 -1  7 [ CPULongType{4} ], onnx_name="/Constant_3"](), scope: __main__.Model:: # /tmp/ipykernel_393401/573755285.py:6:0
  %/Equal_output_0 : Bool(4, strides=[1], device=cpu) = onnx::Equal[onnx_name="/Equal"](%/Constant_3_output_0, %/Mul_output_0), scope: __main__.Model:: # /tmp/ipykernel_393401/573755285.py:6:0
  %/Where_output_0 : Long(4, strides=[1], device=cpu) = onnx::Where[onnx_name="/Where"](%/Equal_output_0, %/ConstantOfShape_output_0, %/Constant_output_0), scope: __main__.Model:: # /tmp/ipykernel_393401/573755285.py:6:0
  %output : Float(1, 3, *, 7, strides=[0, 0, 7, 1], requires_grad=0, device=cpu) = onnx::Expand[onnx_name="/Expand"](%data, %/Where_output_0), scope: __main__.Model:: # /tmp/ipykernel_393401/573755285.py:6:0
  return (%output)
# Fresh random tensor to demonstrate expand() with -1 ("keep this dim's size").
x = torch.rand((1, 4, 2))
print(x)
# Prepends a new singleton dim; -1 leaves the original leading dim (size 1) unchanged,
# so the result has shape (1, 1, 4, 2).
x.expand(1, -1, 4, 2)
tensor([[[0.3844, 0.2416],
         [0.1347, 0.9315],
         [0.4349, 0.9622],
         [0.6509, 0.4246]]])
tensor([[[[0.3844, 0.2416],
          [0.1347, 0.9315],
          [0.4349, 0.9622],
          [0.6509, 0.4246]]]])

模型结构

# Alternative exporter (TorchDynamo-based), kept for reference:
# onnx_program = torch.onnx.dynamo_export(torch_model, x)
# onnx_program.save("test_dynamo.onnx")
import onnx
import tvm
from tvm import relay
# Load the exported ONNX file and import it into a TVM Relay module;
# freeze_params=True bakes the ONNX initializers in as constants.
onnx_model = onnx.load(f"{temp_dir}/{output_name}.onnx")
mod, params = relay.frontend.from_onnx(onnx_model, {"data": shape}, freeze_params=True)
# with tvm.transform.PassContext(opt_level=3):
#     mod = relay.quantize.prerequisite_optimize(mod, params)
# Print the imported Relay IR.
mod.show()
# Quantize the imported Relay module under full optimization (opt_level=3).
with tvm.transform.PassContext(opt_level=3):
    with relay.quantize.qconfig(
        skip_conv_layers=[],  # empty skip list: quantize every conv layer
        # calibrate_mode="kl_divergence", 
        weight_scale="max",  # scale weights by their maximum value
        # round_for_shift=True,
        # rounding="TONEAREST", # "UPWARD" or "TONEAREST"
        # calibrate_skip_layers=[],
        skip_dense_layer=False,  # also quantize dense (fully connected) layers
    ):
        qmod = relay.quantize.quantize(mod, params)
# Print the quantized Relay IR.
qmod.show()
# Repeat the Relay import flow on a real YOLOv8n detection model.
# NOTE(review): absolute machine-local path — this cell only runs on that host.
path = "/media/pc/data/board/arria10/lxw/tasks/tools/npu_user_demos/models/telecom/vehile_det_traffic_yolov8n_c3/yolov8n-c3_384_640_.onnx"
import onnx
import tvm
from tvm import relay
# Import with a fixed input shape; freeze_params bakes weights in as constants.
onnx_model = onnx.load(path)
mod, params = relay.frontend.from_onnx(onnx_model, {"images": (1, 3, 384, 640)}, freeze_params=True)
# with tvm.transform.PassContext(opt_level=3):
#     mod = relay.quantize.prerequisite_optimize(mod, params)
# Print the imported Relay IR.
mod.show()