函数 lifting#
import set_env
/media/pc/data/lxw/ai/tvm
import numpy as np
import tvm
from tvm import relay
from tvm.relay import ExprMutator
from tvm.relay.op.annotation import compiler_begin, compiler_end
from tvm.relay.backend.runtime import Runtime
from tvm.relay.backend import te_compiler
from tvm.contrib.utils import tempdir
def update_lib(lib, source_dir="/media/pc/data/lxw/ai/tvm"):
    """Export *lib* as a shared object and reload it as a runtime module.

    Parameters
    ----------
    lib : tvm.runtime.Module
        Module produced by Relay compilation (e.g. ``exe.save()``).
    source_dir : str
        Root of the TVM source tree; its headers are put on the include
        path so the generated external-compiler C++ sources can build.

    Returns
    -------
    tvm.runtime.Module
        The module loaded back from the exported ``lib.so``.
    """
    compile_opts = {
        "options": [
            "-O2",
            "-std=c++17",
            f"-I{source_dir}/src/runtime/contrib",
            f"-I{source_dir}/include",
            f"-I{source_dir}/3rdparty/dlpack/include",
            f"-I{source_dir}/3rdparty/dmlc-core/include",
        ]
    }
    workdir = tempdir()
    so_path = workdir.relpath("lib.so")
    # fcompile=False: use the default compiler driver with the options above.
    lib.export_library(so_path, fcompile=False, **compile_opts)
    return tvm.runtime.load_module(so_path)
def check_result(
    mod,
    map_inputs,
    out_shape,
    result,
    tol=1e-5,
    target="llvm",
    device=tvm.cpu(),
    params=None,
    runtime=Runtime("cpp"),
):
    """Compile *mod* with the Relay VM, run it, and compare against *result*.

    Parameters
    ----------
    mod : tvm.IRModule
        The (possibly partitioned) module to compile.
    map_inputs : dict
        Keyword inputs passed to the VM (name -> numpy array).
    out_shape : tuple
        Expected output shape (kept for API symmetry; not used here).
    result : numpy.ndarray or list of numpy.ndarray
        Reference output(s) to compare against.
    tol : float
        Relative and absolute tolerance for the comparison.
    """

    def check_vm_result():
        # Clear cached lowered functions so external codegen runs fresh.
        te_compiler.get().clear()
        with tvm.transform.PassContext(opt_level=3):
            exe = relay.vm.compile(mod, target=target, params=params)
        code, lib = exe.save()
        # Round-trip through a shared object to exercise the exported code.
        lib = update_lib(lib)
        loaded_exe = tvm.runtime.vm.Executable.load_exec(code, lib)
        vm = tvm.runtime.vm.VirtualMachine(loaded_exe, device)
        outs = vm.run(**map_inputs)
        # Normalize single-output and multi-output (ADT) cases to lists.
        if not isinstance(outs, tvm.runtime.container.ADT):
            outs = [outs]
        refs = result if isinstance(result, list) else [result]
        for got, ref in zip(outs, refs):
            np.testing.assert_allclose(got.numpy(), ref, rtol=tol, atol=tol)

    check_vm_result()
# Build the running example: conv2d -> batch_norm on a (1, 3, 224, 224) input.
data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32"))
weight = relay.var("weight", relay.TensorType((16, 3, 3, 3), "float32"))
# The four per-channel batch_norm parameters share the same (16,) type.
bn_gamma, bn_beta, bn_mmean, bn_mvar = (
    relay.var(name, relay.TensorType((16,), "float32"))
    for name in ("bn_gamma", "bn_beta", "bn_mean", "bn_var")
)
conv = relay.nn.conv2d(
    data=data, weight=weight, kernel_size=(3, 3), channels=16, padding=(1, 1)
)
bn_output = relay.nn.batch_norm(conv, bn_gamma, bn_beta, bn_mmean, bn_mvar)
func = relay.Function(
    [data, weight, bn_gamma, bn_beta, bn_mmean, bn_mvar], bn_output.astuple()
)
mod = tvm.IRModule()
mod["main"] = func
mod = relay.transform.InferType()(mod)
mod.show()
def @main(%data: Tensor[(1, 3, 224, 224), float32] /* ty=Tensor[(1, 3, 224, 224), float32] */, %weight: Tensor[(16, 3, 3, 3), float32] /* ty=Tensor[(16, 3, 3, 3), float32] */, %bn_gamma: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %bn_beta: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %bn_mean: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %bn_var: Tensor[(16), float32] /* ty=Tensor[(16), float32] */) -> (Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) {
%0 = nn.conv2d(%data, %weight, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 224, 224), float32] */;
nn.batch_norm(%0, %bn_gamma, %bn_beta, %bn_mean, %bn_var) /* ty=(Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) */
}
# A simple allow-list annotator written with the pass manager: every call to
# an op on the allow list is wrapped in compiler_begin/compiler_end
# annotations targeting the given external compiler.
@relay.transform.function_pass(opt_level=0)
class AllowedListAnnotator:
    """Function pass annotating calls to allow-listed ops for *compiler*."""

    def __init__(self, op_list, compiler):
        assert isinstance(op_list, (list, tuple, set))
        self.op_list = op_list
        self.compiler = compiler

    def transform_function(self, func, mod, dev):
        outer = self

        class Annotator(tvm.relay.ExprMutator):
            def visit_call(self, call):
                # Ops outside the allow list are left untouched.
                if call.op.name not in outer.op_list:
                    return super().visit_call(call)
                # Wrap each (recursively visited) argument in compiler_begin
                # and the rebuilt call in compiler_end.  Zero-argument super()
                # is not available inside a comprehension, hence the explicit
                # form below.
                annotated_args = [
                    compiler_begin(super(Annotator, self).visit(arg), outer.compiler)
                    for arg in call.args
                ]
                new_call = relay.Call(
                    call.op, annotated_args, call.attrs, call.type_args
                )
                return compiler_end(new_call, outer.compiler)

        return Annotator().visit(func)
op_list = ["nn.batch_norm", "nn.conv2d"]
mod = AllowedListAnnotator(op_list, "test_compiler")(mod)

# Partition the annotated graph first, then run the usual optimizations.
partition_pipeline = [
    relay.transform.InferType(),
    relay.transform.PartitionGraph(),
    relay.transform.SimplifyInference(),
    relay.transform.FoldConstant(),
    relay.transform.AlterOpLayout(),
]
opt_pass = tvm.transform.Sequential(partition_pipeline)
with tvm.transform.PassContext(opt_level=3):
    mod = opt_pass(mod)
mod.show()
def @main(%data: Tensor[(1, 3, 224, 224), float32] /* ty=Tensor[(1, 3, 224, 224), float32] */, %weight: Tensor[(16, 3, 3, 3), float32] /* ty=Tensor[(16, 3, 3, 3), float32] */, %bn_gamma: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %bn_beta: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %bn_mean: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %bn_var: Tensor[(16), float32] /* ty=Tensor[(16), float32] */) -> (Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) {
%0 = @tvmgen_default_test_compiler_main_0(%data, %weight) /* ty=Tensor[(1, 16, 224, 224), float32] */;
@tvmgen_default_test_compiler_main_2(%0, %bn_gamma, %bn_beta, %bn_mean, %bn_var) /* ty=(Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) */
}
def @tvmgen_default_test_compiler_main_0(%test_compiler_0_i0: Tensor[(1, 3, 224, 224), float32] /* ty=Tensor[(1, 3, 224, 224), float32] */, %test_compiler_0_i1: Tensor[(16, 3, 3, 3), float32] /* ty=Tensor[(16, 3, 3, 3), float32] */, Compiler="test_compiler", Primitive=1, Inline=1, global_symbol="tvmgen_default_test_compiler_main_0") -> Tensor[(1, 16, 224, 224), float32] {
nn.conv2d(%test_compiler_0_i0, %test_compiler_0_i1, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 224, 224), float32] */
}
def @tvmgen_default_test_compiler_main_2(%test_compiler_2_i0: Tensor[(1, 16, 224, 224), float32] /* ty=Tensor[(1, 16, 224, 224), float32] */, %test_compiler_2_i1: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %test_compiler_2_i2: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %test_compiler_2_i3: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %test_compiler_2_i4: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, Compiler="test_compiler", Primitive=1, Inline=1, global_symbol="tvmgen_default_test_compiler_main_2") -> (Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) {
nn.batch_norm(%test_compiler_2_i0, %test_compiler_2_i1, %test_compiler_2_i2, %test_compiler_2_i3, %test_compiler_2_i4) /* ty=(Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) */
}
# Same flow on a batch_norm-only graph, this time adding Inline() so the
# partitioned external function is inlined back into @main.
data = relay.var("data", relay.TensorType((1, 16, 224, 224), "float32"))
bn_gamma, bn_beta, bn_mmean, bn_mvar = (
    relay.var(name, relay.TensorType((16,), "float32"))
    for name in ("bn_gamma", "bn_beta", "bn_mean", "bn_var")
)
bn_output = relay.nn.batch_norm(data, bn_gamma, bn_beta, bn_mmean, bn_mvar)
func = relay.Function([data, bn_gamma, bn_beta, bn_mmean, bn_mvar], bn_output.astuple())
mod = tvm.IRModule()
mod["main"] = func
op_list = ["nn.batch_norm", "nn.conv2d"]
mod = AllowedListAnnotator(op_list, "test_compiler")(mod)

inline_pipeline = [
    relay.transform.InferType(),
    relay.transform.PartitionGraph(),
    relay.transform.SimplifyInference(),
    relay.transform.FoldConstant(),
    relay.transform.AlterOpLayout(),
    relay.transform.Inline(),
]
opt_pass = tvm.transform.Sequential(inline_pipeline)
with tvm.transform.PassContext(opt_level=3):
    mod = opt_pass(mod)
mod.show()
def @main(%data: Tensor[(1, 16, 224, 224), float32] /* ty=Tensor[(1, 16, 224, 224), float32] */, %bn_gamma: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %bn_beta: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %bn_mean: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %bn_var: Tensor[(16), float32] /* ty=Tensor[(16), float32] */) -> (Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) {
%0 = fn (%test_compiler_0_i0: Tensor[(1, 16, 224, 224), float32] /* ty=Tensor[(1, 16, 224, 224), float32] */, %test_compiler_0_i1: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %test_compiler_0_i2: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %test_compiler_0_i3: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %test_compiler_0_i4: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, Compiler="test_compiler", Primitive=1, Inline=1, global_symbol="tvmgen_default_test_compiler_main_0") -> (Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) {
nn.batch_norm(%test_compiler_0_i0, %test_compiler_0_i1, %test_compiler_0_i2, %test_compiler_0_i3, %test_compiler_0_i4) /* ty=(Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) */
};
%0(%data, %bn_gamma, %bn_beta, %bn_mean, %bn_var)
}
注解常量折叠#
from tvm.relay.build_module import bind_params_by_name

# Bind "x" to a constant so the partitioner sees a constant argument to `add`;
# the constant is folded into the external ccompiler function.
ones = np.ones(shape=(8, 8), dtype="float32")
x = relay.var("x", shape=(8, 8))
y = relay.var("y", shape=(8, 8))
f = relay.Function([x, y], relay.log(x + y))
f = bind_params_by_name(f, {"x": tvm.nd.array(ones)})
mod = tvm.IRModule()
mod["main"] = f
mod = AllowedListAnnotator(["add"], "ccompiler")(mod)
mod = relay.transform.PartitionGraph()(mod)
mod = relay.transform.InferType()(mod)
mod.show()

# Run on a random "y" and check against the numpy reference log(ones + y).
y_data = np.random.rand(8, 8).astype("float32")
check_result(mod, {"y": y_data}, (8, 8), np.log(ones + y_data))
def @main(%y: Tensor[(8, 8), float32] /* ty=Tensor[(8, 8), float32] */) -> Tensor[(8, 8), float32] {
%0 = @tvmgen_default_ccompiler_main_0(%y) /* ty=Tensor[(8, 8), float32] */;
log(%0) /* ty=Tensor[(8, 8), float32] */
}
def @tvmgen_default_ccompiler_main_0(%ccompiler_0_i1: Tensor[(8, 8), float32] /* ty=Tensor[(8, 8), float32] */, Compiler="ccompiler", Primitive=1, Inline=1, global_symbol="tvmgen_default_ccompiler_main_0") -> Tensor[(8, 8), float32] {
add(meta[relay.Constant][0] /* ty=Tensor[(8, 8), float32] */, %ccompiler_0_i1) /* ty=Tensor[(8, 8), float32] */
}
多输出#
def create_graph():
    """Build a conv2d + batch_norm + relu function whose annotated region
    produces multiple outputs (the relu result plus batch_norm's running
    mean and variance), to exercise multi-output partitioning."""
    data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32"))
    weight = relay.var("weight", relay.TensorType((16, 3, 3, 3), "float32"))
    bn_gamma = relay.var("bn_gamma", relay.TensorType((16,), "float32"))
    bn_beta = relay.var("bn_beta", relay.TensorType((16,), "float32"))
    bn_mean = relay.var("bn_mean", relay.TensorType((16,), "float32"))
    bn_var = relay.var("bn_var", relay.TensorType((16,), "float32"))

    # Mark every input as entering the external "test_target" region.
    def begin(expr):
        return compiler_begin(expr, "test_target")

    conv_o = relay.nn.conv2d(
        data=begin(data),
        weight=begin(weight),
        kernel_size=(3, 3),
        channels=16,
        padding=(1, 1),
    )
    bn_o = relay.nn.batch_norm(
        conv_o, begin(bn_gamma), begin(bn_beta), begin(bn_mean), begin(bn_var)
    )

    # Three values leave the region: relu(output), running mean, running var.
    relu_out = compiler_end(relay.nn.relu(bn_o[0]), "test_target")
    mean_out = compiler_end(bn_o[1], "test_target")
    var_out = compiler_end(bn_o[2], "test_target")

    # The abs() calls stay outside the partitioned function.
    body = relay.Tuple((relu_out, relay.abs(mean_out), relay.abs(var_out)))
    return relay.Function([data, weight, bn_gamma, bn_beta, bn_mean, bn_var], body)
# Partition the multi-output graph and show how the annotated region is
# lifted into a separate external function with a tuple return.
mod = tvm.IRModule()
mod["main"] = create_graph()
partitioned = relay.transform.PartitionGraph()(mod)
partitioned.show()
def @main(%data: Tensor[(1, 3, 224, 224), float32] /* ty=Tensor[(1, 3, 224, 224), float32] */, %weight: Tensor[(16, 3, 3, 3), float32] /* ty=Tensor[(16, 3, 3, 3), float32] */, %bn_gamma: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %bn_beta: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %bn_mean: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %bn_var: Tensor[(16), float32] /* ty=Tensor[(16), float32] */) -> (Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) {
%0 = @tvmgen_default_test_target_main_0(%data, %weight, %bn_gamma, %bn_beta, %bn_mean, %bn_var) /* ty=(Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) */;
%1 = %0.1 /* ty=Tensor[(16), float32] */;
%2 = %0.2 /* ty=Tensor[(16), float32] */;
%3 = %0.0 /* ty=Tensor[(1, 16, 224, 224), float32] */;
%4 = abs(%1) /* ty=Tensor[(16), float32] */;
%5 = abs(%2) /* ty=Tensor[(16), float32] */;
(%3, %4, %5) /* ty=(Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) */
}
def @tvmgen_default_test_target_main_0(%test_target_0_i0: Tensor[(1, 3, 224, 224), float32] /* ty=Tensor[(1, 3, 224, 224), float32] */, %test_target_0_i1: Tensor[(16, 3, 3, 3), float32] /* ty=Tensor[(16, 3, 3, 3), float32] */, %test_target_0_i2: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %test_target_0_i3: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %test_target_0_i4: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, %test_target_0_i5: Tensor[(16), float32] /* ty=Tensor[(16), float32] */, Compiler="test_target", Primitive=1, Inline=1, global_symbol="tvmgen_default_test_target_main_0") -> (Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) {
%6 = nn.conv2d(%test_target_0_i0, %test_target_0_i1, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, 224, 224), float32] */;
%7 = nn.batch_norm(%6, %test_target_0_i2, %test_target_0_i3, %test_target_0_i4, %test_target_0_i5) /* ty=(Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) */;
%8 = %7.0 /* ty=Tensor[(1, 16, 224, 224), float32] */;
%9 = nn.relu(%8) /* ty=Tensor[(1, 16, 224, 224), float32] */;
%10 = %7.1 /* ty=Tensor[(16), float32] */;
%11 = %7.2 /* ty=Tensor[(16), float32] */;
(%9, %10, %11) /* ty=(Tensor[(1, 16, 224, 224), float32], Tensor[(16), float32], Tensor[(16), float32]) */
}