Graph Partitioning (old)
from pathlib import Path
import numpy as np
import tvm
from tvm.relay.backend import te_compiler
from tvm.relay.backend.runtime import Runtime
import tvm.relay.testing
import tvm.testing
import tvm.relay.op as reg
from tvm import relay
from tvm.relay import transform
from tvm.relay.testing import byoc
from tvm.contrib import utils
from tvm.relay.expr_functor import ExprMutator
from tvm.relay.op.annotation import compiler_begin, compiler_end
from tvm.relay.op.contrib.register import get_pattern_table
from tvm.relay.build_module import bind_params_by_name
def set_func_attr(func, compile_name, symbol_name):
    func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
    func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1))
    func = func.with_attr("Compiler", compile_name)
    func = func.with_attr("global_symbol", symbol_name)
    return func
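# set_func_attr mimics the attributes PartitionGraph attaches to every lifted
# region: "Compiler" routes the function to an external codegen and
# "global_symbol" fixes its emitted symbol name. A minimal sketch (the body
# and symbol name below are made up for illustration):
_x = relay.var("x", shape=(2, 2))
_f = relay.Function([_x], relay.abs(_x))
_f = set_func_attr(_f, "ccompiler", "tvmgen_default_ccompiler_main_0")
print(_f.attrs["Compiler"], _f.attrs["global_symbol"])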
def update_lib(lib, source_dir="/media/pc/data/lxw/ai/tvm"):
    """Compile the C sources emitted by the external codegen against a local
    TVM source tree and reload the resulting shared library."""
    kwargs = {
        "options": [
            "-O2", "-std=c++17",
            f"-I{source_dir}/src/runtime/contrib",
            f"-I{source_dir}/include",
            f"-I{source_dir}/3rdparty/dlpack/include",
            f"-I{source_dir}/3rdparty/dmlc-core/include",
        ]
    }
    tmp_path = utils.tempdir()
    lib_name = "lib.so"
    lib_path = tmp_path.relpath(lib_name)
    lib.export_library(lib_path, fcompile=False, **kwargs)
    lib = tvm.runtime.load_module(lib_path)
    return lib
class MobileNetAnnotator(ExprMutator):
    """
    Annotate mobilenet until global_avg_pool.
    """

    def __init__(self, compiler):
        super(MobileNetAnnotator, self).__init__()
        self.compiler = compiler
        self.compiler_open = False

    def visit_call(self, call):
        if call.op.name == "nn.global_avg_pool2d":
            self.compiler_open = True
        compiler_open = self.compiler_open

        params = []
        for arg in call.args:
            param = super().visit(arg)
            if call.op.name == "nn.global_avg_pool2d":
                param = compiler_end(param, self.compiler)

            if compiler_open and isinstance(param, relay.expr.Var):
                param = compiler_begin(param, self.compiler)
            params.append(param)

        new_call = relay.Call(call.op, params, call.attrs)
        return new_call
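# A sketch (not part of the original cells) of how this annotator is meant to
# be used: annotate a MobileNet workload so that everything up to
# nn.global_avg_pool2d is delimited by compiler_begin/compiler_end for an
# assumed "ccompiler" target, then let PartitionGraph lift that backbone into
# an external function. This only exercises the partitioning passes, not any
# external codegen.
mobilenet_mod, _mobilenet_params = relay.testing.mobilenet.get_workload(batch_size=1)
mobilenet_mod["main"] = MobileNetAnnotator("ccompiler").visit(mobilenet_mod["main"])
mobilenet_mod = transform.InferType()(mobilenet_mod)
mobilenet_mod = transform.PartitionGraph()(mobilenet_mod)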
def check_result(
    mod,
    map_inputs,
    out_shape,
    result,
    tol=1e-5,
    target="llvm",
    device=tvm.cpu(),
    params=None,
    runtime=Runtime("cpp"),
):
    def check_vm_result():
        te_compiler.get().clear()
        # Compile with the Relay VM, relink the external C sources, then
        # compare the outputs against the numpy reference values.
        with tvm.transform.PassContext(opt_level=3):
            exe = relay.vm.compile(mod, target=target, params=params)
        code, lib = exe.save()
        lib = update_lib(lib)
        exe = tvm.runtime.vm.Executable.load_exec(code, lib)
        vm = tvm.runtime.vm.VirtualMachine(exe, device)
        outs = vm.run(**map_inputs)
        outs = outs if isinstance(outs, tvm.runtime.container.ADT) else [outs]
        results = result if isinstance(result, list) else [result]
        for out, ref in zip(outs, results):
            tvm.testing.assert_allclose(out.numpy(), ref, rtol=tol, atol=tol)

    check_vm_result()
mixed_single_multiple_outputs
def create_graph():
    data = relay.var("data", shape=(10, 10))
    cb_1 = compiler_begin(data, "test_target")
    O_1 = relay.abs(cb_1)
    ce_2 = compiler_end(O_1, "test_target")
    O_2 = relay.nn.relu(O_1)
    ce_3 = compiler_end(O_2, "test_target")
    X = relay.tanh(ce_2)
    cb_3 = compiler_begin(ce_3, "test_target")
    cb_4 = compiler_begin(X, "test_target")
    O_3 = relay.add(cb_3, cb_4)
    ce_4 = compiler_end(O_3, "test_target")
    func = relay.Function([data], ce_4)
    return func

mod = tvm.IRModule()
mod["main"] = create_graph()
mod = transform.InferType()(mod)
partitioned = transform.PartitionGraph()(mod)
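# A quick way to see what PartitionGraph produced (an inspection sketch, not in
# the original cells): every annotated region becomes a global function that
# carries a "Compiler" attribute, while "main" keeps the host-side code.
for gvar in partitioned.get_global_vars():
    func = partitioned[gvar]
    attrs = func.attrs
    compiler = attrs["Compiler"] if attrs is not None and "Compiler" in attrs.keys() else "host"
    print(gvar.name_hint, "->", compiler)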
multiple_use_of_an_output
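# The tests below rely on an AllowedListAnnotator pass that is not defined in
# this notebook. The sketch below is adapted from TVM's partition-graph test
# suite and should be treated as an assumption about that helper: it wraps
# every call whose operator name is in `op_list` with compiler_begin /
# compiler_end for the given compiler, so the following cells are runnable.
@transform.function_pass(opt_level=0)
class AllowedListAnnotator:
    def __init__(self, op_list, compiler):
        assert isinstance(op_list, (list, tuple, set))
        self.op_list = op_list
        self.compiler = compiler

    def transform_function(self, func, mod, dev):
        annotator = self

        class Annotator(ExprMutator):
            def visit_call(self, call):
                op_name = call.op.name
                if op_name in annotator.op_list:
                    new_args = []
                    for arg in call.args:
                        ann = compiler_begin(super().visit(arg), annotator.compiler)
                        new_args.append(ann)
                    new_call = relay.Call(call.op, new_args, call.attrs, call.type_args)
                    return compiler_end(new_call, annotator.compiler)
                return super().visit_call(call)

        return Annotator().visit(func)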
def test_multiple_use_of_an_output():
    def expected_same_output_region():
        mod = tvm.IRModule()
        x = relay.var("x", shape=(8, 8))
        y = relay.var("y", shape=(8, 8))
        z = relay.var("z", shape=(8, 8))
        x0 = relay.var("x0", shape=(8, 8))
        y0 = relay.var("y0", shape=(8, 8))
        log = relay.log(x0)
        sub = x0 - y0
        mul = log * sub
        # The partitioned graph contains log, subtract, and multiply
        func = relay.Function([x0, y0], mul)
        func = set_func_attr(func, "ccompiler", "tvmgen_default_ccompiler_main_0")
        glb_0 = relay.GlobalVar("tvmgen_default_ccompiler_main_0")
        mod[glb_0] = func
        mod = transform.InferType()(mod)

        add = x + y
        call = relay.Call(glb_0, [add, z])
        main = relay.Function([x, y, z], call)
        mod["main"] = main
        mod = transform.InferType()(mod)
        return mod

    def expected_different_output_region():
        mod = tvm.IRModule()
        x = relay.var("x", shape=(8, 8))
        y = relay.var("y", shape=(8, 8))
        z = relay.var("z", shape=(8, 8))

        # The partitioned graph contains log
        i0 = relay.var("i0", shape=(8, 8))
        log = relay.log(i0)
        func = relay.Function([i0], log)
        func = set_func_attr(func, "ccompiler", "tvmgen_default_ccompiler_main_0")
        glb_0 = relay.GlobalVar("tvmgen_default_ccompiler_main_0")
        mod[glb_0] = func
        mod = transform.InferType()(mod)

        # The partitioned graph contains subtract
        x0 = relay.var("x0", shape=(8, 8))
        y0 = relay.var("y0", shape=(8, 8))
        sub = x0 - y0
        func = relay.Function([x0, y0], sub)
        func = set_func_attr(func, "ccompiler", "tvmgen_default_ccompiler_main_1")
        glb_1 = relay.GlobalVar("tvmgen_default_ccompiler_main_1")
        mod[glb_1] = func
        mod = transform.InferType()(mod)

        add = x + y
        call_log = relay.Call(glb_0, [add])
        call_sub = relay.Call(glb_1, [add, z])
        main = relay.Function([x, y, z], call_log * call_sub)
        mod["main"] = main
        mod = transform.InferType()(mod)
        return mod

    def get_mod():
        x = relay.var("x", shape=(8, 8))
        y = relay.var("y", shape=(8, 8))
        z = relay.var("z", shape=(8, 8))
        add = x + y
        sub = add - z
        log = relay.log(add)
        sub1 = log * sub
        f = relay.Function([x, y, z], sub1)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod

    def test_same_output_region():
        mod = get_mod()
        mod = AllowedListAnnotator(["subtract", "log", "multiply"], "ccompiler")(mod)
        mod = transform.MergeCompilerRegions()(mod)
        mod = transform.PartitionGraph()(mod)
        expected_mod = expected_same_output_region()
        assert tvm.ir.structural_equal(mod, expected_mod, map_free_vars=True)

    def test_different_output_region():
        mod = get_mod()
        mod = AllowedListAnnotator(["subtract", "log"], "ccompiler")(mod)
        mod = transform.MergeCompilerRegions()(mod)
        mod = transform.PartitionGraph()(mod)
        expected_mod = expected_different_output_region()
        assert tvm.ir.structural_equal(mod, expected_mod, map_free_vars=True)

    test_same_output_region()
    test_different_output_region()

test_multiple_use_of_an_output()
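# A small standalone sketch to visualize the "different output region" case:
# rebuild the same graph, allow only "subtract" and "log", and print the module
# after MergeCompilerRegions to see which ops were grouped into which region
# before PartitionGraph lifts them out (the variable names are illustrative).
_x = relay.var("x", shape=(8, 8))
_y = relay.var("y", shape=(8, 8))
_z = relay.var("z", shape=(8, 8))
_add = _x + _y
_m = tvm.IRModule.from_expr(relay.Function([_x, _y, _z], relay.log(_add) * (_add - _z)))
_m = AllowedListAnnotator(["subtract", "log"], "ccompiler")(_m)
print(transform.MergeCompilerRegions()(_m))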
def test_duplicate_outputs():
    target = "test_duplicate_outputs"

    @tvm.ir.register_op_attr("abs", "target." + target)
    def abs(expr):  # pylint: disable=unused-variable
        return True

    def create_graph():
        data = relay.var("data", shape=(10, 10))
        x = relay.abs(data)
        out_1 = relay.nn.relu(x)
        out_2 = relay.tanh(x)
        out_3 = relay.log(x)
        out = relay.Tuple([out_1, out_2, out_3])
        func = relay.Function([data], out)
        return func

    def expected():
        mod = tvm.IRModule()

        # function 0
        f0_i0 = relay.var(target + "_0_i0", shape=(10, 10))
        f0_o0 = relay.abs(f0_i0)
        func0 = relay.Function([f0_i0], f0_o0)

        func0 = func0.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
        func0 = func0.with_attr("Inline", tvm.tir.IntImm("int32", 1))
        func0 = func0.with_attr("Compiler", target)
        func0 = func0.with_attr("global_symbol", "tvmgen_default_" + target + "_main_0")
        gv0 = relay.GlobalVar("tvmgen_default_" + target + "_main_0")
        mod[gv0] = func0
        mod = transform.InferType()(mod)

        # body
        data = relay.var("data", shape=(10, 10))
        function_out = gv0(data)
        out_1 = relay.nn.relu(function_out)
        out_2 = relay.tanh(function_out)
        out_3 = relay.log(function_out)
        out = relay.Tuple([out_1, out_2, out_3])
        func = relay.Function([data], out)
        mod["main"] = func
        mod = transform.InferType()(mod)
        return mod

    mod = tvm.IRModule()
    mod["main"] = create_graph()

    seq = tvm.transform.Sequential(
        [
            transform.AnnotateTarget(target),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    ref_mod = expected()
    partitioned = seq(mod)
    assert tvm.ir.structural_equal(partitioned, ref_mod, map_free_vars=True)

test_duplicate_outputs()
def test_constant_tuples():
    @tvm.ir.register_op_attr("qnn.concatenate", "target.const_tuples")
    def add(expr):  # pylint: disable=unused-variable
        return True

    def create_graph():
        a = relay.var("a", shape=(10, 10), dtype="uint8")
        b = relay.var("b", shape=(10, 10), dtype="uint8")
        a1 = relay.abs(a)

        zeroi = relay.const(1, "int32")
        zerof = relay.const(0, "float32")
        con = relay.qnn.op.concatenate(
            (a1, b),
            input_scales=(zerof, zerof),
            input_zero_points=(zeroi, zeroi),
            output_scale=zerof,
            output_zero_point=zeroi,
            axis=1,
        )

        f = relay.Function([a, b], con)
        mod = tvm.IRModule.from_expr(f)
        mod = transform.InferType()(mod)
        return mod

    seq = tvm.transform.Sequential(
        [
            transform.AnnotateTarget("const_tuples"),
            transform.InferType(),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    partitioned = seq(create_graph())
    concat = partitioned["tvmgen_default_const_tuples_main_0"].body
    assert type(concat.args[1]) == relay.Tuple
    assert type(concat.args[2]) == relay.Tuple
    assert type(concat.args[3]) == relay.Constant
    assert type(concat.args[4]) == relay.Constant
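# test_constant_tuples checks that the quantization parameters of
# qnn.concatenate (the scale/zero-point tuples and scalars) survive
# partitioning as Tuple and Constant arguments of the external function.
# Invoking it here is an addition for illustration; it only runs Relay
# passes, so no external codegen is required.
test_constant_tuples()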
def test_flatten_tuple_output():
    target = "test_flatten_tuple_output"

    @tvm.ir.register_op_attr("split", "target." + target)
    def split(expr):  # pylint: disable=unused-variable
        return True

    @tvm.ir.register_op_attr("abs", "target." + target)
    def abs(expr):  # pylint: disable=unused-variable
        return True

    def create_graph():
        a = relay.var("a", shape=(10, 10), dtype="uint8")

        a_split = relay.split(a, 2)
        a_split_0 = relay.TupleGetItem(a_split.astuple(), 0)
        a_split_0_abs = relay.abs(a_split_0)

        a_con = relay.concatenate(a_split, 0)
        a_split_0_relu = relay.nn.relu(a_split_0_abs)

        out = relay.Tuple((a_con, a_split_0_relu))
        f = relay.Function([a], out)
        mod = tvm.IRModule.from_expr(f)
        mod = transform.InferType()(mod)
        return mod

    def expected():
        mod = tvm.IRModule()

        # function 0
        f0_i0 = relay.var(target + "_0_i0", shape=(10, 10), dtype="uint8")
        a_split = relay.split(f0_i0, 2)
        a_split_0 = relay.TupleGetItem(a_split.astuple(), 0)
        a_split_1 = relay.TupleGetItem(a_split.astuple(), 1)
        a_split_abs_in = relay.TupleGetItem(a_split.astuple(), 0)
        abs = relay.abs(a_split_abs_in)
        tuple_out = relay.Tuple((a_split_0, a_split_1, abs))
        func0 = relay.Function([f0_i0], tuple_out)

        func0 = func0.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
        func0 = func0.with_attr("Inline", tvm.tir.IntImm("int32", 1))
        func0 = func0.with_attr("Compiler", target)
        func0 = func0.with_attr("global_symbol", "tvmgen_default_" + target + "_main_0")
        gv0 = relay.GlobalVar("tvmgen_default_" + target + "_main_0")
        mod[gv0] = func0
        mod = transform.InferType()(mod)

        # body
        data = relay.var("a", shape=(10, 10), dtype="uint8")
        f_out = gv0(data)
        f_out_0 = relay.TupleGetItem(f_out, 0)
        f_out_1 = relay.TupleGetItem(f_out, 1)
        tuple = relay.Tuple((f_out_0, f_out_1))
        concat = relay.concatenate(tuple, 0)
        f_out_2 = relay.TupleGetItem(f_out, 2)
        relu = relay.nn.relu(f_out_2)
        ret_tuple = relay.Tuple((concat, relu))
        mod["main"] = relay.Function([data], ret_tuple)
        mod = transform.InferType()(mod)
        return mod

    seq = tvm.transform.Sequential(
        [
            transform.AnnotateTarget(target),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    partitioned = seq(create_graph())
    partitioned = transform.InferType()(partitioned)
    expected_mod = transform.InferType()(expected())
    assert tvm.ir.structural_equal(partitioned, expected_mod, map_free_vars=True)
def test_tuple_output_exec():
    """Test C codegen and runtime for a subgraph with a tuple output"""
    a = relay.var("a", shape=(10, 10), dtype="float32")
    b = relay.var("b", shape=(10, 10), dtype="float32")
    ba = relay.annotation.compiler_begin(a, "ccompiler")
    bb = relay.annotation.compiler_begin(b, "ccompiler")
    add = relay.add(ba, bb)
    sub = relay.subtract(ba, bb)
    out = relay.Tuple((add, sub))
    eout = relay.annotation.compiler_end(out, "ccompiler")
    func = relay.Function([a, b], eout)

    mod = tvm.IRModule()
    mod["main"] = func
    mod = transform.InferType()(mod)
    mod = transform.PartitionGraph()(mod)

    a_data = np.random.rand(10, 10).astype("float32")
    b_data = np.random.rand(10, 10).astype("float32")

    check_result(
        mod,
        {"a": a_data, "b": b_data},
        [(10, 10), (10, 10)],
        [(a_data + b_data), (a_data - b_data)],
    )
def test_extern_opt():
    def Optimize(mod):
        return relay.transform.FoldConstant()(mod)

    tvm.register_func("relay.ext.test_target.optimize", Optimize)

    x = relay.var("x", shape=(2, 2))
    y0 = relay.var("y0", shape=(2, 2))
    y1 = relay.var("y1", shape=(2, 2))
    yy0 = relay.annotation.compiler_begin(y0, "test_target")
    yy1 = relay.annotation.compiler_begin(y1, "test_target")
    z = yy0 + yy1
    end = relay.annotation.compiler_end(z, "test_target")
    f = relay.Function([x, y0, y1], end * x)
    c = np.ones(shape=(2, 2), dtype="float32")
    f = bind_params_by_name(f, {"y0": tvm.nd.array(c), "y1": tvm.nd.array(c)})
    mod = tvm.IRModule()
    mod["main"] = f
    mod = transform.InferType()(mod)
    mod = transform.PartitionGraph()(mod)

    try:
        t0 = mod["tvmgen_default_test_target_main_0"]
    except:
        raise KeyError("test_target_main_0 not found")
    assert isinstance(t0.body, relay.Constant)
    expected = np.empty([2, 2])
    expected.fill(2)
    tvm.testing.assert_allclose(t0.body.data.numpy(), expected, rtol=1e-5, atol=1e-5)
def test_preserve_type_import():
    """Test to make sure type definition and imports are preserved during the BYOC pipeline."""
    from tvm.relay.prelude import Prelude, StaticTensorArrayOps

    def run(dtype, shape):
        mod = tvm.IRModule()
        p = Prelude(mod)
        static_tensor_array_ops = StaticTensorArrayOps(p, dtype, shape)
        static_tensor_array_ops.register()
        tensor_array = p.get_global_var_static("tensor_array", dtype, shape)
        tensor = p.get_tensor_ctor_static("tensor_constructor", dtype, shape)
        write = p.get_global_var_static("tensor_array_write", dtype, shape)
        gather = p.get_global_var_static("tensor_array_gather", dtype, shape)
        v = relay.var("v")
        indice = relay.var("indice")
        init_tensor_array = tensor_array(relay.const(3))
        tensor_array1 = write(init_tensor_array, relay.const(0), tensor(v))
        tensor_array2 = write(tensor_array1, relay.const(1), tensor(v))
        tensor_array3 = write(tensor_array2, relay.const(2), tensor(v))
        out = gather(tensor_array3, indice)
        mod["main"] = relay.Function([v, indice], out)
        mod = transform.RemoveUnusedFunctions()(mod)
        mod = transform.PartitionGraph()(mod)

    run("float32", [2, 3])
def test_not_bind_constant():
    def get_net(prefix, data, out_channel):
        weight = relay.var(prefix + "weight")
        bn_gamma = relay.var(prefix + "bn_gamma")
        bn_beta = relay.var(prefix + "bn_beta")
        bn_mmean = relay.var(prefix + "bn_mean")
        bn_mvar = relay.var(prefix + "bn_var")
        layer = relay.nn.conv2d(
            data=data, weight=weight, kernel_size=(3, 3), channels=out_channel, padding=(1, 1)
        )
        bn_output = relay.nn.batch_norm(layer, bn_gamma, bn_beta, bn_mmean, bn_mvar)
        out = relay.nn.relu(bn_output[0])
        return relay.Function(relay.analysis.free_vars(out), out)

    def get_partitioned_mod(mod, params, pattern_table, bind_constants):
        mod["main"] = bind_params_by_name(mod["main"], params)
        remove_bn_pass = tvm.transform.Sequential(
            [
                transform.InferType(),
                transform.SimplifyInference(),
                transform.FoldConstant(),
                transform.FoldScaleAxis(),
            ]
        )
        composite_partition = tvm.transform.Sequential(
            [
                remove_bn_pass,
                transform.MergeComposite(pattern_table),
                transform.AnnotateTarget("dnnl"),
                transform.PartitionGraph(bind_constants=bind_constants),
            ]
        )

        with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
            return composite_partition(mod)
data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32"))
net = get_net("block_", data, 8)
mod, params = tvm.relay.testing.create_workload(net)
mod = get_partitoned_mod(mod, params, get_pattern_table("dnnl"), bind_constants=True)
len(mod["main"].body.args) == 1
mod = get_partitoned_mod(mod, params, get_pattern_table("dnnl"), bind_constants=False)
len(mod["main"].body.args) == 3