TVM Graph JSON 简介#
导航
解读 TVM 通过 tvm/src/relay/backend/graph_executor_codegen.cc
编译(tvm.relay.build()
或者 vta.build()
)生成的库 lib
保存的计算图信息 lib.graph_json
。
lib.graph_json
包含信息如下:
节点是占位符或可计算节点。nodes
存储为列表。节点包含以下信息:
op
:运算类型,null
意味着它是占位符/变量/输入节点,tvm_op
意味着这个节点可以被执行。name
:节点名字。inputs
:此运算的 inputs 位置,inputs 是包含 (nodeid, index, version) 的元组列表。(可选)attrs
:包含以下信息的节点属性。flatten_data
:是否需要在执行前将数据扁平化(flattened)。func_name
:融合函数名,对应于 Relay 编译过程生成的库中的符号。num_inputs
:此节点的 inputs
个数。num_outputs
:此节点产生的 outputs 个数
arg_nodes:参数节点的索引列表,它是计算图的占位符/变量/输入节点 或 constant/param。
heads:此运算的输出节点的位置列表。
node_row_ptr:存储 forward 路径的历史,所以推断任务中可以跳过某些算子来构建子图。
attrs:可以包含版本号或类似的有用信息。
storage_id
:存储布局中每个节点的内存 slot id。将参数名称映射到一对 ({storage_id
:tvm.runtime.NDArray
})。在运行时,可以使用storage_id
查找参数。dtype
:每个节点的数据类型 (enum 值)。dltype
:每个节点的数据类型按顺序排列。shape
:每个节点的形状 k 阶。device_index
:按顺序为每个节点分配设备。
下面以向量加法为例说明:
import tvm
from tvm import relay
# Tensor type shared by both operands and the function's declared return type.
type_annotation = relay.TensorType(shape=(5, 5), dtype="float32")
def add(a, b):
    """Build a Relay function computing ``a + b`` with an explicit return type.

    Uses the module-level ``type_annotation`` as ``ret_type``; ``a`` and ``b``
    are Relay expressions (typically ``relay.var``).
    """
    return relay.Function([a, b], a + b,
                          ret_type=type_annotation,
                          type_params=None)
# Two 5x5 float32 input variables, wrapped into an IRModule and compiled for CPU.
a = relay.var("a", type_annotation)
b = relay.var("b", type_annotation)
mod = tvm.IRModule.from_expr(add(a, b))
rt_lib = relay.build(mod, target="llvm")
rt_lib.graph_json
存储为字符串:
type(rt_lib.graph_json)
str
查看构建的计算图:
print(rt_lib.ir_mod)
def @main(%a: Tensor[(5, 5), float32], %b: Tensor[(5, 5), float32]) -> Tensor[(5, 5), float32] {
add(%a, %b)
}
查看函数元数据:
print(rt_lib.function_metadata)
{"tvmgen_default_fused_add": FunctionInfoNode(
workspace_sizes={llvm -keys=cpu : 0},
io_sizes={llvm -keys=cpu : 100},
constant_sizes={llvm -keys=cpu : 0},
tir_primfuncs={llvm -keys=cpu : PrimFunc([p0, p1, T_add]) attrs={"from_legacy_te_schedule": (bool)1, "global_symbol": "tvmgen_default_fused_add", "tir.noalias": (bool)1, "hash": "f01462d5c0c6f96c"} {
parallel (ax0, 0, 5) {
let cse_var_1 = (ax0*5)
T_add[ramp(cse_var_1, 1, 5)] = (p0[ramp(cse_var_1, 1, 5)] + p1[ramp(cse_var_1, 1, 5)])
}
}
},
relay_primfuncs={llvm -keys=cpu : fn (%p0: Tensor[(5, 5), float32] /* ty=Tensor[(5, 5), float32] */, %p1: Tensor[(5, 5), float32] /* ty=Tensor[(5, 5), float32] */, hash="f01462d5c0c6f96c", prim_funcs={'tvmgen_default_fused_add'=meta[tir.PrimFunc][0]}, target=meta[Target][0], Primitive=1, prim_fn_var='tvmgen_default_fused_add') -> Tensor[(5, 5), float32] {
add(%p0, %p1) /* ty=Tensor[(5, 5), float32] */
} /* ty=fn (Tensor[(5, 5), float32], Tensor[(5, 5), float32]) -> Tensor[(5, 5), float32] */
}), "__tvm_main__": FunctionInfoNode(
workspace_sizes={llvm -keys=cpu : 0},
io_sizes={llvm -keys=cpu : 300},
constant_sizes={llvm -keys=cpu : 0},
tir_primfuncs={},
relay_primfuncs={llvm -keys=cpu : fn (%a {virtual_device=VirtualDevice(device_type=1, virtual_device_id=0, target=Target(id=35b3aa0, kind='llvm', keys={'cpu'}, host=Target(id=35b3990, kind='llvm', keys={'cpu'})))}: Tensor[(5, 5), float32] /* ty=Tensor[(5, 5), float32] */, %b {virtual_device=VirtualDevice(device_type=1, virtual_device_id=0, target=Target(id=35b3aa0, kind='llvm', keys={'cpu'}, host=Target(id=35b3990, kind='llvm', keys={'cpu'})))}: Tensor[(5, 5), float32] /* ty=Tensor[(5, 5), float32] */, executor=meta[Executor][0], runtime=meta[Runtime][0], hash="4fcdf772a04eb1ba", virtual_device=VirtualDevice(device_type=1, virtual_device_id=0, target=Target(id=35b3aa0, kind='llvm', keys={'cpu'}, host=Target(id=35b3990, kind='llvm', keys={'cpu'})))) -> Tensor[(5, 5), float32] {
%0 = fn (%p0: Tensor[(5, 5), float32] /* ty=Tensor[(5, 5), float32] */, %p1: Tensor[(5, 5), float32] /* ty=Tensor[(5, 5), float32] */, Primitive=1, hash="f01462d5c0c6f96c") -> Tensor[(5, 5), float32] {
add(%p0, %p1) /* ty=Tensor[(5, 5), float32] */
} /* ty=fn (Tensor[(5, 5), float32], Tensor[(5, 5), float32]) -> Tensor[(5, 5), float32] */;
%0(%a, %b) /* ty=Tensor[(5, 5), float32] */
} /* ty=fn (Tensor[(5, 5), float32], Tensor[(5, 5), float32]) -> Tensor[(5, 5), float32] */
})}
使用 toml
查看可读性更好:
import json

import toml

# Parse graph_json with json.loads rather than eval(): graph_json is JSON, and
# eval() would both execute arbitrary code and choke on JSON literals such as
# true/false/null that are not valid Python.
print(toml.dumps(json.loads(rt_lib.graph_json)))
arg_nodes = [ 0, 1,]
heads = [ [ 2, 0, 0,],]
node_row_ptr = [ 0, 1, 2, 3,]
[[nodes]]
op = "null"
name = "a"
inputs = []
[[nodes]]
op = "null"
name = "b"
inputs = []
[[nodes]]
op = "tvm_op"
name = "tvmgen_default_fused_add"
inputs = [ [ 0, 0, 0,], [ 1, 0, 0,],]
[nodes.attrs]
num_outputs = "1"
num_inputs = "2"
flatten_data = "0"
func_name = "tvmgen_default_fused_add"
hash = "f01462d5c0c6f96c"
[attrs]
dltype = [ "list_str", [ "float32", "float32", "float32",],]
device_index = [ "list_int", [ 1, 1, 1,],]
storage_id = [ "list_int", [ 0, 1, 2,],]
shape = [ "list_shape", [ [ 5, 5,], [ 5, 5,], [ 5, 5,],],]