# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import numpy as np

import tvm
from tvm import te
from tvm import relay, runtime
from tvm.contrib.nvcc import have_fp16

# imported explicitly so tvm.contrib.graph_executor is always available below
import tvm.contrib.graph_executor
import tvm.testing


def test_basic_build():
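    """Build a dense + relu + add graph on CPU with "b" and "c" bound as params,
    run it through the graph executor, and check that relay.build leaves the
    input IRModule unchanged."""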
tgt = "llvm"
dev = tvm.cpu()
# func
a = relay.var("a", dtype="float32", shape=(16, 8))
b = relay.var("b", dtype="float32", shape=(8, 8))
c = relay.var("c", dtype="float32", shape=(16, 8))
x = relay.nn.dense(a, b)
y = relay.nn.relu(x)
z = y + c
func = relay.Function([a, b, c], z)
A = tvm.nd.array(np.random.uniform(-1, 1, (16, 8)).astype("float32"), device=dev)
B = tvm.nd.array(np.random.uniform(-1, 1, (8, 8)).astype("float32"), device=dev)
C = tvm.nd.array(np.random.uniform(-1, 1, (16, 8)).astype("float32"), device=dev)
params = {"b": B, "c": C}
# build
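    # relay.build also accepts a dict mapping a device-type key to a target;
    # exercise that form here with the CPU device type.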
targets = {tvm.tir.IntImm("int32", dev.device_type): tgt}
mod = tvm.IRModule.from_expr(func)
func_in_mod = mod["main"]
assert mod["main"] == func_in_mod, "cannot compare function to itself"
lib = relay.build(mod, targets, "llvm", params=params)
assert mod["main"] == func_in_mod, "relay.build changed module in-place"
# test
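    # only "a" has to be set at run time; "b" and "c" were bound as params at build time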
rt = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))
rt.set_input("a", A)
rt.run()
out = rt.get_output(0)
np.testing.assert_allclose(
out.numpy(),
np.maximum(np.dot(A.numpy(), B.numpy().T), 0) + C.numpy(),
atol=1e-5,
rtol=1e-5,
)


@tvm.testing.requires_cuda
def test_fp16_build():
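    """Build and run a simple fp16 element-wise add on CUDA."""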
dtype = "float16"
dev = tvm.cuda(0)
if dtype == "float16" and not have_fp16(dev.compute_version):
print("skip because gpu does not support fp16")
return
x = relay.var("x", dtype=dtype, shape=(4, 4))
y = relay.var("y", dtype=dtype, shape=(4, 4))
z = x + y
func = relay.Function([x, y], z)
X = tvm.nd.array(np.random.uniform(-1, 1, (4, 4)).astype(dtype), device=dev)
Y = tvm.nd.array(np.random.uniform(-1, 1, (4, 4)).astype(dtype), device=dev)
params = {
"x": X,
"y": Y,
}
# build
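    # the build result is unpacked into (graph json, module, params); on newer
    # TVM releases relay.build returns a factory module that still supports
    # this legacy unpacking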
g_json, mmod, params = relay.build(func, "cuda", params=params)
# test
rt = tvm.contrib.graph_executor.create(g_json, mmod, dev)
rt.load_params(runtime.save_param_dict(params))
rt.run()
out = rt.get_output(0)
np.testing.assert_allclose(out.numpy(), X.numpy() + Y.numpy(), atol=1e-5, rtol=1e-5)


@tvm.testing.requires_llvm
def test_bf16_build():
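    """Compile a conv2d + batch_norm + pooling + softmax graph cast to bfloat16
    with the LLVM backend; this only checks that compilation succeeds."""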
data = relay.var("data", shape=(1, 3, 224, 224), dtype="float32")
weight = relay.var("weight", shape=(64, 3, 7, 7), dtype="float32")
bn_gamma = relay.var("gamma", shape=(64,), dtype="float32")
bn_beta = relay.var("beta", shape=(64,), dtype="float32")
bn_mean = relay.var("mean", shape=(64,), dtype="float32")
bn_var = relay.var("var", shape=(64,), dtype="float32")
params = {
"weight": np.random.uniform(-1, 1, size=(64, 3, 7, 7)).astype("float32"),
"gamma": np.random.uniform(-1, 1, size=(64,)).astype("float32"),
"beta": np.random.uniform(-1, 1, size=(64,)).astype("float32"),
"mean": np.random.uniform(-1, 1, size=(64,)).astype("float32"),
"var": np.random.uniform(-1, 1, size=(64,)).astype("float32"),
}
conv_bf16 = relay.nn.conv2d(
relay.cast(data, "bfloat16"),
relay.cast(weight, "bfloat16"),
strides=(2, 2),
padding=(3, 3, 3, 3),
channels=64,
kernel_size=(7, 7),
out_dtype="bfloat16",
)
bn_bf16 = relay.nn.batch_norm(
conv_bf16,
relay.cast(bn_gamma, "bfloat16"),
relay.cast(bn_beta, "bfloat16"),
relay.cast(bn_mean, "bfloat16"),
relay.cast(bn_var, "bfloat16"),
)
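    # batch_norm returns a tuple; element 0 is the normalized output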
relu_bf16 = relay.nn.relu(bn_bf16[0])
maxpool_bf16 = relay.nn.max_pool2d(relu_bf16, pool_size=(2, 2), strides=(2, 2))
avgpool_bf16 = relay.nn.avg_pool2d(maxpool_bf16, pool_size=(2, 2), strides=(2, 2))
flattened_bf16 = relay.nn.batch_flatten(avgpool_bf16)
softmax_bf16 = relay.nn.softmax(flattened_bf16)
mod_bf16 = tvm.IRModule.from_expr(softmax_bf16)
with tvm.transform.PassContext(opt_level=3):
relay.build(mod_bf16, target="llvm", params=params)


@tvm.testing.parametrize_targets("llvm", "cuda")
def test_fp16_conversion(target, dev):
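    """Cast tensors between float32 and float16 and compare against NumPy's astype."""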
if target == "cuda" and not have_fp16(dev.compute_version):
print("skip because gpu does not support fp16")
return
n = 10
for (src, dst) in [("float32", "float16"), ("float16", "float32")]:
x = relay.var("x", relay.TensorType((n,), src))
y = x.astype(dst)
func = relay.Function([x], y)
# init input
X = tvm.nd.array(n * np.random.randn(n).astype(src) - n / 2)
# build
with tvm.transform.PassContext(opt_level=1):
g_json, mmod, params = relay.build(tvm.IRModule.from_expr(func), target)
# test
rt = tvm.contrib.graph_executor.create(g_json, mmod, dev)
rt.set_input("x", X)
rt.run()
out = rt.get_output(0)
np.testing.assert_allclose(out.numpy(), X.numpy().astype(dst), atol=1e-5, rtol=1e-5)


if __name__ == "__main__":
    test_basic_build()
    test_fp16_build()
    # test_fp16_conversion is parametrized; pass an explicit target/device when run directly
    test_fp16_conversion("llvm", tvm.cpu(0))
test_bf16_build()