simplify_fc_transpose()

Fold explicit weight transposes into the parameters of dense layers: nn.dense(x, transpose(w, [1, 0])) is rewritten to nn.dense(x, w_t), where the transposed weight w_t is computed once and stored in the parameter dict instead of being transposed on every run.
import numpy as np

import tvm
from tvm import relay
from tvm.contrib import graph_executor
from tvm.relay.data_dep_optimization import simplify_fc_transpose
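# Helper: compile a Relay function with the given params for CPU and run it on x.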
def run_func(func, params, x):
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(func, "llvm", params=params)

    dev = tvm.cpu(0)
    dtype = "float32"
    m = graph_executor.GraphModule(lib["default"](dev))
    # set inputs
    m.set_input("data", tvm.nd.array(x.astype(dtype)))
    # execute
    m.run()
    # get outputs
    tvm_output = m.get_output(0)
    return tvm_output.numpy()
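# Build a small two-layer network in which each dense op takes an explicitly
# transposed weight: nn.dense(x, transpose(w, axes=[1, 0])).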
data = relay.var("data", shape=(1, 32), dtype="float32")
x = relay.nn.relu(data)
w1 = relay.var("w1", shape=(32, 64), dtype="float32")
y = relay.nn.dense(x, relay.transpose(w1, axes=[1, 0]))
z = relay.nn.relu(y)
w2 = relay.var("w2", shape=(64, 16), dtype="float32")
zz = relay.nn.dense(z, relay.transpose(w2, axes=[1, 0]))
func = relay.Function(relay.analysis.free_vars(zz), zz)
print(func)
fn (%data: Tensor[(1, 32), float32], %w1: Tensor[(32, 64), float32], %w2: Tensor[(64, 16), float32]) {
  %0 = nn.relu(%data);
  %1 = transpose(%w1, axes=[1, 0]);
  %2 = nn.dense(%0, %1, units=None);
  %3 = nn.relu(%2);
  %4 = transpose(%w2, axes=[1, 0]);
  nn.dense(%3, %4, units=None)
}
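# Bind random weights. They are stored in (in_features, out_features) layout,
# which is why the graph above transposes them before each dense op.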
params = {
    "w1": tvm.nd.array(np.random.uniform(-1, 1, (32, 64)).astype("float32")),
    "w2": tvm.nd.array(np.random.uniform(-1, 1, (64, 16)).astype("float32")),
}
x_np = np.random.randn(1, 32).astype("float32")
old_result = run_func(func, params, x_np)
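# Run the pass: it folds each transpose into the corresponding weight,
# returning a rewritten function and a matching set of pre-transposed params.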
new_func, new_params = simplify_fc_transpose.convert(func, params)
print(new_func)
fn (%data: Tensor[(1, 32), float32] /* ty=Tensor[(1, 32), float32] */, %w1.T: Tensor[(64, 32), float32] /* ty=Tensor[(64, 32), float32] */, %w2.T: Tensor[(16, 64), float32] /* ty=Tensor[(16, 64), float32] */) -> Tensor[(1, 16), float32] {
  %0 = nn.relu(%data) /* ty=Tensor[(1, 32), float32] */;
  %1 = nn.dense(%0, %w1.T, units=None) /* ty=Tensor[(1, 64), float32] */;
  %2 = nn.relu(%1) /* ty=Tensor[(1, 64), float32] */;
  nn.dense(%2, %w2.T, units=None) /* ty=Tensor[(1, 16), float32] */
} /* ty=fn (Tensor[(1, 32), float32], Tensor[(64, 32), float32], Tensor[(16, 64), float32]) -> Tensor[(1, 16), float32] */
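# Note: the transpose ops are gone; the new parameters %w1.T and %w2.T already
# carry the transposed shapes (64, 32) and (16, 64).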
new_result = run_func(new_func, new_params, x_np)
np.testing.assert_allclose(old_result, new_result, atol=1e-5, rtol=1e-5)
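For intuition, the computation both functions implement can be restated in plain NumPy: nn.dense(x, w) computes x @ w.T, so each dense-plus-transpose pair collapses to an ordinary matrix product. A minimal sketch of that equivalence (np_forward is a helper name introduced here, not a TVM API):

def np_forward(x, w1, w2):
    # relu(data), then dense with transpose(w1): x @ (w1.T).T == x @ w1
    h = np.maximum(np.maximum(x, 0) @ w1, 0)
    # second dense with transpose(w2) collapses to h @ w2
    return h @ w2

np_result = np_forward(x_np, params["w1"].numpy(), params["w2"].numpy())
np.testing.assert_allclose(old_result, np_result, atol=1e-5, rtol=1e-5)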