测试 profiler#
import numpy as np
import tvm
import tvm.testing
from tvm.runtime.vm import VirtualMachine
from tvm import relax, rpc
from tvm.contrib import utils
from tvm.relax.testing import nn
from tvm.script import relax as R
def get_exec(data_shape):
    """Build and compile a tiny two-layer MLP (matmul + relu) for profiling.

    Parameters
    ----------
    data_shape : tuple of int
        Shape of the input placeholder; ``data_shape[1]`` is the input
        feature size fed to the first Linear layer.

    Returns
    -------
    tvm.runtime.Module
        Executable compiled for the "llvm" target with the random weights
        already bound as constants.
    """
    bb = relax.BlockBuilder()
    w1 = np.random.randn(64, 64).astype("float32")
    w2 = np.random.randn(64, 64).astype("float32")
    with bb.function("main"):
        mlp = nn.Sequential(
            nn.Linear(data_shape[1], w1.shape[0], bias=False),
            nn.ReLU(),
            nn.Linear(w2.shape[0], w2.shape[1], bias=False),
            nn.ReLU(),
        )
        inp = nn.Placeholder(data_shape, name="data")
        out = mlp(inp)
        bb.emit_func_output(out, params=[inp] + mlp.parameters())
    mod = bb.get()
    # Bind the weights as constants so the profile only shows compute
    # kernels rather than parameter plumbing.
    mod = relax.transform.BindParams(
        "main", {"linear_weight": w1, "linear_weight1": w2}
    )(mod)
    return tvm.compile(mod, "llvm")
测试 matmul cpu#
# Profile the compiled MLP on the local CPU and sanity-check the report:
# it must contain a timing column and the matmul kernel.
data_np = np.random.randn(1, 64).astype("float32")
ex = get_exec(data_np.shape)
vm = VirtualMachine(ex, tvm.cpu(), profile=True)
report = vm.profile("main", tvm.nd.array(data_np))
print(report)
report_text = str(report)
assert "Duration" in report_text
assert "matmul" in report_text
Name Duration (us) Percent Device Count Argument Shapes
matmul 12.94 15.70 cpu0 2 float32[1, 64], float32[64, 64], float32[1, 64]
vm.builtin.check_tensor_info 1.61 1.95 cpu0 1 float32[1, 64]
relu 1.43 1.74 cpu0 2 float32[1, 64], float32[1, 64]
vm.builtin.match_shape 1.26 1.53 cpu0 1 float32[1, 64]
----------
Sum 17.23 20.92 6
Total 82.39 cpu0 1
Configuration
-------------
Number of threads: 24
Executor: VM
测试 rpc#
def with_rpc(ex, f, data_np):
    """Serve *ex* over a local RPC server and run *f* against it.

    Exports the executable as a shared library, uploads it to an
    in-process RPC server on loopback, builds a profiling-enabled
    VirtualMachine from the remotely loaded module, and finally calls
    ``f(vm, data)`` with *data_np* copied to the remote device.

    Parameters
    ----------
    ex : tvm.runtime.Module
        Compiled VM executable to export and upload.
    f : callable
        Callback taking the remote VirtualMachine and the device array.
    data_np : numpy.ndarray
        Host-side input copied onto the remote CPU device.
    """
    tmp = utils.tempdir()
    lib_path = tmp.relpath("vm_library.so")
    ex.export_library(lib_path)
    # In-process server + loopback connection keeps the test hermetic.
    server = rpc.Server("127.0.0.1")
    remote = rpc.connect(server.host, server.port, session_timeout=10)
    remote.upload(lib_path)
    remote_mod = remote.load_module("vm_library.so")
    dev = remote.cpu()
    vm = VirtualMachine(remote_mod, device=dev, profile=True)
    f(vm, tvm.nd.array(data_np, dev))
# Exercise the profiler over RPC: profiling must work both when the input
# is passed to profile() directly and when it is bound via set_input.
data_np = np.random.randn(1, 64).astype("float32")
ex = get_exec(data_np.shape)


def callback(vm, data):
    # First call: input passed positionally; second: input pre-bound.
    vm.profile("main", data)
    vm.set_input("main", data)
    report = vm.profile("main")
    assert "matmul" in str(report)
    print(report)


with_rpc(ex, callback, data_np)
2025-08-25 16:25:51.924 INFO bind to 127.0.0.1:9091
2025-08-25 16:25:51.925 INFO connected from ('127.0.0.1', 36662)
2025-08-25 16:25:51.926 INFO start serving at /tmp/tmptnsl0kx3
2025-08-25 16:25:51.941 INFO load_module /tmp/tmptnsl0kx3/vm_library.so
Name Duration (us) Percent Device Count Argument Shapes
matmul 8.40 14.48 cpu0 2 float32[1, 64], float32[64, 64], float32[1, 64]
vm.builtin.check_tensor_info 1.72 2.97 cpu0 1 float32[1, 64]
relu 0.92 1.58 cpu0 2 float32[1, 64], float32[1, 64]
vm.builtin.match_shape 0.78 1.34 cpu0 1 float32[1, 64]
----------
Sum 11.81 20.37 6
Total 58.00 cpu0 1
Configuration
-------------
Number of threads: 24
Executor: VM
测试元组#
# TVMScript module whose entry point returns a nested tuple; used below to
# verify the profiler records the VM's tuple-construction builtin
# (vm.builtin.make_tuple).
@tvm.script.ir_module
class NestedTuple:
    @R.function
    def main(
        x: R.Tensor((16,), "float32")
    ) -> R.Tuple(
        R.Tuple(
            R.Tensor((16,), "float32"),
            R.Tuple(
                R.Tensor((16,), "float32"),
            ),
        ),
        R.Tensor((16,), "float32"),
    ):
        # Pack the same tensor into the nested structure ((x, (x,)), x).
        return ((x, (x,)), x)
# Compile the nested-tuple module and confirm tuple construction appears
# in the profiling report produced over the RPC path.
target = "llvm"
ex = tvm.compile(NestedTuple, target)
data_np = np.random.randn(16).astype("float32")


def callback(vm, data):
    profile_report = vm.profile("main", data)
    # Tuple construction lowers to the VM builtin make_tuple.
    assert "vm.builtin.make_tuple" in str(profile_report)


with_rpc(ex, callback, data_np)
2025-08-25 16:26:22.805 INFO bind to 127.0.0.1:9091
2025-08-25 16:26:22.806 INFO connected from ('127.0.0.1', 33636)
2025-08-25 16:26:22.806 INFO start serving at /tmp/tmpdvxl0sd8
2025-08-25 16:26:22.821 INFO load_module /tmp/tmpdvxl0sd8/vm_library.so