测试 profiler#
import numpy as np
import tvm
import tvm.testing
from tvm.runtime.vm import VirtualMachine
from tvm import relax, rpc
from tvm.contrib import utils
from tvm.relax.testing import nn
from tvm.script import relax as R
def get_exec(data_shape):
    """Build and compile a tiny two-layer MLP (matmul + relu) for profiling.

    Parameters
    ----------
    data_shape : tuple of int
        Shape of the input placeholder; ``data_shape[1]`` is the input
        feature size fed to the first Linear layer.

    Returns
    -------
    tvm.runtime.Module
        Executable compiled for the "llvm" target with the random weights
        already bound as constants.
    """
    bb = relax.BlockBuilder()
    w1 = np.random.randn(64, 64).astype("float32")
    w2 = np.random.randn(64, 64).astype("float32")
    with bb.function("main"):
        mlp = nn.Sequential(
            nn.Linear(data_shape[1], w1.shape[0], bias=False),
            nn.ReLU(),
            nn.Linear(w2.shape[0], w2.shape[1], bias=False),
            nn.ReLU(),
        )
        inp = nn.Placeholder(data_shape, name="data")
        out = mlp(inp)
        bb.emit_func_output(out, params=[inp] + mlp.parameters())
    mod = bb.get()
    # Bind the weights as constants so the profile only shows compute
    # kernels rather than parameter plumbing.
    mod = relax.transform.BindParams(
        "main", {"linear_weight": w1, "linear_weight1": w2}
    )(mod)
    return tvm.compile(mod, "llvm")
测试 matmul cpu#
# Profile the compiled MLP on the local CPU and sanity-check the report:
# it must contain a timing column and the matmul kernel.
data_np = np.random.randn(1, 64).astype("float32")
ex = get_exec(data_np.shape)
vm = VirtualMachine(ex, tvm.cpu(), profile=True)
report = vm.profile("main", tvm.nd.array(data_np))
print(report)
report_text = str(report)
assert "Duration" in report_text
assert "matmul" in report_text
Name Duration (us) Percent Device Count Argument Shapes
matmul 12.94 15.70 cpu0 2 float32[1, 64], float32[64, 64], float32[1, 64]
vm.builtin.check_tensor_info 1.61 1.95 cpu0 1 float32[1, 64]
relu 1.43 1.74 cpu0 2 float32[1, 64], float32[1, 64]
vm.builtin.match_shape 1.26 1.53 cpu0 1 float32[1, 64]
----------
Sum 17.23 20.92 6
Total 82.39 cpu0 1
Configuration
-------------
Number of threads: 24
Executor: VM
测试 rpc#
def with_rpc(ex, f, data_np):
    """Serve *ex* over a local RPC server and run *f* against it.

    Exports the executable as a shared library, uploads it to an
    in-process RPC server on loopback, builds a profiling-enabled
    VirtualMachine from the remotely loaded module, and finally calls
    ``f(vm, data)`` with *data_np* copied to the remote device.

    Parameters
    ----------
    ex : tvm.runtime.Module
        Compiled VM executable to export and upload.
    f : callable
        Callback taking the remote VirtualMachine and the device array.
    data_np : numpy.ndarray
        Host-side input copied onto the remote CPU device.
    """
    tmp = utils.tempdir()
    lib_path = tmp.relpath("vm_library.so")
    ex.export_library(lib_path)
    # In-process server + loopback connection keeps the test hermetic.
    server = rpc.Server("127.0.0.1")
    remote = rpc.connect(server.host, server.port, session_timeout=10)
    remote.upload(lib_path)
    remote_mod = remote.load_module("vm_library.so")
    dev = remote.cpu()
    vm = VirtualMachine(remote_mod, device=dev, profile=True)
    f(vm, tvm.nd.array(data_np, dev))
# Exercise the profiler over RPC: profiling must work both when the input
# is passed to profile() directly and when it is bound via set_input.
data_np = np.random.randn(1, 64).astype("float32")
ex = get_exec(data_np.shape)


def callback(vm, data):
    # First call: input passed positionally; second: input pre-bound.
    vm.profile("main", data)
    vm.set_input("main", data)
    report = vm.profile("main")
    assert "matmul" in str(report)
    print(report)


with_rpc(ex, callback, data_np)
2025-08-25 16:25:51.924 INFO bind to 127.0.0.1:9091
2025-08-25 16:25:51.925 INFO connected from ('127.0.0.1', 36662)
2025-08-25 16:25:51.926 INFO start serving at /tmp/tmptnsl0kx3
2025-08-25 16:25:51.941 INFO load_module /tmp/tmptnsl0kx3/vm_library.so
Name Duration (us) Percent Device Count Argument Shapes
matmul 8.40 14.48 cpu0 2 float32[1, 64], float32[64, 64], float32[1, 64]
vm.builtin.check_tensor_info 1.72 2.97 cpu0 1 float32[1, 64]
relu 0.92 1.58 cpu0 2 float32[1, 64], float32[1, 64]
vm.builtin.match_shape 0.78 1.34 cpu0 1 float32[1, 64]
----------
Sum 11.81 20.37 6
Total 58.00 cpu0 1
Configuration
-------------
Number of threads: 24
Executor: VM
测试元组#
# TVMScript module whose entry point returns a nested tuple; used below to
# verify the profiler records the VM's tuple-construction builtin
# (vm.builtin.make_tuple).
@tvm.script.ir_module
class NestedTuple:
    @R.function
    def main(
        x: R.Tensor((16,), "float32")
    ) -> R.Tuple(
        R.Tuple(
            R.Tensor((16,), "float32"),
            R.Tuple(
                R.Tensor((16,), "float32"),
            ),
        ),
        R.Tensor((16,), "float32"),
    ):
        # Pack the same tensor into the nested structure ((x, (x,)), x).
        return ((x, (x,)), x)
# Compile the nested-tuple module and confirm tuple construction appears
# in the profiling report produced over the RPC path.
target = "llvm"
ex = tvm.compile(NestedTuple, target)
data_np = np.random.randn(16).astype("float32")


def callback(vm, data):
    profile_report = vm.profile("main", data)
    # Tuple construction lowers to the VM builtin make_tuple.
    assert "vm.builtin.make_tuple" in str(profile_report)


with_rpc(ex, callback, data_np)
2025-08-25 16:26:22.805 INFO bind to 127.0.0.1:9091
2025-08-25 16:26:22.806 INFO connected from ('127.0.0.1', 33636)
2025-08-25 16:26:22.806 INFO start serving at /tmp/tmpdvxl0sd8
2025-08-25 16:26:22.821 INFO load_module /tmp/tmpdvxl0sd8/vm_library.so