BasePyModule#
BasePyModule is a key component of TVM Relax: it provides the ability to integrate and execute TIR (Tensor Intermediate Representation) functions directly from a Python environment.
This page exercises the core functionality of BasePyModule:
- instantiation of BasePyModule and its basic methods
- compilation and execution of TIR functions
- integration of Python functions
- DLPack conversion between PyTorch and TVM
# Import the test framework and the required libraries
import pytest
import torch
import tvm
from tvm import relax, tir
from tvm.script import relax as R, tir as T
from tvm.relax import BasePyModule
import numpy as np
Testing basic instantiation of BasePyModule#
# Define a simple TIR primitive function that multiplies each element of the input by 2
@T.prim_func
def simple_func(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")):
    for i in T.grid(10):
        B[i] = A[i] * 2.0

# Create an IRModule and add the TIR function to it
ir_mod = tvm.IRModule({"simple_func": simple_func})
# Execute on the CPU
device = tvm.cpu(0)
# Instantiate BasePyModule
py_mod = BasePyModule(ir_mod, device)

# Verify that instantiation succeeded and that the expected attributes and methods are present
assert isinstance(py_mod, BasePyModule)
assert hasattr(py_mod, "call_tir")
assert hasattr(py_mod, "call_dps_packed")
assert hasattr(py_mod, "compiled_tir_funcs")
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'

This warning is expected here: the IRModule contains no Relax function, so no VM executable can be produced. The TIR call path used throughout this page is unaffected.
Testing instantiation of BasePyModule on the GPU#
# Define a simple TIR primitive function
@T.prim_func
def simple_func(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")):
    for i in T.grid(10):
        B[i] = A[i] * 2.0

# Create the IRModule
ir_mod = tvm.IRModule({"simple_func": simple_func})

# Check whether CUDA is available
if tvm.cuda().exist:
    # Instantiate BasePyModule on the GPU device
    device = tvm.cuda(0)
    py_mod = BasePyModule(ir_mod, device)

    # Verify that the GPU instantiation succeeded
    assert isinstance(py_mod, BasePyModule)
    assert hasattr(py_mod, "call_tir")
    assert hasattr(py_mod, "call_dps_packed")
    assert hasattr(py_mod, "compiled_tir_funcs")

    # Check that the target is a CUDA target
    assert "cuda" in str(py_mod.target)
else:
    # Skip the test when CUDA is unavailable
    pytest.skip("CUDA not available")
Warning: Failed to compile one or more TIR functions: Memory verification failed with the following errors:
Variable `B` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
Variable `A` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
Did you forget to bind?
# from tvm.script import tir as T
@T.prim_func
def simple_func(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")):
    T.func_attr({"target": T.target({"arch": "sm_86", "host": {"keys": ["cpu"], "kind": "llvm", "mtriple": "x86_64-unknown-linux-gnu", "tag": ""}, "keys": ["cuda", "gpu"], "kind": "cuda", "max_num_threads": 1024, "max_shared_memory_per_block": 49152, "max_threads_per_block": 1024, "tag": "", "thread_warp_size": 32})})
    for i in range(10):
        B_1 = T.Buffer((10,), data=B.data)
        A_1 = T.Buffer((10,), data=A.data)
        B_1[i] = A_1[i] * T.float32(2.0)

Warning: Failed to compile Relax VM: Memory verification failed with the following errors:
Variable `B` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
Variable `A` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
Did you forget to bind?
# from tvm.script import tir as T
@T.prim_func
def simple_func(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")):
    T.func_attr({"target": T.target({"arch": "sm_86", "host": {"keys": ["cpu"], "kind": "llvm", "mtriple": "x86_64-unknown-linux-gnu", "tag": ""}, "keys": ["cuda", "gpu"], "kind": "cuda", "max_num_threads": 1024, "max_shared_memory_per_block": 49152, "max_threads_per_block": 1024, "tag": "", "thread_warp_size": 32})})
    for i in range(10):
        B_1 = T.Buffer((10,), data=B.data)
        A_1 = T.Buffer((10,), data=A.data)
        B_1[i] = A_1[i] * T.float32(2.0)
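Both failures point at the same root cause: the plain for loop is never bound to a GPU thread axis, so the generated code would touch A and B from host memory. A minimal GPU-ready variant can be sketched with the standard TVMScript T.thread_binding construct (this variant, named simple_func_gpu here, is not part of the original test):

# Hypothetical GPU-ready variant: binding the loop to threadIdx.x keeps all
# buffer accesses inside a device thread environment, so memory verification passes.
@T.prim_func
def simple_func_gpu(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")):
    for i in T.thread_binding(10, thread="threadIdx.x"):
        B[i] = A[i] * 2.0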
Testing compilation of TIR functions#
# Define a TIR function that performs elementwise array addition
@T.prim_func
def add_func(
    A: T.Buffer((5,), "float32"), B: T.Buffer((5,), "float32"), C: T.Buffer((5,), "float32")
):
    for i in T.grid(5):
        C[i] = A[i] + B[i]

# Create the IRModule and a BasePyModule instance
ir_mod = tvm.IRModule({"add_func": add_func})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)

# Verify that the TIR function has been compiled and stored
assert "add_func" in py_mod.tir_func_names
assert "add_func" in py_mod.compiled_tir_funcs
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
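Once compiled, the stored function can be driven through call_tir just like the other examples on this page. A small usage sketch reusing the add_func and py_mod defined above (input values chosen here purely for illustration):

a = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0], dtype=torch.float32)
b = torch.tensor([5.0, 4.0, 3.0, 2.0, 1.0], dtype=torch.float32)
# call_tir allocates the output described by the StructInfo and returns it as a torch.Tensor
out = py_mod.call_tir(add_func, [a, b], R.Tensor((5,), "float32"))
assert torch.allclose(out, a + b, atol=1e-5)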
Testing calls to TIR functions with PyTorch tensors#
# Define a TIR function that multiplies the input array by 2.5
@T.prim_func
def scale_func(A: T.Buffer((4,), "float32"), B: T.Buffer((4,), "float32")):
    for i in T.grid(4):
        B[i] = A[i] * T.float32(2.5)

# Create the IRModule and a BasePyModule instance
ir_mod = tvm.IRModule({"scale_func": scale_func})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)

# Create the PyTorch input tensor
input_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
scale_value = 2.5

# Call the TIR function and fetch the result
result = py_mod.call_tir(scale_func, [input_tensor], R.Tensor((4,), "float32"))

# Verify the result
assert isinstance(result, torch.Tensor)
assert result.shape == (4,)
expected = input_tensor * scale_value
assert torch.allclose(result, expected, atol=1e-5)
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
Testing calls to TIR functions with PyTorch tensors on the GPU#
# Check whether CUDA is available
if tvm.cuda().exist:
    # Create an IRModule without TIR functions (only basic GPU functionality is exercised here)
    ir_mod = tvm.IRModule({})
    device = tvm.cuda(0)
    py_mod = BasePyModule(ir_mod, device)

    # Verify that the GPU instantiation succeeded
    assert isinstance(py_mod, BasePyModule)
    assert hasattr(py_mod, "call_tir")
    assert hasattr(py_mod, "call_dps_packed")
    assert "cuda" in str(py_mod.target)

    # Check that GPU tensors can be created and used
    input_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32, device="cuda")
    assert input_tensor.device.type == "cuda"
    assert input_tensor.shape == (4,)
else:
    # Skip the test when CUDA is unavailable
    pytest.skip("CUDA not available")
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
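Putting this together with the thread-bound variant sketched earlier gives a full GPU round trip. This is a hedged sketch, not part of the original test; it assumes the hypothetical simple_func_gpu from above compiles cleanly for the CUDA target:

if tvm.cuda().exist:
    gpu_mod = BasePyModule(tvm.IRModule({"simple_func_gpu": simple_func_gpu}), tvm.cuda(0))
    x = torch.arange(10, dtype=torch.float32, device="cuda")
    # DLPack passes the CUDA tensor to TVM without a copy; the result comes back as a torch.Tensor
    y = gpu_mod.call_tir(simple_func_gpu, [x], R.Tensor((10,), "float32"))
    assert torch.allclose(y.cpu(), (x * 2.0).cpu(), atol=1e-5)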
Testing DLPack conversion from PyTorch to TVM#
# Define an identity TIR function (output equals input)
@T.prim_func
def identity_func(A: T.Buffer((3,), "float32"), B: T.Buffer((3,), "float32")):
    for i in T.grid(3):
        B[i] = A[i]

# Create the IRModule and a BasePyModule instance
ir_mod = tvm.IRModule({"identity_func": identity_func})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)

# Create the PyTorch input tensor
input_tensor = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32)

# Call the TIR function and fetch the result (this exercises the DLPack conversion)
result = py_mod.call_tir(identity_func, [input_tensor], R.Tensor((3,), "float32"))

# Verify that the result equals the input (identity)
assert isinstance(result, torch.Tensor)
assert torch.allclose(result, input_tensor, atol=1e-5)
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
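Under the hood, call_tir hands PyTorch tensors to TVM via the DLPack protocol rather than copying them. The manual equivalent can be sketched as follows; this illustrates the protocol itself and is not necessarily the exact code path inside BasePyModule:

from torch.utils.dlpack import to_dlpack

# Export a DLPack capsule from PyTorch and wrap it as a TVM NDArray (zero-copy handoff)
t = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32)
tvm_array = tvm.nd.from_dlpack(to_dlpack(t))
assert tvm_array.shape == (3,)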
Testing DLPack conversion from TVM to PyTorch#
# Define a TIR function that produces a constant value
@T.prim_func
def constant_func(B: T.Buffer((2,), "float32")):
    for i in T.grid(2):
        B[i] = T.float32(5.0)

# Create the IRModule and a BasePyModule instance
ir_mod = tvm.IRModule({"constant_func": constant_func})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)

# Call the TIR function and fetch the result
result = py_mod.call_tir(constant_func, [], R.Tensor((2,), "float32"))

# Verify the result
assert isinstance(result, torch.Tensor)
assert result.shape == (2,)
expected = torch.tensor([5.0, 5.0], dtype=torch.float32)
assert torch.allclose(result, expected, atol=1e-5)
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
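The reverse direction also goes through DLPack: a TVM NDArray exports a capsule that PyTorch wraps without copying. A minimal sketch, again as an illustration of the protocol and assuming the NDArray.to_dlpack() method provided by TVM's runtime:

from torch.utils.dlpack import from_dlpack

# Wrap a TVM NDArray as a torch.Tensor view of the same buffer
tvm_array = tvm.nd.array(np.array([5.0, 5.0], dtype="float32"), device=tvm.cpu(0))
torch_view = from_dlpack(tvm_array.to_dlpack())
assert torch.allclose(torch_view, torch.tensor([5.0, 5.0]))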
Testing adding Python functions to BasePyModule#
# Create an IRModule without TIR functions
ir_mod = tvm.IRModule({})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)

# Define a custom activation function (tanh)
def custom_activation(x):
    return torch.tanh(x)

# Add the Python function to the BasePyModule
py_mod.add_python_function("custom_activation", custom_activation)

# Verify that the function was added
assert hasattr(py_mod, "custom_activation")
assert "custom_activation" in py_mod.pyfuncs

# Check that the added Python function works
input_tensor = torch.tensor([1.0, -1.0, 0.0], dtype=torch.float32)
result = py_mod.custom_activation(input_tensor)

# Verify the result
assert isinstance(result, torch.Tensor)
expected = torch.tanh(input_tensor)
assert torch.allclose(result, expected, atol=1e-5)
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
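Registered functions are reachable both as attributes (used above) and through the pyfuncs mapping checked by the assertion. Assuming the values in pyfuncs are the registered callables themselves (not confirmed by the original test), the two call styles should agree:

via_attr = py_mod.custom_activation(input_tensor)
via_dict = py_mod.pyfuncs["custom_activation"](input_tensor)  # assumption: values are the callables
assert torch.allclose(via_attr, via_dict)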
Testing calls to Python functions via call_dps_packed#
# Create an IRModule without TIR functions
ir_mod = tvm.IRModule({})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)

# Define a custom softmax function
def my_softmax(tensor, dim):
    return torch.softmax(tensor, dim=dim)

# Add the Python function to the BasePyModule
py_mod.add_python_function("my_softmax", my_softmax)

# Create the PyTorch input tensor
input_tensor = torch.tensor([[1.0, 2.0], [3.0, 4.0]], dtype=torch.float32)

# Call the registered Python function through call_dps_packed
result = py_mod.call_dps_packed(
    "my_softmax", [input_tensor, 1], R.Tensor((2, 2), "float32")
)

# Verify the result
assert isinstance(result, torch.Tensor)
expected = torch.softmax(input_tensor, dim=1)
assert torch.allclose(result, expected, atol=1e-5)
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
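The two dispatch mechanisms compose: a Python function called through call_dps_packed can feed a compiled TIR function called through call_tir. A closing sketch under the same assumptions as the rest of this page, reusing my_softmax from this section and the 4-element scale_func defined earlier:

# Hypothetical two-stage pipeline: Python softmax, then the TIR 2.5x scaler
pipe_mod = BasePyModule(tvm.IRModule({"scale_func": scale_func}), tvm.cpu(0))
pipe_mod.add_python_function("my_softmax", my_softmax)

x = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
probs = pipe_mod.call_dps_packed("my_softmax", [x, 0], R.Tensor((4,), "float32"))
scaled = pipe_mod.call_tir(scale_func, [probs], R.Tensor((4,), "float32"))
assert torch.allclose(scaled, torch.softmax(x, dim=0) * 2.5, atol=1e-5)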