BasePyModule#
BasePyModule is a key component of TVM Relax: it provides the ability to integrate and execute TIR (Tensor Intermediate Representation) functions directly from a Python environment.
This page exercises the core functionality of BasePyModule:
- instantiation of BasePyModule and its basic methods
- compilation and execution of TIR functions
- integration of Python functions
- DLPack conversion between PyTorch and TVM
# Import the test framework and the required libraries
import pytest
import torch
import tvm
from tvm import relax, tir
from tvm.script import relax as R, tir as T
from tvm.relax import BasePyModule
import numpy as np
Testing basic instantiation of BasePyModule#
# Define a simple TIR primitive function that multiplies each element of the input by 2
@T.prim_func
def simple_func(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")):
    for i in T.grid(10):
        B[i] = A[i] * 2.0

# Create an IRModule and add the TIR function to it
ir_mod = tvm.IRModule({"simple_func": simple_func})
# Execute on the CPU
device = tvm.cpu(0)
# Instantiate BasePyModule
py_mod = BasePyModule(ir_mod, device)

# Verify that instantiation succeeded and that the expected attributes and methods are present
assert isinstance(py_mod, BasePyModule)
assert hasattr(py_mod, "call_tir")
assert hasattr(py_mod, "call_dps_packed")
assert hasattr(py_mod, "compiled_tir_funcs")
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'

This warning is expected here: the IRModule contains no Relax function, so no VM executable can be produced. The TIR call path used throughout this page is unaffected.
Testing instantiation of BasePyModule on the GPU#
# Define a simple TIR primitive function
@T.prim_func
def simple_func(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")):
    for i in T.grid(10):
        B[i] = A[i] * 2.0

# Create the IRModule
ir_mod = tvm.IRModule({"simple_func": simple_func})

# Check whether CUDA is available
if tvm.cuda().exist:
    # Instantiate BasePyModule on the GPU device
    device = tvm.cuda(0)
    py_mod = BasePyModule(ir_mod, device)

    # Verify that the GPU instantiation succeeded
    assert isinstance(py_mod, BasePyModule)
    assert hasattr(py_mod, "call_tir")
    assert hasattr(py_mod, "call_dps_packed")
    assert hasattr(py_mod, "compiled_tir_funcs")

    # Check that the target is a CUDA target
    assert "cuda" in str(py_mod.target)
else:
    # Skip the test when CUDA is unavailable
    pytest.skip("CUDA not available")
Warning: Failed to compile one or more TIR functions: Memory verification failed with the following errors:
Variable `B` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
Variable `A` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
Did you forget to bind?
# from tvm.script import tir as T
@T.prim_func
def simple_func(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")):
    T.func_attr({"target": T.target({"arch": "sm_86", "host": {"keys": ["cpu"], "kind": "llvm", "mtriple": "x86_64-unknown-linux-gnu", "tag": ""}, "keys": ["cuda", "gpu"], "kind": "cuda", "max_num_threads": 1024, "max_shared_memory_per_block": 49152, "max_threads_per_block": 1024, "tag": "", "thread_warp_size": 32})})
    for i in range(10):
        B_1 = T.Buffer((10,), data=B.data)
        A_1 = T.Buffer((10,), data=A.data)
        B_1[i] = A_1[i] * T.float32(2.0)

Warning: Failed to compile Relax VM: Memory verification failed with the following errors:
Variable `B` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
Variable `A` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
Did you forget to bind?
# from tvm.script import tir as T
@T.prim_func
def simple_func(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")):
    T.func_attr({"target": T.target({"arch": "sm_86", "host": {"keys": ["cpu"], "kind": "llvm", "mtriple": "x86_64-unknown-linux-gnu", "tag": ""}, "keys": ["cuda", "gpu"], "kind": "cuda", "max_num_threads": 1024, "max_shared_memory_per_block": 49152, "max_threads_per_block": 1024, "tag": "", "thread_warp_size": 32})})
    for i in range(10):
        B_1 = T.Buffer((10,), data=B.data)
        A_1 = T.Buffer((10,), data=A.data)
        B_1[i] = A_1[i] * T.float32(2.0)
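Both failures point at the same root cause: the plain for loop is never bound to a GPU thread axis, so the generated code would touch A and B from host memory. A minimal GPU-ready variant can be sketched with the standard TVMScript T.thread_binding construct (this variant, named simple_func_gpu here, is not part of the original test):

# Hypothetical GPU-ready variant: binding the loop to threadIdx.x keeps all
# buffer accesses inside a device thread environment, so memory verification passes.
@T.prim_func
def simple_func_gpu(A: T.Buffer((10,), "float32"), B: T.Buffer((10,), "float32")):
    for i in T.thread_binding(10, thread="threadIdx.x"):
        B[i] = A[i] * 2.0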
Testing compilation of TIR functions#
# Define a TIR function that performs elementwise array addition
@T.prim_func
def add_func(
    A: T.Buffer((5,), "float32"), B: T.Buffer((5,), "float32"), C: T.Buffer((5,), "float32")
):
    for i in T.grid(5):
        C[i] = A[i] + B[i]

# Create the IRModule and a BasePyModule instance
ir_mod = tvm.IRModule({"add_func": add_func})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)

# Verify that the TIR function has been compiled and stored
assert "add_func" in py_mod.tir_func_names
assert "add_func" in py_mod.compiled_tir_funcs
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
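Once compiled, the stored function can be driven through call_tir just like the other examples on this page. A small usage sketch reusing the add_func and py_mod defined above (input values chosen here purely for illustration):

a = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0], dtype=torch.float32)
b = torch.tensor([5.0, 4.0, 3.0, 2.0, 1.0], dtype=torch.float32)
# call_tir allocates the output described by the StructInfo and returns it as a torch.Tensor
out = py_mod.call_tir(add_func, [a, b], R.Tensor((5,), "float32"))
assert torch.allclose(out, a + b, atol=1e-5)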
Testing calls to TIR functions with PyTorch tensors#
# Define a TIR function that multiplies the input array by 2.5
@T.prim_func
def scale_func(A: T.Buffer((4,), "float32"), B: T.Buffer((4,), "float32")):
    for i in T.grid(4):
        B[i] = A[i] * T.float32(2.5)

# Create the IRModule and a BasePyModule instance
ir_mod = tvm.IRModule({"scale_func": scale_func})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)

# Create the PyTorch input tensor
input_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
scale_value = 2.5

# Call the TIR function and fetch the result
result = py_mod.call_tir(scale_func, [input_tensor], R.Tensor((4,), "float32"))

# Verify the result
assert isinstance(result, torch.Tensor)
assert result.shape == (4,)
expected = input_tensor * scale_value
assert torch.allclose(result, expected, atol=1e-5)
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
Testing calls to TIR functions with PyTorch tensors on the GPU#
# Check whether CUDA is available
if tvm.cuda().exist:
    # Create an IRModule without TIR functions (only basic GPU functionality is exercised here)
    ir_mod = tvm.IRModule({})
    device = tvm.cuda(0)
    py_mod = BasePyModule(ir_mod, device)

    # Verify that the GPU instantiation succeeded
    assert isinstance(py_mod, BasePyModule)
    assert hasattr(py_mod, "call_tir")
    assert hasattr(py_mod, "call_dps_packed")
    assert "cuda" in str(py_mod.target)

    # Check that GPU tensors can be created and used
    input_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32, device="cuda")
    assert input_tensor.device.type == "cuda"
    assert input_tensor.shape == (4,)
else:
    # Skip the test when CUDA is unavailable
    pytest.skip("CUDA not available")
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
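Putting this together with the thread-bound variant sketched earlier gives a full GPU round trip. This is a hedged sketch, not part of the original test; it assumes the hypothetical simple_func_gpu from above compiles cleanly for the CUDA target:

if tvm.cuda().exist:
    gpu_mod = BasePyModule(tvm.IRModule({"simple_func_gpu": simple_func_gpu}), tvm.cuda(0))
    x = torch.arange(10, dtype=torch.float32, device="cuda")
    # DLPack passes the CUDA tensor to TVM without a copy; the result comes back as a torch.Tensor
    y = gpu_mod.call_tir(simple_func_gpu, [x], R.Tensor((10,), "float32"))
    assert torch.allclose(y.cpu(), (x * 2.0).cpu(), atol=1e-5)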
Testing DLPack conversion from PyTorch to TVM#
# Define an identity TIR function (output equals input)
@T.prim_func
def identity_func(A: T.Buffer((3,), "float32"), B: T.Buffer((3,), "float32")):
    for i in T.grid(3):
        B[i] = A[i]

# Create the IRModule and a BasePyModule instance
ir_mod = tvm.IRModule({"identity_func": identity_func})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)

# Create the PyTorch input tensor
input_tensor = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32)

# Call the TIR function and fetch the result (this exercises the DLPack conversion)
result = py_mod.call_tir(identity_func, [input_tensor], R.Tensor((3,), "float32"))

# Verify that the result equals the input (identity)
assert isinstance(result, torch.Tensor)
assert torch.allclose(result, input_tensor, atol=1e-5)
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
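Under the hood, call_tir hands PyTorch tensors to TVM via the DLPack protocol rather than copying them. The manual equivalent can be sketched as follows; this illustrates the protocol itself and is not necessarily the exact code path inside BasePyModule:

from torch.utils.dlpack import to_dlpack

# Export a DLPack capsule from PyTorch and wrap it as a TVM NDArray (zero-copy handoff)
t = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32)
tvm_array = tvm.nd.from_dlpack(to_dlpack(t))
assert tvm_array.shape == (3,)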
Testing DLPack conversion from TVM to PyTorch#
# Define a TIR function that produces a constant value
@T.prim_func
def constant_func(B: T.Buffer((2,), "float32")):
    for i in T.grid(2):
        B[i] = T.float32(5.0)

# Create the IRModule and a BasePyModule instance
ir_mod = tvm.IRModule({"constant_func": constant_func})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)

# Call the TIR function and fetch the result
result = py_mod.call_tir(constant_func, [], R.Tensor((2,), "float32"))

# Verify the result
assert isinstance(result, torch.Tensor)
assert result.shape == (2,)
expected = torch.tensor([5.0, 5.0], dtype=torch.float32)
assert torch.allclose(result, expected, atol=1e-5)
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
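The reverse direction also goes through DLPack: a TVM NDArray exports a capsule that PyTorch wraps without copying. A minimal sketch, again as an illustration of the protocol and assuming the NDArray.to_dlpack() method provided by TVM's runtime:

from torch.utils.dlpack import from_dlpack

# Wrap a TVM NDArray as a torch.Tensor view of the same buffer
tvm_array = tvm.nd.array(np.array([5.0, 5.0], dtype="float32"), device=tvm.cpu(0))
torch_view = from_dlpack(tvm_array.to_dlpack())
assert torch.allclose(torch_view, torch.tensor([5.0, 5.0]))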
Testing adding Python functions to BasePyModule#
# Create an IRModule without TIR functions
ir_mod = tvm.IRModule({})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)

# Define a custom activation function (tanh)
def custom_activation(x):
    return torch.tanh(x)

# Add the Python function to the BasePyModule
py_mod.add_python_function("custom_activation", custom_activation)

# Verify that the function was added
assert hasattr(py_mod, "custom_activation")
assert "custom_activation" in py_mod.pyfuncs

# Check that the added Python function works
input_tensor = torch.tensor([1.0, -1.0, 0.0], dtype=torch.float32)
result = py_mod.custom_activation(input_tensor)

# Verify the result
assert isinstance(result, torch.Tensor)
expected = torch.tanh(input_tensor)
assert torch.allclose(result, expected, atol=1e-5)
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
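Registered functions are reachable both as attributes (used above) and through the pyfuncs mapping checked by the assertion. Assuming the values in pyfuncs are the registered callables themselves (not confirmed by the original test), the two call styles should agree:

via_attr = py_mod.custom_activation(input_tensor)
via_dict = py_mod.pyfuncs["custom_activation"](input_tensor)  # assumption: values are the callables
assert torch.allclose(via_attr, via_dict)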
Testing calls to Python functions via call_dps_packed#
# Create an IRModule without TIR functions
ir_mod = tvm.IRModule({})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)

# Define a custom softmax function
def my_softmax(tensor, dim):
    return torch.softmax(tensor, dim=dim)

# Add the Python function to the BasePyModule
py_mod.add_python_function("my_softmax", my_softmax)

# Create the PyTorch input tensor
input_tensor = torch.tensor([[1.0, 2.0], [3.0, 4.0]], dtype=torch.float32)

# Call the registered Python function through call_dps_packed
result = py_mod.call_dps_packed(
    "my_softmax", [input_tensor, 1], R.Tensor((2, 2), "float32")
)

# Verify the result
assert isinstance(result, torch.Tensor)
expected = torch.softmax(input_tensor, dim=1)
assert torch.allclose(result, expected, atol=1e-5)
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
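The two dispatch mechanisms compose: a Python function called through call_dps_packed can feed a compiled TIR function called through call_tir. A closing sketch under the same assumptions as the rest of this page, reusing my_softmax from this section and the 4-element scale_func defined earlier:

# Hypothetical two-stage pipeline: Python softmax, then the TIR 2.5x scaler
pipe_mod = BasePyModule(tvm.IRModule({"scale_func": scale_func}), tvm.cpu(0))
pipe_mod.add_python_function("my_softmax", my_softmax)

x = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
probs = pipe_mod.call_dps_packed("my_softmax", [x, 0], R.Tensor((4,), "float32"))
scaled = pipe_mod.call_tir(scale_func, [probs], R.Tensor((4,), "float32"))
assert torch.allclose(scaled, torch.softmax(x, dim=0) * 2.5, atol=1e-5)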