PyTorch 和 TVM 之间的 DLPack 集成#

此测试验证以下内容:

  1. DLPack 从 PyTorch 到 TVM 的转换

  2. DLPack 从 TVM 到 PyTorch 的转换

  3. 转换过程中数据完整性的保存

  4. DLPack 和 numpy 回退机制之间的功能等价性

  5. 对不支持的数据类型的错误处理

# 导入测试框架和必要的库
import pytest
import torch
import tvm
from tvm import relax, tir
from tvm.script import relax as R, tir as T
from tvm.relax import BasePyModule
import numpy as np

测试从 PyTorch 张量到 TVM NDArray 的 DLPack 转换#

# Host-side float32 tensor used as the conversion input.
source_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0], dtype=torch.float32)

# Hand the tensor to TVM through the DLPack protocol.
converted = tvm.nd.from_dlpack(source_tensor)

# The result must be a TVM NDArray whose shape and dtype mirror the source.
assert isinstance(converted, tvm.nd.NDArray)
assert converted.shape == source_tensor.shape
assert str(converted.dtype) == str(source_tensor.dtype).replace("torch.", "")

# Element values must survive the conversion unchanged.
converted_np = converted.numpy()
source_np = source_tensor.numpy()
np.testing.assert_allclose(converted_np, source_np, atol=1e-5)

测试从 GPU 上的 PyTorch 张量到 TVM NDArray 的 DLPack 转换#

if tvm.cuda().exist:
    # Allocate the test tensor directly on the CUDA device.
    gpu_tensor = torch.tensor(
        [1.0, 2.0, 3.0, 4.0, 5.0], dtype=torch.float32, device="cuda"
    )

    # DLPack hand-off from a CUDA torch tensor to a TVM NDArray.
    gpu_ndarray = tvm.nd.from_dlpack(gpu_tensor)

    # Type, shape, dtype and device must all carry over to the TVM side.
    assert isinstance(gpu_ndarray, tvm.nd.NDArray)
    assert gpu_ndarray.shape == gpu_tensor.shape
    assert str(gpu_ndarray.dtype) == str(gpu_tensor.dtype).replace("torch.", "")
    assert str(gpu_ndarray.device) == "cuda:0"

    # Compare values on the host: copy both sides to numpy first.
    host_from_tvm = gpu_ndarray.numpy()
    host_from_torch = gpu_tensor.cpu().numpy()
    np.testing.assert_allclose(host_from_tvm, host_from_torch, atol=1e-5)
else:
    # No CUDA device present — skip this test.
    pytest.skip("CUDA not available")

测试从 TVM NDArray 到 PyTorch 张量的 DLPack 转换#

import numpy as np

# Start from host-side numpy data and wrap it as a TVM NDArray.
source = np.array([1.0, 2.0, 3.0, 5.0], dtype="float32")
nd_source = tvm.nd.array(source)

# DLPack hand-off from TVM to PyTorch.
imported_tensor = torch.from_dlpack(nd_source)

# Type, shape and dtype must match the TVM side.
assert isinstance(imported_tensor, torch.Tensor)
assert imported_tensor.shape == nd_source.shape
assert imported_tensor.dtype == torch.float32

# Element values must be preserved across the conversion.
nd_values = nd_source.numpy()
torch_values = imported_tensor.numpy()
np.testing.assert_allclose(nd_values, torch_values, atol=1e-5)

测试从 GPU 上的 TVM NDArray 到 PyTorch 张量的 DLPack 转换#

if tvm.cuda().exist:
    import numpy as np

    # Host numpy data, uploaded to a CUDA-resident TVM NDArray.
    source = np.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype="float32")
    gpu_nd = tvm.nd.array(source, device=tvm.cuda(0))

    # DLPack hand-off from the CUDA NDArray to a PyTorch tensor.
    imported_tensor = torch.from_dlpack(gpu_nd)

    # Type, shape, dtype and device must all carry over.
    assert isinstance(imported_tensor, torch.Tensor)
    assert imported_tensor.shape == gpu_nd.shape
    assert imported_tensor.dtype == torch.float32
    assert imported_tensor.device.type == "cuda"

    # Compare values on the host: copy both sides to numpy first.
    host_from_tvm = gpu_nd.numpy()
    host_from_torch = imported_tensor.cpu().numpy()
    np.testing.assert_allclose(host_from_tvm, host_from_torch, atol=1e-5)
else:
    # No CUDA device present — skip this test.
    pytest.skip("CUDA not available")

测试 DLPack 往返转换:PyTorch -> TVM -> PyTorch#

# Round-trip a tensor PyTorch -> TVM -> PyTorch through DLPack.
start_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0], dtype=torch.float32)

# Export to TVM...
via_tvm = tvm.nd.from_dlpack(start_tensor)

# ...and import straight back into PyTorch.
round_tripped = torch.from_dlpack(via_tvm)

# Values, dtype and shape must all be preserved by the round trip.
assert torch.allclose(start_tensor, round_tripped, atol=1e-5)
assert start_tensor.dtype == round_tripped.dtype
assert start_tensor.shape == round_tripped.shape

测试不同数据类型的 DLPack 转换#

# (torch dtype, expected TVM dtype string) pairs to exercise.
test_types = [
    (torch.float32, "float32"),
    (torch.float64, "float64"),
    (torch.int32, "int32"),
    (torch.int64, "int64"),
]

# Round-trip a tensor of each dtype and verify nothing is lost.
for torch_dtype, tvm_dtype in test_types:
    # Tensor of the dtype under test.
    pytorch_tensor = torch.tensor([1, 2, 3], dtype=torch_dtype)

    # PyTorch -> TVM via DLPack.
    tvm_ndarray = tvm.nd.from_dlpack(pytorch_tensor)
    # Fix: the expected TVM dtype string was declared in test_types but never
    # checked — verify the TVM side actually reports it.
    assert str(tvm_ndarray.dtype) == tvm_dtype

    # TVM -> PyTorch via DLPack.
    result_tensor = torch.from_dlpack(tvm_ndarray)

    # Values and dtype must survive the round trip.
    assert torch.allclose(pytorch_tensor, result_tensor, atol=1e-5)
    assert pytorch_tensor.dtype == result_tensor.dtype

测试不同形状张量的 DLPack 转换#

# Shapes to exercise. Note: (1,) is a one-element 1-D tensor, not a scalar.
test_shapes = [
    (1,),           # one-element vector (rank 1)
    (2, 3),         # matrix (rank 2)
    (4, 5, 6),      # rank-3 tensor
    (1, 1, 1, 1),   # rank-4 tensor with all-singleton dims (edge case)
]

# Round-trip a random tensor of each shape through TVM.
for shape in test_shapes:
    rand_tensor = torch.randn(shape, dtype=torch.float32)

    # PyTorch -> TVM -> PyTorch via DLPack.
    via_tvm = tvm.nd.from_dlpack(rand_tensor)
    recovered = torch.from_dlpack(via_tvm)

    # Values and shape must be unchanged.
    assert torch.allclose(rand_tensor, recovered, atol=1e-5)
    assert rand_tensor.shape == recovered.shape

测试 DLPack 和 numpy 转换是否产生相同的结果#

# A large tensor makes truncation or stride mistakes visible.
size = 1000000
big_tensor = torch.randn(size, dtype=torch.float32)

# Path 1: direct DLPack hand-off to TVM.
nd_via_dlpack = tvm.nd.from_dlpack(big_tensor)

# Path 2: explicit copy through numpy.
host_copy = big_tensor.detach().cpu().numpy()
nd_via_numpy = tvm.nd.array(host_copy)

# Both paths must yield identical tensors once brought back to PyTorch.
back_dlpack = torch.from_dlpack(nd_via_dlpack)
back_numpy = torch.from_numpy(nd_via_numpy.numpy())
assert torch.allclose(back_dlpack, back_numpy, atol=1e-5)

# The DLPack path must also match the original tensor exactly.
assert torch.allclose(back_dlpack, big_tensor, atol=1e-5)
assert back_dlpack.shape == big_tensor.shape
assert back_dlpack.dtype == big_tensor.dtype

测试 DLPack 对不支持操作的错误处理#

# A strided (non-contiguous) view exercises DLPack's stride handling.
pytorch_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0], dtype=torch.float32)
non_contiguous = pytorch_tensor[::2]  # every other element -> stride-2 view

# PyTorch can export strided tensors via DLPack; whether the consumer (TVM)
# accepts a non-compact layout is implementation-dependent, so this is a
# best-effort check: success must round-trip correctly, rejection is tolerated.
try:
    tvm_ndarray = tvm.nd.from_dlpack(non_contiguous)
    result_tensor = torch.from_dlpack(tvm_ndarray)
    assert torch.allclose(non_contiguous, result_tensor, atol=1e-5)
except Exception:  # deliberately broad: any rejection is an acceptable outcome
    # Fix: the original bound the exception as `e` but never used it.
    pass

测试在 BasePyModule 上下文中的 DLPack 转换#

# Minimal TIR function: element-wise copy of a length-3 float32 buffer.
@T.prim_func
def identity_func(A: T.Buffer((3,), "float32"), B: T.Buffer((3,), "float32")):
    for i in T.grid(3):
        B[i] = A[i]

# Wrap the TIR function in an IRModule and instantiate BasePyModule on CPU.
ir_mod = tvm.IRModule({"identity_func": identity_func})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)

# PyTorch tensor passed as the call argument; call_tir is expected to
# convert it to TVM (presumably via the DLPack path — the conversion is
# internal to BasePyModule) and convert the output back to torch.
input_tensor = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32)

# Invoke the TIR function through the module wrapper.
result = py_mod.call_tir(identity_func, [input_tensor], R.Tensor((3,), "float32"))

# The identity kernel must return the input unchanged as a torch.Tensor.
assert isinstance(result, torch.Tensor)
assert torch.allclose(result, input_tensor, atol=1e-5)
(捕获的运行时输出,非测试内容)Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'

测试 DLPack 转换保持设备一致性#

# CPU tensor round trip: the device must remain CPU throughout.
host_tensor = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32)
host_nd = tvm.nd.from_dlpack(host_tensor)
host_round_trip = torch.from_dlpack(host_nd)

assert host_round_trip.device.type == "cpu"
assert torch.allclose(host_tensor, host_round_trip, atol=1e-5)

# NOTE: GPU device consistency requires a CUDA/OpenCL setup;
# this block only establishes that the CPU path works.

测试 DLPack 转换在可能的情况下共享内存#

# DLPack conversion should alias the underlying buffer rather than copy it.
shared_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0], dtype=torch.float32)

# Export to TVM first...
shared_nd = tvm.nd.from_dlpack(shared_tensor)

# ...then write through the original PyTorch tensor.
shared_tensor[0] = 10.0

# Importing back into PyTorch must observe the write (shared memory).
observed = torch.from_dlpack(shared_nd)
assert observed[0] == 10.0
assert torch.allclose(shared_tensor, observed, atol=1e-5)

测试批处理操作中的 DLPack 转换#

# Convert a batch of independent tensors in both directions.
batch_size = 10
inputs = [torch.randn(5, dtype=torch.float32) for _ in range(batch_size)]

# torch -> TVM for every tensor in the batch.
nd_batch = [tvm.nd.from_dlpack(t) for t in inputs]

# TVM -> torch for every NDArray.
outputs = [torch.from_dlpack(nd) for nd in nd_batch]

# Every round trip must preserve the values.
for before, after in zip(inputs, outputs):
    assert torch.allclose(before, after, atol=1e-5)

测试 DLPack 在边缘情况下的转换#

# Edge case 1: a zero-element tensor must convert cleanly in both directions.
empty_src = torch.tensor([], dtype=torch.float32)
empty_nd = tvm.nd.from_dlpack(empty_src)
empty_back = torch.from_dlpack(empty_nd)

assert empty_back.shape == empty_src.shape
assert empty_back.dtype == empty_src.dtype

# Edge case 2: a one-element tensor keeps its shape and value.
single_src = torch.tensor([42.0], dtype=torch.float32)
single_nd = tvm.nd.from_dlpack(single_src)
single_back = torch.from_dlpack(single_nd)

assert single_back.shape == single_src.shape
assert single_back[0] == 42.0