DLPack Integration Between PyTorch and TVM#
This test suite verifies the following:

- DLPack conversion from PyTorch to TVM
- DLPack conversion from TVM to PyTorch
- Data integrity preservation across conversions
- Functional equivalence between DLPack and the numpy fallback path
- Error handling for unsupported data types
# Import the test framework and required libraries
import pytest
import torch
import tvm
from tvm import relax, tir
from tvm.script import relax as R, tir as T
from tvm.relax import BasePyModule
import numpy as np
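Both conversion directions build on the standard DLPack protocol: producers expose the `__dlpack__` and `__dlpack_device__` hooks, and consumers such as `torch.from_dlpack` and `tvm.nd.from_dlpack` call them. A minimal sketch of the handshake on the PyTorch side (the `(device_type, device_id)` pair follows the DLPack `DLDeviceType` enum, where `kDLCPU` is 1):

# Inspect the DLPack hooks a PyTorch tensor exposes
t = torch.tensor([1.0, 2.0, 3.0])
print(t.__dlpack_device__())  # (1, 0): device_type kDLCPU, device_id 0
capsule = t.__dlpack__()  # PyCapsule wrapping a DLManagedTensor
print(type(capsule))  # <class 'PyCapsule'>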
Testing DLPack conversion from a PyTorch tensor to a TVM NDArray#
# Create a PyTorch tensor as test data
pytorch_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0], dtype=torch.float32)
# Convert the PyTorch tensor to a TVM NDArray via the DLPack protocol
tvm_ndarray = tvm.nd.from_dlpack(pytorch_tensor)
# Verify the type, shape, and dtype of the converted array
assert isinstance(tvm_ndarray, tvm.nd.NDArray)
assert tvm_ndarray.shape == pytorch_tensor.shape
assert str(tvm_ndarray.dtype) == str(pytorch_tensor.dtype).replace("torch.", "")
# Verify the data values survived the conversion
tvm_numpy = tvm_ndarray.numpy()
pytorch_numpy = pytorch_tensor.numpy()
np.testing.assert_allclose(tvm_numpy, pytorch_numpy, atol=1e-5)
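For reference, the older capsule-based export also works; a sketch, assuming `tvm.nd.from_dlpack` accepts a raw DLPack capsule (the historical entry point) in addition to objects implementing `__dlpack__`:

from torch.utils.dlpack import to_dlpack

# Export an explicit PyCapsule and hand it to TVM
capsule = to_dlpack(pytorch_tensor)
tvm_from_capsule = tvm.nd.from_dlpack(capsule)
np.testing.assert_allclose(tvm_from_capsule.numpy(), pytorch_numpy, atol=1e-5)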
Testing DLPack conversion from a GPU PyTorch tensor to a TVM NDArray#
if tvm.cuda().exist:
    # Create a PyTorch tensor on the GPU
    pytorch_tensor = torch.tensor(
        [1.0, 2.0, 3.0, 4.0, 5.0], dtype=torch.float32, device="cuda"
    )
    # Convert the GPU PyTorch tensor to a TVM NDArray via the DLPack protocol
    tvm_ndarray = tvm.nd.from_dlpack(pytorch_tensor)
    # Verify the type, shape, dtype, and device of the converted array
    assert isinstance(tvm_ndarray, tvm.nd.NDArray)
    assert tvm_ndarray.shape == pytorch_tensor.shape
    assert str(tvm_ndarray.dtype) == str(pytorch_tensor.dtype).replace("torch.", "")
    assert str(tvm_ndarray.device) == "cuda:0"
    # Move the data to the CPU for numpy conversion and comparison
    tvm_numpy = tvm_ndarray.numpy()
    pytorch_numpy = pytorch_tensor.cpu().numpy()
    np.testing.assert_allclose(tvm_numpy, pytorch_numpy, atol=1e-5)
else:
    # Skip this test if CUDA is not available
    pytest.skip("CUDA not available")
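PyTorch and TVM probe CUDA independently, so when mixing the two it can be safer to gate GPU paths on both runtimes; a small sketch:

# Enable the GPU path only when both frameworks can see a CUDA device
gpu_ok = torch.cuda.is_available() and tvm.cuda().exist
print("GPU path enabled:", gpu_ok)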
Testing DLPack conversion from a TVM NDArray to a PyTorch tensor#
import numpy as np
# Create a numpy array as the source data
data = np.array([1.0, 2.0, 3.0, 5.0], dtype="float32")
# Convert the numpy array to a TVM NDArray
tvm_ndarray = tvm.nd.array(data)
# Convert the TVM NDArray to a PyTorch tensor via the DLPack protocol
pytorch_tensor = torch.from_dlpack(tvm_ndarray)
# Verify the type, shape, and dtype of the converted tensor
assert isinstance(pytorch_tensor, torch.Tensor)
assert pytorch_tensor.shape == tvm_ndarray.shape
assert pytorch_tensor.dtype == torch.float32
# Verify the data values survived the conversion
tvm_numpy = tvm_ndarray.numpy()
pytorch_numpy = pytorch_tensor.numpy()
np.testing.assert_allclose(tvm_numpy, pytorch_numpy, atol=1e-5)
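The same conversion can be spelled with an explicit capsule; a sketch, assuming the NDArray exposes the standard `__dlpack__` hook (which the `torch.from_dlpack` call above already relies on):

# Export a capsule from the TVM side and import it into PyTorch;
# torch.from_dlpack accepts either an object with __dlpack__ or a capsule.
capsule = tvm_ndarray.__dlpack__()
legacy_tensor = torch.from_dlpack(capsule)
assert torch.allclose(legacy_tensor, pytorch_tensor, atol=1e-5)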
Testing DLPack conversion from a GPU TVM NDArray to a PyTorch tensor#
if tvm.cuda().exist:
    import numpy as np
    # Create a numpy array as the source data
    data = np.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype="float32")
    # Convert the numpy array to a TVM NDArray on the GPU
    tvm_ndarray = tvm.nd.array(data, device=tvm.cuda(0))
    # Convert the GPU TVM NDArray to a PyTorch tensor via the DLPack protocol
    pytorch_tensor = torch.from_dlpack(tvm_ndarray)
    # Verify the type, shape, dtype, and device of the converted tensor
    assert isinstance(pytorch_tensor, torch.Tensor)
    assert pytorch_tensor.shape == tvm_ndarray.shape
    assert pytorch_tensor.dtype == torch.float32
    assert pytorch_tensor.device.type == "cuda"
    # Move the data to the CPU for numpy conversion and comparison
    tvm_numpy = tvm_ndarray.numpy()
    pytorch_numpy = pytorch_tensor.cpu().numpy()
    np.testing.assert_allclose(tvm_numpy, pytorch_numpy, atol=1e-5)
else:
    # Skip this test if CUDA is not available
    pytest.skip("CUDA not available")
Testing DLPack round-trip conversion: PyTorch -> TVM -> PyTorch#
# Create a PyTorch tensor
original_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0], dtype=torch.float32)
# Convert to TVM
tvm_ndarray = tvm.nd.from_dlpack(original_tensor)
# Convert back to PyTorch
result_tensor = torch.from_dlpack(tvm_ndarray)
# Verify data integrity across the round trip
assert torch.allclose(original_tensor, result_tensor, atol=1e-5)
assert original_tensor.dtype == result_tensor.dtype
assert original_tensor.shape == result_tensor.shape
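When the exchange is zero-copy, both ends alias the same buffer, so the data pointers match; a hedged check (aliasing is the usual behavior of DLPack exchange, not a documented guarantee of every build):

# If no copy was made, the round-tripped tensor aliases the original buffer
if original_tensor.data_ptr() == result_tensor.data_ptr():
    print("round trip was zero-copy (shared buffer)")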
Testing DLPack conversion for different data types#
# Dtype pairs to test
test_types = [
    (torch.float32, "float32"),
    (torch.float64, "float64"),
    (torch.int32, "int32"),
    (torch.int64, "int64"),
]
# Run the round-trip conversion for each dtype
for torch_dtype, tvm_dtype in test_types:
    # Create a PyTorch tensor with the given dtype
    pytorch_tensor = torch.tensor([1, 2, 3], dtype=torch_dtype)
    # Convert to TVM and check the dtype string on the TVM side
    tvm_ndarray = tvm.nd.from_dlpack(pytorch_tensor)
    assert str(tvm_ndarray.dtype) == tvm_dtype
    # Convert back to PyTorch
    result_tensor = torch.from_dlpack(tvm_ndarray)
    # Verify the round-trip result
    assert torch.allclose(pytorch_tensor, result_tensor, atol=1e-5)
    assert pytorch_tensor.dtype == result_tensor.dtype
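The overview also lists error handling for unsupported dtypes. A minimal sketch, assuming quantized tensors are rejected by the DLPack exporter (their scale/zero-point metadata has no DLPack representation):

# Quantized tensors carry metadata DLPack cannot express
quantized = torch.quantize_per_tensor(
    torch.tensor([1.0, 2.0, 3.0]), scale=0.1, zero_point=0, dtype=torch.qint8
)
try:
    tvm.nd.from_dlpack(quantized)
except Exception as e:
    print(f"rejected as expected: {e}")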
Testing DLPack conversion for tensors of different shapes#
# Tensor shapes to test
test_shapes = [
    (1,),  # 1-D tensor with a single element
    (2, 3),  # 2-D matrix
    (4, 5, 6),  # 3-D tensor
    (1, 1, 1, 1),  # 4-D tensor (degenerate case)
]
# Run the round-trip conversion for each shape
for shape in test_shapes:
    # Create a random PyTorch tensor with the given shape
    pytorch_tensor = torch.randn(shape, dtype=torch.float32)
    # Convert to TVM
    tvm_ndarray = tvm.nd.from_dlpack(pytorch_tensor)
    # Convert back to PyTorch
    result_tensor = torch.from_dlpack(tvm_ndarray)
    # Verify the round-trip result
    assert torch.allclose(pytorch_tensor, result_tensor, atol=1e-5)
    assert pytorch_tensor.shape == result_tensor.shape
Testing that DLPack and numpy conversions produce the same results#
# Create a large PyTorch tensor
size = 1000000
pytorch_tensor = torch.randn(size, dtype=torch.float32)
# Convert via DLPack
tvm_ndarray_dlpack = tvm.nd.from_dlpack(pytorch_tensor)
# Convert via numpy
numpy_array = pytorch_tensor.detach().cpu().numpy()
tvm_ndarray_numpy = tvm.nd.array(numpy_array)
# Verify that both paths produce the same result
result_dlpack = torch.from_dlpack(tvm_ndarray_dlpack)
result_numpy = torch.from_numpy(tvm_ndarray_numpy.numpy())
assert torch.allclose(result_dlpack, result_numpy, atol=1e-5)
# Verify data integrity
assert torch.allclose(result_dlpack, pytorch_tensor, atol=1e-5)
assert result_dlpack.shape == pytorch_tensor.shape
assert result_dlpack.dtype == pytorch_tensor.dtype
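The two paths differ in cost: the numpy route materializes a host copy, while DLPack only wraps the existing buffer. A rough timing sketch (absolute numbers vary by machine; this is illustrative, not part of the test):

import time

start = time.perf_counter()
_ = tvm.nd.from_dlpack(pytorch_tensor)
dlpack_s = time.perf_counter() - start

start = time.perf_counter()
_ = tvm.nd.array(pytorch_tensor.detach().cpu().numpy())
numpy_s = time.perf_counter() - start

print(f"dlpack: {dlpack_s:.6f}s  numpy copy: {numpy_s:.6f}s")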
Testing DLPack error handling for unsupported operations#
# Test a non-contiguous tensor
pytorch_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0], dtype=torch.float32)
non_contiguous = pytorch_tensor[::2]  # create a non-contiguous view
# This may work, since DLPack can describe strided layouts,
# but some consumers reject non-compact arrays
try:
    tvm_ndarray = tvm.nd.from_dlpack(non_contiguous)
    result_tensor = torch.from_dlpack(tvm_ndarray)
    assert torch.allclose(non_contiguous, result_tensor, atol=1e-5)
except Exception:
    # It is also acceptable for this conversion to fail
    ...
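If a consumer rejects the strided view, a portable fallback is to materialize a contiguous copy first, at the cost of one extra copy:

# .contiguous() produces a compact copy that any DLPack consumer accepts
tvm_ndarray = tvm.nd.from_dlpack(non_contiguous.contiguous())
result_tensor = torch.from_dlpack(tvm_ndarray)
assert torch.allclose(non_contiguous, result_tensor, atol=1e-5)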
Testing DLPack conversion in the BasePyModule context#
# Create a simple IRModule
@T.prim_func
def identity_func(A: T.Buffer((3,), "float32"), B: T.Buffer((3,), "float32")):
    for i in T.grid(3):
        B[i] = A[i]

# Build the IRModule and instantiate a BasePyModule
ir_mod = tvm.IRModule({"identity_func": identity_func})
device = tvm.cpu(0)
py_mod = BasePyModule(ir_mod, device)
# Create a PyTorch tensor
input_tensor = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32)
# Call the TIR function (this triggers DLPack conversions under the hood)
result = py_mod.call_tir(identity_func, [input_tensor], R.Tensor((3,), "float32"))
# Verify the result
assert isinstance(result, torch.Tensor)
assert torch.allclose(result, input_tensor, atol=1e-5)
Warning: Failed to compile Relax VM: Module has no function 'vm_load_executable'
The warning is likely benign here: the IRModule contains only a TIR PrimFunc and no Relax function, so no VM executable is produced, and the direct TIR call path above is unaffected.
Testing that DLPack conversion preserves device consistency#
# Test a CPU tensor
cpu_tensor = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32)
cpu_tvm = tvm.nd.from_dlpack(cpu_tensor)
cpu_result = torch.from_dlpack(cpu_tvm)
assert cpu_result.device.type == "cpu"
assert torch.allclose(cpu_tensor, cpu_result, atol=1e-5)
# Note: GPU coverage requires a CUDA/OpenCL setup;
# this basic test only ensures the CPU path works
Testing that DLPack conversion shares memory when possible#
# Create a PyTorch tensor
pytorch_tensor = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0], dtype=torch.float32)
# Convert to TVM
tvm_ndarray = tvm.nd.from_dlpack(pytorch_tensor)
# Modify the original tensor
pytorch_tensor[0] = 10.0
# Convert back to PyTorch
result_tensor = torch.from_dlpack(tvm_ndarray)
# The result should reflect the modification (shared memory)
assert result_tensor[0] == 10.0
assert torch.allclose(pytorch_tensor, result_tensor, atol=1e-5)
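By contrast, converting through numpy always copies, so later mutations of the source are invisible on the TVM side; a short illustration:

# tvm.nd.array copies the numpy buffer instead of aliasing it
copied = tvm.nd.array(pytorch_tensor.detach().cpu().numpy())
pytorch_tensor[1] = 20.0
assert copied.numpy()[1] != 20.0  # the copy still holds the old value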
Testing DLPack conversion in batched operations#
# Create a batch of tensors
batch_size = 10
pytorch_tensors = [torch.randn(5, dtype=torch.float32) for _ in range(batch_size)]
# Convert every tensor to TVM
tvm_ndarrays = [tvm.nd.from_dlpack(t) for t in pytorch_tensors]
# Convert every tensor back to PyTorch
result_tensors = [torch.from_dlpack(t) for t in tvm_ndarrays]
# Verify every conversion
for i in range(batch_size):
    assert torch.allclose(pytorch_tensors[i], result_tensors[i], atol=1e-5)
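For many small tensors, converting one stacked tensor amortizes the per-call overhead of the Python loop; an illustrative alternative, not part of the original test:

# One conversion for the whole batch instead of batch_size conversions
stacked = torch.stack(pytorch_tensors)  # shape (batch_size, 5)
stacked_tvm = tvm.nd.from_dlpack(stacked)
assert tuple(stacked_tvm.shape) == (batch_size, 5)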
Testing DLPack conversion in edge cases#
# Empty tensor
empty_tensor = torch.tensor([], dtype=torch.float32)
empty_tvm = tvm.nd.from_dlpack(empty_tensor)
empty_result = torch.from_dlpack(empty_tvm)
assert empty_result.shape == empty_tensor.shape
assert empty_result.dtype == empty_tensor.dtype
# Single-element tensor
single_tensor = torch.tensor([42.0], dtype=torch.float32)
single_tvm = tvm.nd.from_dlpack(single_tensor)
single_result = torch.from_dlpack(single_tvm)
assert single_result.shape == single_tensor.shape
assert single_result[0] == 42.0
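A true 0-d scalar tensor (shape `()`) is also representable in DLPack via `ndim == 0`; a hedged sketch of the same round trip:

# 0-d (scalar) tensor round trip
scalar_tensor = torch.tensor(3.14, dtype=torch.float32)
scalar_tvm = tvm.nd.from_dlpack(scalar_tensor)
scalar_result = torch.from_dlpack(scalar_tvm)
assert scalar_result.shape == scalar_tensor.shape == torch.Size([])
assert torch.allclose(scalar_result, scalar_tensor, atol=1e-5)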