解读 tvm.tir.transform.LowerDeviceStorageAccessInfo

解读 tvm.tir.transform.LowerDeviceStorageAccessInfo#

参考:tvm/tests/python/tir-transform/test_tir_transform_lower_device_storage_access_info.py

import sys
from pathlib import Path
ROOT = Path(".").resolve().parents[2]
sys.path.extend([f"{ROOT}/tests", f"{ROOT}/src"])
# # from tools.tag_span import _create_span, _set_span, _verify_structural_equal_with_span
from tools.torch_utils import verify_model
import tvm
from tvm.script import tir as T
@tvm.register_func("tvm.info.mem.global.test_with_head_address")
def mem_info_with_head_address():
    return tvm.ir.make_node(
        "MemoryInfo",
        unit_bits=8,
        max_simd_bits=32,
        max_num_bits=128,
        head_address=tvm.tir.call_extern("handle", "dummy_head_address"),
    )

@tvm.register_func("tvm.info.mem.global.test_without_head_address")
def mem_info_without_head_address():
    return tvm.ir.make_node(
        "MemoryInfo",
        unit_bits=8,
        max_simd_bits=32,
        max_num_bits=128,
        head_address=None,
    )

将 CPU 可见的缓冲区分配替换为 LetStmt#

对于 CPU 可以访问的范围(例如 hexagon 上的 VTCM),头地址指定了如何访问它,并用于替换 AllocateNode。

class BaseCompare(tvm.testing.CompareBeforeAfter):
    transform = tvm.tir.transform.LowerDeviceStorageAccessInfo()


class TestLowerCPUAccessibleScope(BaseCompare):
    """Allocate of CPU-visible buffers are replaced by LetStmt

    For scopes that are accessible by the CPU (e.g. VTCM on hexagon),
    the head address specifies how it should be accessed, and is used
    to replace the AllocateNode.
    """

    def before():
        ptr = T.allocate([16], "float32", scope="global.test_with_head_address")
        T.evaluate(ptr)

    def expected():
        ptr: T.handle("float32", "global.test_with_head_address") = T.call_extern(  # noqa: F722
            "handle", "dummy_head_address"
        )
        T.evaluate(ptr)


class TestLowerCPUAccessibleScopeWithDeclBuffer(BaseCompare):
    """Like TestLowerCPUAccessibleScope, but with a DeclBuffer.

    When the Allocate is updated, the DeclBuffer should not contain a
    dangling reference.
    """

    def before():
        buf = T.decl_buffer(16, "float32", scope="global.test_with_head_address")
        T.evaluate(buf.data)

    def expected():
        ptr: T.handle("float32", "global.test_with_head_address") = T.call_extern(  # noqa: F722
            "handle", "dummy_head_address"
        )
        buf = T.decl_buffer(16, "float32", scope="global.test_with_head_address", data=ptr)
        T.evaluate(ptr)


class TestLowerCPUInaccessibleScope(BaseCompare):
    """Allocate of CPU-visible buffers are replaced by LetStmt

    For scopes that are inaccessible by the CPU (e.g. Texture memory
    on GPU), the allocate is removed.  All CPU-side references to the
    buffer should have been lowered by this point.
    """

    def before():
        ptr = T.allocate([16], "float32", scope="global.test_without_head_address")
        T.evaluate(0)

    def expected():
        T.evaluate(0)


class TestLowerCPUInaccessibleScopeWithDeclBuffer(BaseCompare):
    """Like TestLowerCPUInaccessibleScope, but with a DeclBuffer

    When the Allocate is removed, the DeclBuffer should not contain a
    dangling reference.
    """

    def before():
        buf = T.decl_buffer(16, "float32", scope="global.test_without_head_address")
        T.evaluate(0)

    def expected():
        T.evaluate(0)