Source code for tvm.contrib.nnpack
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""External function interface to NNPACK libraries."""
import tvm
from tvm import te
import tvm._ffi
def is_available():
"""Check whether NNPACK is available, that is, `nnp_initialize()`
returns `nnp_status_success`.
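
    Examples
    --------
    A minimal guard, assuming TVM was built with NNPACK support
    (``USE_NNPACK`` enabled); skip NNPACK code paths when it is absent:

    >>> from tvm.contrib import nnpack
    >>> if not nnpack.is_available():
    ...     print("NNPACK not available; falling back to default schedules")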
"""
return _initialize() == 0
def fully_connected_inference(lhs, rhs, nthreads=1):
"""Create an extern op that compute fully connected of 1D tensor lhs and
2D tensor rhs with nnpack.
Parameters
----------
lhs : Tensor
lhs 1D array input[input_channels] of FP32 elements
rhs : Tensor
lhs 2D matrix kernel[output_channels][input_channels] of FP32 elements
Returns
-------
C : Tensor
lhs 1D array out[output_channels] of FP32 elements.
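
    Examples
    --------
    A minimal sketch, assuming a TVM build with NNPACK enabled; the shapes
    (16 output channels, 64 input channels) are illustrative:

    >>> from tvm import te
    >>> from tvm.contrib import nnpack
    >>> lhs = te.placeholder((64,), dtype="float32", name="lhs")
    >>> rhs = te.placeholder((16, 64), dtype="float32", name="rhs")
    >>> C = nnpack.fully_connected_inference(lhs, rhs, nthreads=1)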
"""
m = rhs.shape[0]
return te.extern(
(m,),
[lhs, rhs],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.nnpack.fully_connected_inference", ins[0], ins[1], outs[0], nthreads
),
name="C",
)

class ConvolutionAlgorithm:
    """Convolution algorithms supported by NNPACK."""

AUTO = 0
FFT_8x8 = 1
FFT_16x16 = 2
WT_8x8 = 3
IMPLICIT_GEMM = 4
DIRECT = 5
WT_8x8_FP16 = 6

class ConvolutionTransformStrategy:
    """Kernel transform strategies: transform the kernel on every call
    (COMPUTE) or precompute it once and reuse it (PRECOMPUTE)."""

COMPUTE = 1
PRECOMPUTE = 2
def convolution_inference(
data, kernel, bias, padding, stride, nthreads=1, algorithm=ConvolutionAlgorithm.AUTO
):
"""Create an extern op to do inference convolution of 4D tensor data and
4D tensor kernel and 1D tensor bias with nnpack.
Parameters
----------
data : Tensor
data 4D tensor input[batch][input_channels][input_height][input_width] of
FP32 elements.
kernel : Tensor
kernel 4D tensor kernel[output_channels][input_channels][kernel_height]
[kernel_width] of FP32 elements.
bias : Tensor
bias 1D array bias[output_channels][input_channels][kernel_height]
[kernel_width] of FP32 elements.
padding : list
padding A 4-dim list of [pad_top, pad_bottom, pad_left, pad_right],
which indicates the padding around the feature map.
stride : list
stride A 2-dim list of [stride_height, stride_width], which indicates
the stride.
Returns
-------
output : Tensor
output 4D tensor output[batch][output_channels][output_height][output_width]
of FP32 elements.
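
    Examples
    --------
    A minimal sketch, assuming a TVM build with NNPACK enabled; the shapes
    are illustrative:

    >>> from tvm import te
    >>> from tvm.contrib import nnpack
    >>> data = te.placeholder((1, 3, 32, 32), dtype="float32", name="data")
    >>> kernel = te.placeholder((8, 3, 3, 3), dtype="float32", name="kernel")
    >>> bias = te.placeholder((8,), dtype="float32", name="bias")
    >>> out = nnpack.convolution_inference(
    ...     data, kernel, bias, padding=[1, 1, 1, 1], stride=[1, 1])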
"""
assert isinstance(padding, list) and len(padding) == 4
assert isinstance(stride, list) and len(stride) == 2
batch, _, input_height, input_width = data.shape
output_channels, _, kernel_height, kernel_width = kernel.shape
idxdiv = te.indexdiv
output_height = idxdiv(input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1
    output_width = idxdiv(input_width + padding[2] + padding[3] - kernel_width, stride[1]) + 1
return te.extern(
(batch, output_channels, output_height, output_width),
[data, kernel, bias] if bias is not None else [data, kernel],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.nnpack.convolution_inference",
ins[0],
ins[1],
ins[2] if bias is not None else 0,
outs[0],
padding[0],
padding[1],
padding[2],
padding[3],
stride[0],
stride[1],
nthreads,
algorithm,
),
name="C",
)
def convolution_inference_without_weight_transform(
data, transformed_kernel, bias, padding, stride, nthreads=1, algorithm=ConvolutionAlgorithm.AUTO
):
"""Create an extern op to do inference convolution of 4D tensor data and
4D pre-transformed tensor kernel and 1D tensor bias with nnpack.
Parameters
----------
data : Tensor
data 4D tensor input[batch][input_channels][input_height][input_width] of
FP32 elements.
transformed_kernel : Tensor
transformed_kernel 4D tensor kernel[output_channels][input_channels][tile]
[tile] of FP32 elements.
bias : Tensor
bias 1D array bias[output_channels][input_channels][kernel_height]
[kernel_width] of FP32 elements.
padding : list
padding A 4-dim list of [pad_top, pad_bottom, pad_left, pad_right],
which indicates the padding around the feature map.
stride : list
stride A 2-dim list of [stride_height, stride_width], which indicates
the stride.
Returns
-------
output : Tensor
output 4D tensor output[batch][output_channels][output_height][output_width]
of FP32 elements.
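
    Examples
    --------
    A minimal sketch, assuming a TVM build with NNPACK enabled; the kernel
    is transformed once, then reused across calls:

    >>> from tvm import te
    >>> from tvm.contrib import nnpack
    >>> data = te.placeholder((1, 8, 32, 32), dtype="float32", name="data")
    >>> kernel = te.placeholder((16, 8, 3, 3), dtype="float32", name="kernel")
    >>> transformed = nnpack.convolution_inference_weight_transform(
    ...     kernel, algorithm=nnpack.ConvolutionAlgorithm.WT_8x8)
    >>> out = nnpack.convolution_inference_without_weight_transform(
    ...     data, transformed, None, padding=[1, 1, 1, 1], stride=[1, 1],
    ...     algorithm=nnpack.ConvolutionAlgorithm.WT_8x8)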
"""
assert algorithm in (ConvolutionAlgorithm.WT_8x8, ConvolutionAlgorithm.WT_8x8_FP16)
assert isinstance(padding, list) and len(padding) == 4
assert isinstance(stride, list) and len(stride) == 2
batch, _, input_height, input_width = data.shape
output_channels, _, _, _ = transformed_kernel.shape
kernel_height, kernel_width = (3, 3)
idxdiv = te.indexdiv
output_height = idxdiv(input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1
    output_width = idxdiv(input_width + padding[2] + padding[3] - kernel_width, stride[1]) + 1
return te.extern(
(batch, output_channels, output_height, output_width),
[data, transformed_kernel, bias] if bias is not None else [data, transformed_kernel],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.nnpack.convolution_inference_without_weight_transform",
ins[0],
ins[1],
ins[2] if bias is not None else 0,
outs[0],
padding[0],
padding[1],
padding[2],
padding[3],
stride[0],
stride[1],
nthreads,
algorithm,
),
name="C",
dtype="float32",
)
def convolution_inference_weight_transform(
kernel, nthreads=1, algorithm=ConvolutionAlgorithm.AUTO, dtype="float32"
):
"""Create an extern op to do inference convolution of 3D tensor data and
4D tensor kernel and 1D tensor bias with nnpack.
Parameters
----------
kernel : Tensor
kernel 4D tensor kernel[output_channels][input_channels][kernel_height]
[kernel_width] of FP32 elements.
Returns
-------
output : Tensor
output 4D tensor output[output_channels][input_channels][tile][tile]
of FP32 elements.
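
    Examples
    --------
    A minimal sketch, assuming a TVM build with NNPACK enabled:

    >>> from tvm import te
    >>> from tvm.contrib import nnpack
    >>> kernel = te.placeholder((16, 8, 3, 3), dtype="float32", name="kernel")
    >>> transformed = nnpack.convolution_inference_weight_transform(
    ...     kernel, algorithm=nnpack.ConvolutionAlgorithm.WT_8x8)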
"""
assert algorithm in (ConvolutionAlgorithm.WT_8x8, ConvolutionAlgorithm.WT_8x8_FP16)
output_channels, input_channels, _, _ = kernel.shape
transform_tile_size = 8
if not isinstance(dtype, str):
dtype = dtype.dtype
return te.extern(
(output_channels, input_channels, transform_tile_size, transform_tile_size),
[kernel],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.nnpack.convolution_inference_weight_transform",
ins[0],
outs[0],
nthreads,
algorithm,
),
name="transform_kernel",
dtype=dtype,
)

# Expose the packed functions registered under "tvm.contrib.nnpack"
# (e.g. _initialize) as attributes of this module.
tvm._ffi._init_api("tvm.contrib.nnpack")