# NOTE: removed a Gitee web-page artifact that was pasted into the file here
# ("代码拉取完成,页面将自动刷新" — "code fetch complete, the page will refresh");
# it is not part of the source and is a syntax error in Python.
import akg.tvm as tvm
import akg.topi as topi
from akg.tvm.hybrid import script
from akg.utils import kernel_exec as utils
from akg.utils import validation_check as vc_util
import akg
from akg.utils.format_transform import to_tvm_const
def irbuilder_op(input_0):
    """Build an "elemany"-style extern op with a raw TVM IR builder.

    The generated kernel scans every element of ``input_0`` and writes 1 to
    the single-element output if any element is greater than zero, else 0.

    Args:
        input_0: input akg/tvm tensor (any shape; dtype is reused for output).

    Returns:
        A ``(1,)``-shaped extern tensor with the same dtype as ``input_0``.
    """
    def kernel_ir(dst, data):
        ib = tvm.ir_builder.create()
        # Hoisted loop-invariant constants.
        zero = tvm.const(0, data.dtype)
        one = tvm.const(1, data.dtype)
        # Fix: the extern output buffer is not zero-filled by TVM, so without
        # this store the result is undefined when no element is positive.
        ib.store(dst, 0, zero)
        # for_range_n (akg helper) iterates over all axes of data.shape;
        # `i` is the multi-dimensional index passed to ib.load.
        with ib.for_range_n(data.shape, "ax") as i:
            with ib.if_scope(ib.load(data, i) > zero):
                ib.store(dst, 0, one)
        return ib.get()

    return tvm.extern((1,), [input_0],
                      lambda ins, outs: kernel_ir(outs[0], ins[0]),
                      name="elemany", dtype=input_0.dtype)
# def irbuilder_sliced_matmul(W, I, O, cube):
# def kernel_ir(ins):
# W = ins[0]
# I = ins[1]
# O = ins[2]
# ib = tvm.ir_builder.create()
# with ib.for_range()
def hybrid_op(input_0):
    """Convert a 5-D NC1HWC0-layout tensor to 4-D NCHW via a hybrid script.

    The channel axis is reconstructed as c = c1 * c0 by interleaving the
    split channel blocks: output[n, c_i*c0 + c_i0, h, w] = input[n, c_i, h, w, c_i0].

    Args:
        input_0: tensor with shape (bs, c1, h, w, c0).

    Returns:
        Tensor with shape (bs, c1 * c0, h, w) and the same dtype.
    """
    @script(capture=locals())
    def nc1hwc0_to_nchw(inputs, bs, h, w, c, c1, c0):
        # Local output buffer in the merged-channel NCHW layout.
        output = allocate((bs, c, h, w), inputs.dtype, "local")
        for n_i in range(bs):
            for c_i in range(c1):
                for h_i in range(h):
                    for w_i in range(w):
                        for c_i0 in range(c0):
                            # Merge the (c1, c0) channel split back into one axis.
                            output[n_i, c_i * c0 + c_i0, h_i, w_i] = inputs[n_i, c_i, h_i, w_i, c_i0]
        return output
    bs, c1, h, w, c0 = input_0.shape
    # c (full channel count) is c1 * c0; shapes are passed explicitly because
    # the hybrid script cannot read them off the tensor itself.
    return nc1hwc0_to_nchw(input_0, bs, h, w, c1 * c0, c1, c0)
# @vc_util.check_input_type(akg.tvm.tensor.Tensor, akg.tvm.tensor.Tensor, akg.tvm.tensor.Tensor, (list, tuple))
# def hybrid_sliced_matmul(W, I, O, cube):
# @script(capture=locals())
# def sliced_matmul(W, I, O, left, right, top, bottom, front, back):
# for i in range(top, bottom):
# for j in range(left, right):
# for k in range(front, back):
# O[i][j] = O[i][j] + W[i][k] * I[k][j]
# return O
# cube_const = [tvm.const(x) for x in cube]
# left, right, top, bottom, front, back = cube_const[0], cube_const[1], cube_const[2], cube_const[3], cube_const[4], cube_const[5]
# return sliced_matmul(W, I, O, left, right, top, bottom, front, back)
def hybrid_sliced_matmul(W, I, O):
    """Hybrid-script matmul over a fixed 8x8x8 slice, accumulated on top of O.

    For i, j in [0, 8): output[i, j] = O[i, j] + sum_{k in [0, 8)} W[i, k] * I[k, j].

    NOTE(review): cells of the local buffer outside the 8x8 slice are never
    written (same as the original) — confirm callers only read the slice, or
    copy O into the whole buffer first.

    Args:
        W: left matrix operand.
        I: right matrix operand.
        O: additive base; also determines the output shape/dtype.

    Returns:
        Tensor of O's shape/dtype with the sliced matmul result.
    """
    @script(capture=locals())
    def sliced_matmul(W, I, O):
        output = allocate(O.shape, O.dtype, "local")
        # Slice bounds hard-coded to [0, 8) for rows (i), columns (j) and the
        # reduction axis (k) — the original cube = [0, 8, 0, 8, 0, 8].
        for i in range(0, 8):
            for j in range(0, 8):
                # Bug fix: the original assigned
                #   output[i][j] = O[i][j] + W[i][k] * I[k][j]
                # inside the k loop, overwriting the cell each iteration so
                # only the k == 7 product survived. Seed with O and accumulate.
                output[i][j] = O[i][j]
                for k in range(0, 8):
                    output[i][j] = output[i][j] + W[i][k] * I[k][j]
        return output
    return sliced_matmul(W, I, O)
if __name__ == "__main__":
    # Build the sliced-matmul hybrid kernel for the CUDA target.
    # (The unused `op_attrs = [[0, 8, 0, 8, 0, 8]]` leftover was removed: the
    # active hybrid_sliced_matmul hard-codes its cube and takes no attrs.)
    utils.op_build(hybrid_sliced_matmul,
                   [[16, 16], [16, 16], [16, 16]],
                   ["float32", "float32", "float32"],
                   kernel_name="hybrid_sliced_matmul",
                   attrs={"target": "cuda"}, dump_ir=False)
    # Alternative builds for the other ops defined in this file:
    # utils.op_build(irbuilder_op,
    #                [[1024, 512]],
    #                ["float32"],
    #                kernel_name="irbuilder_op", attrs={"target": "cuda"}, dump_ir=False)
    # utils.op_build(hybrid_op,
    #                [[2, 32, 16, 512, 128]],
    #                ["float32"],
    #                kernel_name="hybrid_op", attrs={"target": "cuda"}, dump_ir=False)
# NOTE: removed Gitee content-moderation boilerplate that the web page
# appended here ("此处可能存在不合适展示的内容…" — a notice that content may be
# withheld from display, plus appeal instructions); it is not part of the
# source and is a syntax error in Python.