1 Star 0 Fork 0

原水衣人/sparsity_compiler

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
kernel_demo.py 3.51 KB
一键复制 编辑 原始数据 按行查看 历史
import akg.tvm as tvm
import akg.topi as topi
from akg.tvm.hybrid import script
from akg.utils import kernel_exec as utils
from akg.utils import validation_check as vc_util
import akg
from akg.utils.format_transform import to_tvm_const
def irbuilder_op(input_0):
    """Reduce-any demo built with the low-level IR builder.

    Returns a (1,)-shaped extern tensor whose single element is 1 if any
    element of ``input_0`` is greater than zero, and 0 otherwise.
    """
    def kernel_ir(dst, data):
        ib = tvm.ir_builder.create()
        # Hoist the loop-invariant constants out of the scan loop.
        zero = tvm.const(0, data.dtype)
        one = tvm.const(1, data.dtype)
        # BUGFIX: initialize the result first — without this store, dst is
        # uninitialized memory whenever no element of data is positive.
        ib.store(dst, 0, zero)
        with ib.for_range_n(data.shape, "ax") as i:
            with ib.if_scope(ib.load(data, i) > zero):
                ib.store(dst, 0, one)
        return ib.get()

    return tvm.extern((1,), [input_0],
                      lambda ins, outs: kernel_ir(outs[0], ins[0]),
                      name="elemany", dtype=input_0.dtype)
# def irbuilder_sliced_matmul(W, I, O, cube):
# def kernel_ir(ins):
# W = ins[0]
# I = ins[1]
# O = ins[2]
# ib = tvm.ir_builder.create()
# with ib.for_range()
def hybrid_op(input_0):
    """Layout conversion demo: NC1HWC0 (5-D) -> NCHW (4-D) via hybrid script.

    The channel axis is reassembled from the (c1, c0) pair as
    ``c = c1 * c0 + c0_index``.
    """
    @script(capture=locals())
    def nc1hwc0_to_nchw(src, batch, height, width, channels, c_outer, c_inner):
        out = allocate((batch, channels, height, width), src.dtype, "local")
        for b in range(batch):
            for co in range(c_outer):
                for y in range(height):
                    for x in range(width):
                        for ci in range(c_inner):
                            out[b, co * c_inner + ci, y, x] = src[b, co, y, x, ci]
        return out

    bs, c1, h, w, c0 = input_0.shape
    return nc1hwc0_to_nchw(input_0, bs, h, w, c1 * c0, c1, c0)
# @vc_util.check_input_type(akg.tvm.tensor.Tensor, akg.tvm.tensor.Tensor, akg.tvm.tensor.Tensor, (list, tuple))
# def hybrid_sliced_matmul(W, I, O, cube):
# @script(capture=locals())
# def sliced_matmul(W, I, O, left, right, top, bottom, front, back):
# for i in range(top, bottom):
# for j in range(left, right):
# for k in range(front, back):
# O[i][j] = O[i][j] + W[i][k] * I[k][j]
# return O
# cube_const = [tvm.const(x) for x in cube]
# left, right, top, bottom, front, back = cube_const[0], cube_const[1], cube_const[2], cube_const[3], cube_const[4], cube_const[5]
# return sliced_matmul(W, I, O, left, right, top, bottom, front, back)
def hybrid_sliced_matmul(W, I, O):
    """Matmul restricted to a sub-cube, accumulated on top of O.

    For i in [top, bottom), j in [left, right):
        result[i][j] = O[i][j] + sum_{k in [front, back)} W[i][k] * I[k][j]
    All other entries of the result equal O unchanged.

    The slice bounds are hard-coded for now; the commented-out variant
    above shows how they could be passed in via op_attrs. TODO: parameterize.
    """
    # Plain Python ints are captured as constants by the hybrid script.
    # NOTE(review): the original wrapped these in tvm.const inside the script
    # body — presumably unnecessary under capture=locals(); confirm with akg.
    left, right, top, bottom, front, back = 0, 8, 0, 8, 0, 8

    @script(capture=locals())
    def sliced_matmul(W, I, O, rows, cols):
        output = allocate(O.shape, O.dtype, "local")
        # BUGFIX: copy O into the output first so that (a) entries outside
        # the slice are well-defined (the original returned uninitialized
        # memory there) and (b) the k-loop below can accumulate.
        for i in range(rows):
            for j in range(cols):
                output[i][j] = O[i][j]
        # BUGFIX: accumulate into output[i][j]. The original assigned
        # O[i][j] + W[i][k] * I[k][j] on every k iteration, so only the
        # last k term survived — not a matrix multiply.
        for i in range(top, bottom):
            for j in range(left, right):
                for k in range(front, back):
                    output[i][j] = output[i][j] + W[i][k] * I[k][j]
        return output

    rows, cols = O.shape
    return sliced_matmul(W, I, O, rows, cols)
if __name__ == "__main__":
    # Build the sliced-matmul hybrid kernel for CUDA.
    # NOTE: the slice bounds are hard-coded inside hybrid_sliced_matmul, so
    # no op_attrs are passed (the previously unused `op_attrs` list was
    # removed; see the commented-out attrs-taking variant above).
    utils.op_build(hybrid_sliced_matmul,
                   [[16, 16], [16, 16], [16, 16]],
                   ["float32", "float32", "float32"],
                   kernel_name="hybrid_sliced_matmul",
                   attrs={"target": "cuda"}, dump_ir=False)
    # utils.op_build(irbuilder_op,
    #                [[1024, 512]],
    #                ["float32"],
    #                kernel_name="irbuilder_op", attrs={"target": "cuda"}, dump_ir=False)
    # utils.op_build(hybrid_op,
    #                [[2, 32, 16, 512, 128]],
    #                ["float32"],
    #                kernel_name="hybrid_op", attrs={"target": "cuda"}, dump_ir=False)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/MondayYuan/sparsity_compiler.git
git@gitee.com:MondayYuan/sparsity_compiler.git
MondayYuan
sparsity_compiler
sparsity_compiler
main

搜索帮助