1 Star 0 Fork 0

原水衣人/sparsity_compiler

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
test_akg.py 5.03 KB
一键复制 编辑 原始数据 按行查看 历史
import numpy as np
from gen_random import random_gaussian
from akg.utils import kernel_exec as utils
from akg.utils.result_analysis import gpu_profiling
from akg.utils.format_transform import to_tvm_nd_array
from akg.ops.math_gpu.add import add
from akg.ops.math_gpu.batch_matmul import batch_matmul
from split import split
from my_utils import generate_mask
def gen_data(shape1, shape2, dtype):
    """Build test data for an elementwise add kernel.

    Draws two gaussian-distributed arrays, computes the numpy reference sum,
    and allocates an NaN-filled output buffer (NaN so an un-written kernel
    result cannot accidentally pass the comparison).

    Returns:
        (lhs, rhs, output, expect) as numpy arrays of the requested dtype.
    """
    np_dtype = {"float16": np.float16, "float32": np.float32}[dtype]
    lhs = random_gaussian(shape1, miu=1, sigma=0.1).astype(np_dtype)
    rhs = random_gaussian(shape2, miu=1, sigma=0.1).astype(np_dtype)
    expect = np.add(lhs, rhs)
    output = np.full(expect.shape, np.nan, dtype)
    return lhs, rhs, output, expect
def gen_data_batch_matmul(shape1, shape2, dtype):
    """Build test data for a 4-D batched matmul contracting the last axis.

    The numpy reference is expect[b, h, i, j] = sum_k lhs[b,h,i,k] * rhs[b,h,j,k]
    (i.e. rhs is used transposed on its trailing two axes).

    Returns:
        (lhs, rhs, output, expect); `output` is NaN-filled with shape
        (shape1[0], shape1[1], shape1[2], shape2[2]).
    """
    np_dtype = {"float16": np.float16, "float32": np.float32}[dtype]
    lhs = random_gaussian(shape1, miu=1, sigma=0.1).astype(np_dtype)
    rhs = random_gaussian(shape2, miu=1, sigma=0.1).astype(np_dtype)
    expect = np.einsum("bhik, bhjk -> bhij", lhs, rhs)
    out_shape = (shape1[0], shape1[1], shape1[2], shape2[2])
    output = np.full(out_shape, np.nan, dtype)
    return lhs, rhs, output, expect
def gen_data_batch_matmul_3D(shape1, shape2, dtype):
    """Build test data for a 3-D batched matmul contracting the last axis.

    The numpy reference is expect[b, i, j] = sum_k lhs[b,i,k] * rhs[b,j,k].
    Inputs are small random integers cast to float
    (NOTE(review): presumably chosen over the gaussian generator so the
    reference accumulation is exact — confirm with the author).

    Returns:
        (lhs, rhs, output, expect); `output` is NaN-filled with shape
        (shape1[0], shape1[1], shape2[1]).
    """
    np_dtype = {"float16": np.float16, "float32": np.float32}[dtype]
    lhs = np.random.randint(1, 100, shape1).astype(np_dtype)
    rhs = np.random.randint(1, 100, shape2).astype(np_dtype)
    expect = np.einsum("bik, bjk -> bij", lhs, rhs)
    out_shape = (shape1[0], shape1[1], shape2[1])
    output = np.full(out_shape, np.nan, dtype)
    return lhs, rhs, output, expect
def test_ms_add(shape1, shape2, dtype):
    """Compile the `add` op for CUDA, verify it against numpy, then profile it.

    Raises:
        AssertionError: if the kernel output does not match the numpy
            reference within rtol=5e-3 / atol=1e-8; the generated CUDA
            source is printed first to aid debugging.
    """
    mod = utils.op_build_test(add, (shape1, shape2), (dtype, dtype),
                              kernel_name="add", attrs={"target": "cuda"})
    lhs, rhs, output, expect = gen_data(shape1, shape2, dtype)
    output = utils.mod_launch(mod, (lhs, rhs, output), expect=expect)
    ok = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if ok else "Fail"))
    if not ok:
        # Dump the generated CUDA kernel so the mismatch can be inspected.
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    gpu_profiling(mod, lhs, rhs, expect, 400)
def test_ms_batch_matmul_4D(shape1, shape2, dtype):
    """Compile `batch_matmul` for CUDA on 4-D inputs and time it on GPU.

    A numerical mismatch against the numpy reference is reported (and the
    generated CUDA source printed) but does NOT abort, so the profiling
    result is still returned.

    Returns:
        The value returned by `gpu_profiling` (the measured kernel time).
    """
    mod = utils.op_build_test(batch_matmul, (shape1, shape2), (dtype, dtype),
                              kernel_name="batch_matmul",
                              attrs={"target": "cuda"})
    lhs, rhs, output, expect = gen_data_batch_matmul(shape1, shape2, dtype)
    output = utils.mod_launch(mod, (lhs, rhs, output), expect=expect)
    ok = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if ok else "Fail"))
    if not ok:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    tcost = gpu_profiling(mod, lhs, rhs, expect, 400)
    return tcost
def test_ms_batch_matmul_3D(shape1, shape2, dtype):
    """Compile `batch_matmul` for CUDA on 3-D inputs and time it on GPU.

    A numerical mismatch against the numpy reference is reported (and the
    generated CUDA source printed) but does NOT abort, so the profiling
    result is still returned.

    Returns:
        The value returned by `gpu_profiling` (the measured kernel time).
    """
    mod = utils.op_build_test(batch_matmul, (shape1, shape2), (dtype, dtype),
                              kernel_name="batch_matmul",
                              attrs={"target": "cuda"})
    lhs, rhs, output, expect = gen_data_batch_matmul_3D(shape1, shape2, dtype)
    output = utils.mod_launch(mod, (lhs, rhs, output), expect=expect)
    ok = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if ok else "Fail"))
    if not ok:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    tcost = gpu_profiling(mod, lhs, rhs, expect, 400)
    return tcost
if __name__ == '__main__':
    # Benchmark entry point: runs one non-split batch matmul, then (when the
    # exit() below is removed) compares against the sum of per-cube matmuls
    # produced by the sparsity `split` strategy.
    seq_len = 512
    print('--------------non-split------------------')
    # t = test_ms_batch_matmul_4D((64, 16, seq_len, 64), (64, 16, seq_len, 64), 'float32')
    # t = test_ms_batch_matmul_3D((64, seq_len, 64), (64, seq_len, 64), 'float32')
    t = test_ms_batch_matmul_3D((16, 4, 4), (16, 4, 4), 'float32')
    # NOTE(review): this exit() deliberately disables everything below;
    # delete it to run the split-vs-non-split timing comparison.
    exit()
    print('non-split time: {}ms'.format(t))
    print('--------------split------------------')
    O_mask = generate_mask(seq_len // 16, 16, 4, 1)
    cubes_list = split(np.ones((seq_len, 64), dtype=bool),
                       np.ones((64, seq_len), dtype=bool), O_mask)
    t = 0
    for cube in cubes_list:
        # BUG FIX: the original called the undefined name `test_ms_batch_matmul`
        # (a NameError once the exit() above is removed); the 4-D shapes match
        # test_ms_batch_matmul_4D.
        t += test_ms_batch_matmul_4D((64, 16, cube.bottom - cube.top, 64),
                                     (64, 16, cube.right - cube.left, 64),
                                     'float32')
    print('split time: {}ms'.format(t))
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/MondayYuan/sparsity_compiler.git
git@gitee.com:MondayYuan/sparsity_compiler.git
MondayYuan
sparsity_compiler
sparsity_compiler
main

搜索帮助