1 Star 0 Fork 0

原水衣人/sparsity_compiler

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
test_akg.py 5.03 KB
一键复制 编辑 原始数据 按行查看 历史
import numpy as np
from gen_random import random_gaussian
from akg.utils import kernel_exec as utils
from akg.utils.result_analysis import gpu_profiling
from akg.utils.format_transform import to_tvm_nd_array
from akg.ops.math_gpu.add import add
from akg.ops.math_gpu.batch_matmul import batch_matmul
from split import split
from my_utils import generate_mask
def gen_data(shape1, shape2, dtype):
    """Build test data for an elementwise add kernel.

    Draws two gaussian-distributed arrays, computes the numpy reference sum,
    and allocates an NaN-filled output buffer (NaN so an un-written kernel
    result cannot accidentally pass the comparison).

    Returns:
        (lhs, rhs, output, expect) as numpy arrays of the requested dtype.
    """
    np_dtype = {"float16": np.float16, "float32": np.float32}[dtype]
    lhs = random_gaussian(shape1, miu=1, sigma=0.1).astype(np_dtype)
    rhs = random_gaussian(shape2, miu=1, sigma=0.1).astype(np_dtype)
    expect = np.add(lhs, rhs)
    output = np.full(expect.shape, np.nan, dtype)
    return lhs, rhs, output, expect
def gen_data_batch_matmul(shape1, shape2, dtype):
    """Build test data for a 4-D batched matmul contracting the last axis.

    The numpy reference is expect[b, h, i, j] = sum_k lhs[b,h,i,k] * rhs[b,h,j,k]
    (i.e. rhs is used transposed on its trailing two axes).

    Returns:
        (lhs, rhs, output, expect); `output` is NaN-filled with shape
        (shape1[0], shape1[1], shape1[2], shape2[2]).
    """
    np_dtype = {"float16": np.float16, "float32": np.float32}[dtype]
    lhs = random_gaussian(shape1, miu=1, sigma=0.1).astype(np_dtype)
    rhs = random_gaussian(shape2, miu=1, sigma=0.1).astype(np_dtype)
    expect = np.einsum("bhik, bhjk -> bhij", lhs, rhs)
    out_shape = (shape1[0], shape1[1], shape1[2], shape2[2])
    output = np.full(out_shape, np.nan, dtype)
    return lhs, rhs, output, expect
def gen_data_batch_matmul_3D(shape1, shape2, dtype):
    """Build test data for a 3-D batched matmul contracting the last axis.

    The numpy reference is expect[b, i, j] = sum_k lhs[b,i,k] * rhs[b,j,k].
    Inputs are small random integers cast to float
    (NOTE(review): presumably chosen over the gaussian generator so the
    reference accumulation is exact — confirm with the author).

    Returns:
        (lhs, rhs, output, expect); `output` is NaN-filled with shape
        (shape1[0], shape1[1], shape2[1]).
    """
    np_dtype = {"float16": np.float16, "float32": np.float32}[dtype]
    lhs = np.random.randint(1, 100, shape1).astype(np_dtype)
    rhs = np.random.randint(1, 100, shape2).astype(np_dtype)
    expect = np.einsum("bik, bjk -> bij", lhs, rhs)
    out_shape = (shape1[0], shape1[1], shape2[1])
    output = np.full(out_shape, np.nan, dtype)
    return lhs, rhs, output, expect
def test_ms_add(shape1, shape2, dtype):
    """Compile the `add` op for CUDA, verify it against numpy, then profile it.

    Raises:
        AssertionError: if the kernel output does not match the numpy
            reference within rtol=5e-3 / atol=1e-8; the generated CUDA
            source is printed first to aid debugging.
    """
    mod = utils.op_build_test(add, (shape1, shape2), (dtype, dtype),
                              kernel_name="add", attrs={"target": "cuda"})
    lhs, rhs, output, expect = gen_data(shape1, shape2, dtype)
    output = utils.mod_launch(mod, (lhs, rhs, output), expect=expect)
    ok = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if ok else "Fail"))
    if not ok:
        # Dump the generated CUDA kernel so the mismatch can be inspected.
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    gpu_profiling(mod, lhs, rhs, expect, 400)
def test_ms_batch_matmul_4D(shape1, shape2, dtype):
    """Compile `batch_matmul` for CUDA on 4-D inputs and time it on GPU.

    A numerical mismatch against the numpy reference is reported (and the
    generated CUDA source printed) but does NOT abort, so the profiling
    result is still returned.

    Returns:
        The value returned by `gpu_profiling` (the measured kernel time).
    """
    mod = utils.op_build_test(batch_matmul, (shape1, shape2), (dtype, dtype),
                              kernel_name="batch_matmul",
                              attrs={"target": "cuda"})
    lhs, rhs, output, expect = gen_data_batch_matmul(shape1, shape2, dtype)
    output = utils.mod_launch(mod, (lhs, rhs, output), expect=expect)
    ok = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if ok else "Fail"))
    if not ok:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    tcost = gpu_profiling(mod, lhs, rhs, expect, 400)
    return tcost
def test_ms_batch_matmul_3D(shape1, shape2, dtype):
    """Compile `batch_matmul` for CUDA on 3-D inputs and time it on GPU.

    A numerical mismatch against the numpy reference is reported (and the
    generated CUDA source printed) but does NOT abort, so the profiling
    result is still returned.

    Returns:
        The value returned by `gpu_profiling` (the measured kernel time).
    """
    mod = utils.op_build_test(batch_matmul, (shape1, shape2), (dtype, dtype),
                              kernel_name="batch_matmul",
                              attrs={"target": "cuda"})
    lhs, rhs, output, expect = gen_data_batch_matmul_3D(shape1, shape2, dtype)
    output = utils.mod_launch(mod, (lhs, rhs, output), expect=expect)
    ok = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if ok else "Fail"))
    if not ok:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    tcost = gpu_profiling(mod, lhs, rhs, expect, 400)
    return tcost
if __name__ == '__main__':
    # Benchmark entry point: runs one non-split batch matmul, then (when the
    # exit() below is removed) compares against the sum of per-cube matmuls
    # produced by the sparsity `split` strategy.
    seq_len = 512
    print('--------------non-split------------------')
    # t = test_ms_batch_matmul_4D((64, 16, seq_len, 64), (64, 16, seq_len, 64), 'float32')
    # t = test_ms_batch_matmul_3D((64, seq_len, 64), (64, seq_len, 64), 'float32')
    t = test_ms_batch_matmul_3D((16, 4, 4), (16, 4, 4), 'float32')
    # NOTE(review): this exit() deliberately disables everything below;
    # delete it to run the split-vs-non-split timing comparison.
    exit()
    print('non-split time: {}ms'.format(t))
    print('--------------split------------------')
    O_mask = generate_mask(seq_len // 16, 16, 4, 1)
    cubes_list = split(np.ones((seq_len, 64), dtype=bool),
                       np.ones((64, seq_len), dtype=bool), O_mask)
    t = 0
    for cube in cubes_list:
        # BUG FIX: the original called the undefined name `test_ms_batch_matmul`
        # (a NameError once the exit() above is removed); the 4-D shapes match
        # test_ms_batch_matmul_4D.
        t += test_ms_batch_matmul_4D((64, 16, cube.bottom - cube.top, 64),
                                     (64, 16, cube.right - cube.left, 64),
                                     'float32')
    print('split time: {}ms'.format(t))
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/MondayYuan/sparsity_compiler.git
git@gitee.com:MondayYuan/sparsity_compiler.git
MondayYuan
sparsity_compiler
sparsity_compiler
main

搜索帮助