diff --git a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp index 93bbde8bcf51f128de4cf3abd7d01ed49331bd6a..68c0c6550b372f889ed60324e1cc2d82a4c011f7 100644 --- a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp +++ b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp @@ -11,7 +11,6 @@ #include "register/op_def_registry.h" #include "tiling/platform/platform_ascendc.h" #include "tiling/tiling_api.h" -using namespace matmul_tiling; namespace optiling { static ge::graphStatus TilingFunc(gert::TilingContext *context) @@ -24,12 +23,16 @@ static ge::graphStatus TilingFunc(gert::TilingContext *context) int32_t K = shape_a.GetDim(1); int32_t baseM = 128; int32_t baseN = 128; - MultiCoreMatmulTiling cubeTiling(ascendcPlatform); + matmul_tiling::MultiCoreMatmulTiling cubeTiling(ascendcPlatform); cubeTiling.SetDim(2); - cubeTiling.SetAType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - cubeTiling.SetBType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - cubeTiling.SetCType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); - cubeTiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + cubeTiling.SetAType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT16); + cubeTiling.SetBType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT16); + cubeTiling.SetCType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT); + cubeTiling.SetBiasType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT); cubeTiling.SetShape(M, N, K); cubeTiling.SetOrgShape(M, N, K); cubeTiling.SetFixSplit(baseM, baseN, -1); 
diff --git a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp index 24e657461036947b4c06f616a76c83ddcf2ffbe8..e47f9916d83b9d7a6da1ed1e93dd682e639957f4 100644 --- a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp +++ b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp @@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; @@ -23,19 +20,20 @@ public: __aicore__ inline MatmulKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, const TCubeTiling &tiling); - template __aicore__ inline void Process(TPipe *pipe); + template __aicore__ inline void Process(AscendC::TPipe *pipe); __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> - matmulObj; + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - GlobalTensor biasGlobal; + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; TCubeTiling tiling; }; @@ -53,7 +51,7 @@ __aicore__ inline void MatmulKernel::Init(GM_ADDR int32_t offsetB = 0; int32_t offsetC = 0; int32_t offsetBias = 0; - CalcOffset(GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); + CalcOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); aGlobal = aGlobal[offsetA]; bGlobal = bGlobal[offsetB]; cGlobal = cGlobal[offsetC]; @@ -65,13 +63,13 @@ __aicore__ 
inline void MatmulKernel::Init(GM_ADDR template template -__aicore__ inline void MatmulKernel::Process(TPipe *pipe) +__aicore__ inline void MatmulKernel::Process(AscendC::TPipe *pipe) { if constexpr (setTmpSpace) { - TBuf<> tmpMMFormatUb; - LocalTensor mmformatUb; - pipe->InitBuffer(tmpMMFormatUb, TOTAL_VEC_LOCAL_SIZE); - mmformatUb = tmpMMFormatUb.Get(TOTAL_VEC_LOCAL_SIZE); + AscendC::TBuf<> tmpMMFormatUb; + AscendC::LocalTensor mmformatUb; + pipe->InitBuffer(tmpMMFormatUb, AscendC::TOTAL_VEC_LOCAL_SIZE); + mmformatUb = tmpMMFormatUb.Get(AscendC::TOTAL_VEC_LOCAL_SIZE); matmulObj.SetLocalWorkspace(mmformatUb); } @@ -102,7 +100,7 @@ extern "C" __global__ __aicore__ void matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADD { GET_TILING_DATA(tilingData, tiling); MatmulKernel matmulKernel; - TPipe pipe; + AscendC::TPipe pipe; REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulKernel.matmulObj, &tilingData.cubeTilingData); matmulKernel.Init(a, b, bias, c, workspace, tilingData.cubeTilingData); if (TILING_KEY_IS(1)) { diff --git a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp index c4261f25c1851edfae4449a9c53954d85f8115ef..7b146958f1bfeea34efcdb2b83121dfc2d9e23d6 100644 --- a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp +++ b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp @@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; @@ -23,19 +20,20 @@ public: __aicore__ inline MatmulKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, const TCubeTiling &tiling); - template __aicore__ inline void Process(TPipe *pipe); + template 
__aicore__ inline void Process(AscendC::TPipe *pipe); __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> - matmulObj; + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - GlobalTensor biasGlobal; + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; TCubeTiling tiling; }; @@ -53,7 +51,7 @@ __aicore__ inline void MatmulKernel::Init(GM_ADDR int32_t offsetB = 0; int32_t offsetC = 0; int32_t offsetBias = 0; - CalcOffset(GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); + CalcOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); aGlobal = aGlobal[offsetA]; bGlobal = bGlobal[offsetB]; cGlobal = cGlobal[offsetC]; @@ -65,16 +63,16 @@ __aicore__ inline void MatmulKernel::Init(GM_ADDR template template -__aicore__ inline void MatmulKernel::Process(TPipe *pipe) +__aicore__ inline void MatmulKernel::Process(AscendC::TPipe *pipe) { - if (GetBlockIdx() >= 1) { + if (AscendC::GetBlockIdx() >= 1) { return; } if constexpr (setTmpSpace) { - TBuf<> tmpMMFormatUb; - LocalTensor mmformatUb; - pipe->InitBuffer(tmpMMFormatUb, TOTAL_VEC_LOCAL_SIZE); - mmformatUb = tmpMMFormatUb.Get(TOTAL_VEC_LOCAL_SIZE); + AscendC::TBuf<> tmpMMFormatUb; + AscendC::LocalTensor mmformatUb; + pipe->InitBuffer(tmpMMFormatUb, AscendC::TOTAL_VEC_LOCAL_SIZE); + mmformatUb = tmpMMFormatUb.Get(AscendC::TOTAL_VEC_LOCAL_SIZE); matmulObj.SetLocalWorkspace(mmformatUb); } @@ -105,7 +103,7 @@ extern "C" __global__ __aicore__ void matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADD { GET_TILING_DATA(tilingData, tiling); MatmulKernel matmulKernel; - TPipe pipe; + AscendC::TPipe pipe; REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), 
matmulKernel.matmulObj, &tilingData.cubeTilingData); matmulKernel.Init(a, b, bias, c, workspace, tilingData.cubeTilingData); if (TILING_KEY_IS(1)) { diff --git a/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom.cpp b/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom.cpp index 41daf97be1a09155e7087f56e0c97eeed35c4fa0..ab694b77b96e97b23429cc2d1077204302682bdd 100644 --- a/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom.cpp +++ b/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom.cpp @@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; @@ -60,13 +57,13 @@ extern "C" __global__ __aicore__ void matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADD using B_T = half; using C_T = float; - TPipe pipe; + AscendC::TPipe pipe; TCubeTiling tiling; CopyTiling(&tiling, tilingGm); - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ A_T *>(a), tiling.M * tiling.Ka); bGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ B_T *>(b), tiling.Ka * tiling.N); cGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ C_T *>(c), tiling.M * tiling.N); @@ -79,21 +76,21 @@ extern "C" __global__ __aicore__ void matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADD int tailM = 0; int tailN = 0; - CalcGMOffset(GetBlockIdx(), tiling, offsetA, offsetB, offsetC, tailM, tailN, isTransA, isTransB); + CalcGMOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, tailM, tailN, isTransA, isTransB); auto gmA = aGlobal[offsetA]; auto gmB = bGlobal[offsetB]; auto gmC = cGlobal[offsetC]; - Matmul, - MatmulType, - MatmulType> mm; + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType> mm; 
REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), mm, &tiling); #ifdef CUSTOM_ASCEND310P - TBuf<> tmpMMFormatUb; - LocalTensor mmFormatUb; - pipe.InitBuffer(tmpMMFormatUb, TOTAL_VEC_LOCAL_SIZE); - mmFormatUb = tmpMMFormatUb.Get(TOTAL_VEC_LOCAL_SIZE); + AscendC::TBuf<> tmpMMFormatUb; + AscendC::LocalTensor mmFormatUb; + pipe.InitBuffer(tmpMMFormatUb, AscendC::TOTAL_VEC_LOCAL_SIZE); + mmFormatUb = tmpMMFormatUb.Get(AscendC::TOTAL_VEC_LOCAL_SIZE); mm.SetLocalWorkspace(mmFormatUb); #endif mm.SetTensorA(gmA, isTransA); diff --git a/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom_tiling.cpp b/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom_tiling.cpp index ce527d41f186aa72b3b4cc0a7cac9f8f5325d334..281bc4f07cd26b380c651617b98f2191631bc1b9 100644 --- a/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom_tiling.cpp +++ b/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom_tiling.cpp @@ -15,7 +15,6 @@ #include "tiling/tiling_api.h" #include "tiling/platform/platform_ascendc.h" -using namespace matmul_tiling; using namespace std; uint8_t *GetTilingBuf(optiling::TCubeTiling *tilingData) @@ -32,19 +31,19 @@ uint8_t *GenerateTiling(const char *socVersion) int N = 1024; int K = 512; - TPosition leftPosition = TPosition::GM; - CubeFormat leftFormat = CubeFormat::ND; - DataType leftDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition leftPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat leftFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType leftDtype = matmul_tiling::DataType::DT_FLOAT16; bool isTransA = false; - TPosition rightPosition = TPosition::GM; - CubeFormat rightFormat = CubeFormat::ND; - DataType rightDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition rightPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat rightFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType rightDtype = 
matmul_tiling::DataType::DT_FLOAT16; bool isTransB = false; - TPosition resultPosition = TPosition::GM; - CubeFormat resultFormat = CubeFormat::ND; - DataType resultDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition resultPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat resultFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType resultDtype = matmul_tiling::DataType::DT_FLOAT; bool isBias = false; @@ -54,7 +53,7 @@ uint8_t *GenerateTiling(const char *socVersion) optiling::TCubeTiling tilingData; auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance(socVersion); - MultiCoreMatmulTiling tilingApi(*ascendcPlatform); + matmul_tiling::MultiCoreMatmulTiling tilingApi(*ascendcPlatform); tilingApi.SetDim(usedCoreNum); tilingApi.SetAType(leftPosition, leftFormat, leftDtype, isTransA); diff --git a/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp index 82e64b5f42f9fb2cb3fd31d3300979ce9e1e612d..506c2e1165e6037664636f6b6b182877b4c9dce8 100644 --- a/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp @@ -10,7 +10,6 @@ #include "matmul_leakyrelu_custom_tiling.h" #include "register/op_def_registry.h" #include "tiling/tiling_api.h" -using namespace matmul_tiling; namespace optiling { static ge::graphStatus TilingFunc(gert::TilingContext *context) @@ -21,12 +20,16 @@ static ge::graphStatus TilingFunc(gert::TilingContext *context) int32_t baseM = 128; int32_t baseN = 128; auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo()); - MultiCoreMatmulTiling cubeTiling(ascendcPlatform); + matmul_tiling::MultiCoreMatmulTiling cubeTiling(ascendcPlatform); 
cubeTiling.SetDim(2); - cubeTiling.SetAType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - cubeTiling.SetBType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - cubeTiling.SetCType(TPosition::VECIN, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); - cubeTiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + cubeTiling.SetAType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT16); + cubeTiling.SetBType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT16); + cubeTiling.SetCType(matmul_tiling::TPosition::VECIN, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT); + cubeTiling.SetBiasType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT); cubeTiling.SetShape(M, N, K); cubeTiling.SetOrgShape(M, N, K); cubeTiling.SetFixSplit(baseM, baseN, -1); diff --git a/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp index a9020c9597929aaba217418bb790cb6f17570c2d..bec055f7fac67442fc1de283d506165d4eb6e097 100644 --- a/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp @@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; @@ -22,8 +19,8 @@ template cla public: __aicore__ inline MatmulLeakyKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - const TCubeTiling &tiling, float alpha, TPipe 
*pipe); - template __aicore__ inline void Process(TPipe *pipe); + const TCubeTiling &tiling, float alpha, AscendC::TPipe *pipe); + template __aicore__ inline void Process(AscendC::TPipe *pipe); __aicore__ inline void MatmulCompute(); __aicore__ inline void LeakyReluCompute(); @@ -31,24 +28,25 @@ public: __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> - matmulObj; + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - GlobalTensor biasGlobal; - LocalTensor reluOutLocal; + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; + AscendC::LocalTensor reluOutLocal; float alpha; TCubeTiling tiling; - TQue reluOutQueue_; + AscendC::TQue reluOutQueue_; }; template __aicore__ inline void MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - const TCubeTiling &tiling, float alpha, TPipe *pipe) + const TCubeTiling &tiling, float alpha, AscendC::TPipe *pipe) { this->tiling = tiling; this->alpha = alpha; @@ -61,7 +59,7 @@ MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ int offsetB = 0; int offsetC = 0; int offsetBias = 0; - CalcOffset(GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); + CalcOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); aGlobal = aGlobal[offsetA]; bGlobal = bGlobal[offsetB]; cGlobal = cGlobal[offsetC]; @@ -74,12 +72,12 @@ MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ template template -__aicore__ inline void MatmulLeakyKernel::Process(TPipe *pipe) +__aicore__ inline void MatmulLeakyKernel::Process(AscendC::TPipe *pipe) { uint32_t computeRound = 0; if constexpr (setTmpSpace) { - TBuf<> tmpMMFormatUb; - LocalTensor mmformatUb; + 
AscendC::TBuf<> tmpMMFormatUb; + AscendC::LocalTensor mmformatUb; pipe->InitBuffer(tmpMMFormatUb, tiling.baseM * tiling.baseN * sizeof(cType)); mmformatUb = tmpMMFormatUb.Get(tiling.baseM * tiling.baseN * sizeof(cType)); matmulObj.SetLocalWorkspace(mmformatUb); @@ -117,8 +115,9 @@ __aicore__ inline void MatmulLeakyKernel::CopyOut const uint32_t roundM = tiling.singleCoreM / tiling.baseM; const uint32_t roundN = tiling.singleCoreN / tiling.baseN; uint32_t startOffset = (count % roundM * tiling.baseM * tiling.N + count / roundM * tiling.baseN); - DataCopyParams copyParam = {(uint16_t)tiling.baseM, (uint16_t)(tiling.baseN * sizeof(cType) / DEFAULT_C0_SIZE), 0, - (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / DEFAULT_C0_SIZE)}; + AscendC::DataCopyParams copyParam = {(uint16_t)tiling.baseM, + (uint16_t)(tiling.baseN * sizeof(cType) / AscendC::DEFAULT_C0_SIZE), 0, + (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / AscendC::DEFAULT_C0_SIZE)}; DataCopy(cGlobal[startOffset], reluOutLocal, copyParam); reluOutQueue_.FreeTensor(reluOutLocal); } @@ -144,7 +143,7 @@ extern "C" __global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR { GET_TILING_DATA(tilingData, tiling); MatmulLeakyKernel matmulLeakyKernel; - TPipe pipe; + AscendC::TPipe pipe; REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulLeakyKernel.matmulObj, &tilingData.cubeTilingData); matmulLeakyKernel.Init(a, b, bias, c, workspace, tilingData.cubeTilingData, tilingData.alpha, &pipe); if (TILING_KEY_IS(1)) { diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom.cpp index 559aa078778c5a8828927656e98aea74745622fa..dbab44ef1b8c5f53f3122edf4bdecd730d3b776d 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom.cpp 
@@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; @@ -22,8 +19,8 @@ template cla public: __aicore__ inline MatmulLeakyKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, GM_ADDR tiling, - TPipe *pipe); - __aicore__ inline void Process(TPipe *pipe); + AscendC::TPipe *pipe); + __aicore__ inline void Process(AscendC::TPipe *pipe); __aicore__ inline void MatmulCompute(); __aicore__ inline void LeakyReluCompute(); @@ -31,23 +28,24 @@ public: __aicore__ inline void CalcOffset(int32_t blockIdx, int32_t usedCoreNum, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> - matmulObj; + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - GlobalTensor biasGlobal; - LocalTensor reluOutLocal; + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; + AscendC::LocalTensor reluOutLocal; TCubeTiling tiling; - TQue reluOutQueue_; + AscendC::TQue reluOutQueue_; }; template __aicore__ inline void MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - GM_ADDR tilingGM, TPipe *pipe) + GM_ADDR tilingGM, AscendC::TPipe *pipe) { auto tempTilingGM = (__gm__ uint32_t *)tilingGM; auto tempTiling = (uint32_t *)&tiling; @@ -60,7 +58,7 @@ __aicore__ inline void MatmulLeakyKernel::Init(GM biasGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ biasType *>(bias), tiling.N); int32_t offsetA, offsetB, offsetC, offsetBias; - CalcOffset(GetBlockIdx(), tiling.usedCoreNum, tiling, offsetA, offsetB, offsetC, offsetBias); + CalcOffset(AscendC::GetBlockIdx(), 
tiling.usedCoreNum, tiling, offsetA, offsetB, offsetC, offsetBias); aGlobal = aGlobal[offsetA]; bGlobal = bGlobal[offsetB]; cGlobal = cGlobal[offsetC]; @@ -72,13 +70,13 @@ __aicore__ inline void MatmulLeakyKernel::Init(GM } template -__aicore__ inline void MatmulLeakyKernel::Process(TPipe *pipe) +__aicore__ inline void MatmulLeakyKernel::Process(AscendC::TPipe *pipe) { uint32_t computeRound = 0; #ifdef CUSTOM_ASCEND310P - TBuf<> tmpMMFormatUb; - LocalTensor mmformatUb; + AscendC::TBuf<> tmpMMFormatUb; + AscendC::LocalTensor mmformatUb; pipe->InitBuffer(tmpMMFormatUb, tiling.baseM * tiling.baseN * sizeof(cType)); mmformatUb = tmpMMFormatUb.Get(tiling.baseM * tiling.baseN * sizeof(cType)); matmulObj.SetLocalWorkspace(mmformatUb); @@ -116,7 +114,7 @@ __aicore__ inline void MatmulLeakyKernel::CopyOut const uint32_t roundM = tiling.singleCoreM / tiling.baseM; const uint32_t roundN = tiling.singleCoreN / tiling.baseN; uint32_t startOffset = (count % roundM * tiling.baseM * tiling.N + count / roundM * tiling.baseN); - DataCopyParams copyParam = {(uint16_t)tiling.baseM, (uint16_t)(tiling.baseN * sizeof(cType) / DEFAULT_C0_SIZE), 0, - (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / DEFAULT_C0_SIZE)}; + AscendC::DataCopyParams copyParam = {(uint16_t)tiling.baseM, (uint16_t)(tiling.baseN * sizeof(cType) / AscendC::DEFAULT_C0_SIZE), 0, + (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / AscendC::DEFAULT_C0_SIZE)}; DataCopy(cGlobal[startOffset], reluOutLocal, copyParam); reluOutQueue_.FreeTensor(reluOutLocal); @@ -142,7 +140,7 @@ extern "C" __global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR GM_ADDR workspace, GM_ADDR tiling) { MatmulLeakyKernel matmulLeakyKernel; - TPipe pipe; + AscendC::TPipe pipe; matmulLeakyKernel.Init(a, b, bias, c, workspace, tiling, &pipe); REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulLeakyKernel.matmulObj, &matmulLeakyKernel.tiling); matmulLeakyKernel.Process(&pipe); diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom_tiling.cpp
b/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom_tiling.cpp index d0b8b19a334f6de89ec5200775f20b1f5cea0d22..5f1390a55aafea12523327f2cc18cb33a129fdd7 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom_tiling.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom_tiling.cpp @@ -14,7 +14,6 @@ #include #include "tiling/tiling_api.h" -using namespace matmul_tiling; using namespace std; uint8_t *GetTilingBuf(optiling::TCubeTiling *tilingData) @@ -32,28 +31,28 @@ uint8_t *GenerateTiling() int K = 256; int baseM = 256; int baseN = 128; - TPosition leftPos = TPosition::GM; - CubeFormat leftFormat = CubeFormat::ND; - DataType leftDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition leftPos = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat leftFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType leftDtype = matmul_tiling::DataType::DT_FLOAT16; int transposeA = 0; - TPosition rightPos = TPosition::GM; - CubeFormat rightFormat = CubeFormat::ND; - DataType rightDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition rightPos = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat rightFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType rightDtype = matmul_tiling::DataType::DT_FLOAT16; int transposeB = 0; - TPosition resPos = TPosition::GM; - CubeFormat resFormat = CubeFormat::ND; - DataType resDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition resPos = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat resFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType resDtype = matmul_tiling::DataType::DT_FLOAT; - TPosition biasPos = TPosition::GM; - CubeFormat biasFormat = CubeFormat::ND; - DataType biasDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition biasPos = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat biasFormat = matmul_tiling::CubeFormat::ND; + 
matmul_tiling::DataType biasDtype = matmul_tiling::DataType::DT_FLOAT; int isBias = 1; int usedCoreNum = 2; optiling::TCubeTiling tilingData; tilingData.set_usedCoreNum(usedCoreNum); - MultiCoreMatmulTiling tilingApi; + matmul_tiling::MultiCoreMatmulTiling tilingApi; tilingApi.SetDim(usedCoreNum); tilingApi.SetAType(leftPos, leftFormat, leftDtype, bool(transposeA)); tilingApi.SetBType(rightPos, rightFormat, rightDtype, bool(transposeB)); @@ -63,7 +62,7 @@ uint8_t *GenerateTiling() tilingApi.SetOrgShape(M, N, K); tilingApi.SetShape(M, N, K); tilingApi.SetBias(bool(isBias)); - tilingApi.SetTraverse(MatrixTraverse::FIRSTM); + tilingApi.SetTraverse(matmul_tiling::MatrixTraverse::FIRSTM); tilingApi.SetFixSplit(baseM, baseN, -1); tilingApi.SetBufferSpace(-1, -1, -1); int64_t res = tilingApi.GetTiling(tilingData); diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom.cpp index d7620c3586467d44f5d1a11b109bf48dd5415291..6505107215d50c8fe8672ed834b99d3cc522e883 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom.cpp @@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; @@ -33,8 +30,8 @@ template cla public: __aicore__ inline MatmulLeakyKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - const TCubeTiling &tiling, TPipe *pipe); - __aicore__ inline void Process(TPipe *pipe); + const TCubeTiling &tiling, AscendC::TPipe *pipe); + __aicore__ inline void Process(AscendC::TPipe *pipe); __aicore__ inline void MatmulCompute(); __aicore__ 
inline void LeakyReluCompute(); @@ -42,23 +39,25 @@ public: __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - GlobalTensor biasGlobal; - LocalTensor reluOutLocal; + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; + AscendC::LocalTensor reluOutLocal; TCubeTiling tiling; - TQue reluOutQueue_; + AscendC::TQue reluOutQueue_; }; template __aicore__ inline void MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - const TCubeTiling &tiling, TPipe *pipe) + const TCubeTiling &tiling, AscendC::TPipe *pipe) { this->tiling = tiling; aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ aType *>(a), tiling.M * tiling.Ka); @@ -67,7 +66,7 @@ __aicore__ inline void MatmulLeakyKernel::Init(GM biasGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ biasType *>(bias), tiling.N); int32_t offsetA, offsetB, offsetC, offsetBias; - CalcOffset(GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); + CalcOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); aGlobal = aGlobal[offsetA]; bGlobal = bGlobal[offsetB]; cGlobal = cGlobal[offsetC]; @@ -76,13 +75,13 @@ __aicore__ inline void MatmulLeakyKernel::Init(GM } template -__aicore__ inline void MatmulLeakyKernel::Process(TPipe *pipe) +__aicore__ inline void MatmulLeakyKernel::Process(AscendC::TPipe *pipe) { uint32_t computeRound = 0; #ifdef CUSTOM_ASCEND310P - TBuf<> tmpMMFormatUb; - LocalTensor mmformatUb; + AscendC::TBuf<> tmpMMFormatUb; + AscendC::LocalTensor mmformatUb; pipe->InitBuffer(tmpMMFormatUb, tiling.baseM * tiling.baseN * sizeof(cType)); mmformatUb = 
tmpMMFormatUb.Get(tiling.baseM * tiling.baseN * sizeof(cType)); matmulObj.SetLocalWorkspace(mmformatUb); @@ -120,8 +119,9 @@ __aicore__ inline void MatmulLeakyKernel::CopyOut const uint32_t roundM = tiling.singleCoreM / tiling.baseM; const uint32_t roundN = tiling.singleCoreN / tiling.baseN; uint32_t startOffset = (count % roundM * tiling.baseM * tiling.N + count / roundM * tiling.baseN); - DataCopyParams copyParam = {(uint16_t)tiling.baseM, (uint16_t)(tiling.baseN * sizeof(cType) / DEFAULT_C0_SIZE), 0, - (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / DEFAULT_C0_SIZE)}; + AscendC::DataCopyParams copyParam = {(uint16_t)tiling.baseM, (uint16_t)(tiling.baseN * sizeof(cType) / + AscendC::DEFAULT_C0_SIZE), 0, (uint16_t)((tiling.N - tiling.baseN) * + sizeof(cType) / AscendC::DEFAULT_C0_SIZE)}; DataCopy(cGlobal[startOffset], reluOutLocal, copyParam); reluOutQueue_.FreeTensor(reluOutLocal); } @@ -145,7 +145,7 @@ MatmulLeakyKernel::CalcOffset(int32_t blockIdx, c extern "C" __global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, GM_ADDR tilingGm) { - TPipe pipe; + AscendC::TPipe pipe; TCubeTiling tiling; CopyTiling(&tiling, tilingGm); diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom_tiling.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom_tiling.cpp index 287777e53ba89c06286c79522d3fd484fb6bb291..0ce3d2326493dc6c6575d51814f39ae17194bd31 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom_tiling.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom_tiling.cpp @@ -15,7 +15,6 @@ #include "tiling/tiling_api.h" #include "tiling/platform/platform_ascendc.h" -using namespace matmul_tiling; using namespace std; uint8_t *GetTilingBuf(optiling::TCubeTiling *tilingData) @@ -32,23 
+31,23 @@ uint8_t *GenerateTiling(const char *socVersion) int N = 640; int K = 256; - TPosition leftPosition = TPosition::GM; - CubeFormat leftFormat = CubeFormat::ND; - DataType leftDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition leftPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat leftFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType leftDtype = matmul_tiling::DataType::DT_FLOAT16; bool isTransA = false; - TPosition rightPosition = TPosition::GM; - CubeFormat rightFormat = CubeFormat::ND; - DataType rightDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition rightPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat rightFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType rightDtype = matmul_tiling::DataType::DT_FLOAT16; bool isTransB = false; - TPosition resultPosition = TPosition::GM; - CubeFormat resultFormat = CubeFormat::ND; - DataType resultDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition resultPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat resultFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType resultDtype = matmul_tiling::DataType::DT_FLOAT; - TPosition biasPosition = TPosition::GM; - CubeFormat biasFormat = CubeFormat::ND; - DataType biasDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition biasPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat biasFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType biasDtype = matmul_tiling::DataType::DT_FLOAT; bool isBias = true; int usedCoreNum = 2; @@ -57,7 +56,7 @@ uint8_t *GenerateTiling(const char *socVersion) optiling::TCubeTiling tilingData; auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance(socVersion); - MultiCoreMatmulTiling tilingApi(*ascendcPlatform); + matmul_tiling::MultiCoreMatmulTiling tilingApi(*ascendcPlatform); tilingApi.SetDim(usedCoreNum); tilingApi.SetAType(leftPosition, leftFormat, leftDtype, isTransA); @@ -68,7 +67,7 @@ 
uint8_t *GenerateTiling(const char *socVersion) tilingApi.SetOrgShape(M, N, K); tilingApi.SetShape(M, N, K); tilingApi.SetBias(isBias); - tilingApi.SetTraverse(MatrixTraverse::FIRSTM); + tilingApi.SetTraverse(matmul_tiling::MatrixTraverse::FIRSTM); tilingApi.SetFixSplit(baseM, baseN, -1); tilingApi.SetBufferSpace(-1, -1, -1); diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom.cpp index 2a729acc797ac7345c6c962e7c5fe06527ef98f0..6bea011af47e28ded0b91a5f6f5a73c2c86693c9 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom.cpp @@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; @@ -33,7 +30,7 @@ template cla public: __aicore__ inline MatmulLeakyKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - const TCubeTiling &tiling, TPipe *pipe); + const TCubeTiling &tiling, AscendC::TPipe *pipe); __aicore__ inline void Process(); __aicore__ inline void MatmulCompute(); @@ -42,19 +39,21 @@ public: __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - GlobalTensor biasGlobal; - GlobalTensor workspaceGlobal; - LocalTensor reluInLocal; + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + 
AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; + AscendC::GlobalTensor workspaceGlobal; + AscendC::LocalTensor reluInLocal; TCubeTiling tiling; - TQue reluInQueue; - TQue reluOutQueue; + AscendC::TQue reluInQueue; + AscendC::TQue reluOutQueue; uint32_t splitRowNums = 0; uint32_t splitRowSize = 0; }; @@ -62,7 +61,7 @@ public: template __aicore__ inline void MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - const TCubeTiling &tiling, TPipe *pipe) + const TCubeTiling &tiling, AscendC::TPipe *pipe) { this->tiling = tiling; splitRowNums = 4; @@ -74,12 +73,12 @@ __aicore__ inline void MatmulLeakyKernel::Init(GM workspaceGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ cType *>(workspace), tiling.M * tiling.N); int32_t offsetA, offsetB, offsetC, offsetBias; - CalcOffset(GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); + CalcOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); aGlobal = aGlobal[offsetA]; bGlobal = bGlobal[offsetB]; cGlobal = cGlobal[offsetC]; biasGlobal = biasGlobal[offsetBias]; - workspaceGlobal = workspaceGlobal[GetBlockIdx() * tiling.singleCoreM * tiling.singleCoreN]; + workspaceGlobal = workspaceGlobal[AscendC::GetBlockIdx() * tiling.singleCoreM * tiling.singleCoreN]; pipe->InitBuffer(reluInQueue, 1, tiling.baseM * tiling.baseN * sizeof(cType)); pipe->InitBuffer(reluOutQueue, 1, splitRowSize * tiling.baseN * sizeof(cType)); } @@ -127,8 +126,9 @@ __aicore__ inline void MatmulLeakyKernel::CopyOut const uint32_t roundM = tiling.singleCoreM / splitRowSize; const uint32_t roundN = tiling.singleCoreN / tiling.baseN; uint32_t startOffset = (count % roundM * splitRowSize * tiling.N + count / roundM * tiling.baseN); - DataCopyParams copyParam = {(uint16_t)splitRowSize, (uint16_t)(tiling.baseN * sizeof(cType) / DEFAULT_C0_SIZE), 0, - (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / DEFAULT_C0_SIZE)}; + AscendC::DataCopyParams copyParam = 
{(uint16_t)splitRowSize, (uint16_t)(tiling.baseN * sizeof(cType) / + AscendC::DEFAULT_C0_SIZE), 0, (uint16_t)((tiling.N - tiling.baseN) * + sizeof(cType) / AscendC::DEFAULT_C0_SIZE)}; DataCopy(cGlobal[startOffset], reluOutLocal, copyParam); reluOutQueue.FreeTensor(reluOutLocal); } @@ -152,7 +152,7 @@ MatmulLeakyKernel::CalcOffset(int32_t blockIdx, c extern "C" __global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, GM_ADDR tilingGm) { - TPipe pipe; + AscendC::TPipe pipe; TCubeTiling tiling; CopyTiling(&tiling, tilingGm); diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom_tiling.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom_tiling.cpp index 33200221687f8f037e177e0c3e697802bf7595ed..8e5b0f7cdff20c6b5fb3a96f4ac4f11ad2096348 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom_tiling.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom_tiling.cpp @@ -15,7 +15,6 @@ #include "tiling/tiling_api.h" #include "tiling/platform/platform_ascendc.h" -using namespace matmul_tiling; using namespace std; // typically tiling size wont be greater than 32k #define TILING_MAX_LEN 32768 @@ -37,23 +36,23 @@ uint8_t *GenerateTiling(const char *socVersion) int N = 640; int K = 256; - TPosition leftPosition = TPosition::GM; - CubeFormat leftFormat = CubeFormat::ND; - DataType leftDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition leftPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat leftFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType leftDtype = matmul_tiling::DataType::DT_FLOAT16; bool isTransA = false; - TPosition rightPosition = TPosition::GM; - CubeFormat rightFormat = CubeFormat::ND; - DataType rightDtype = DataType::DT_FLOAT16; 
+ matmul_tiling::TPosition rightPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat rightFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType rightDtype = matmul_tiling::DataType::DT_FLOAT16; bool isTransB = false; - TPosition resultPosition = TPosition::GM; - CubeFormat resultFormat = CubeFormat::ND; - DataType resultDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition resultPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat resultFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType resultDtype = matmul_tiling::DataType::DT_FLOAT; - TPosition biasPosition = TPosition::GM; - CubeFormat biasFormat = CubeFormat::ND; - DataType biasDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition biasPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat biasFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType biasDtype = matmul_tiling::DataType::DT_FLOAT; bool isBias = true; int usedCoreNum = 2; @@ -62,7 +61,7 @@ uint8_t *GenerateTiling(const char *socVersion) optiling::TCubeTiling tilingData; auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance(socVersion); - MultiCoreMatmulTiling tilingApi(*ascendcPlatform); + matmul_tiling::MultiCoreMatmulTiling tilingApi(*ascendcPlatform); tilingApi.SetDim(usedCoreNum); tilingApi.SetAType(leftPosition, leftFormat, leftDtype, isTransA); @@ -73,7 +72,7 @@ uint8_t *GenerateTiling(const char *socVersion) tilingApi.SetOrgShape(M, N, K); tilingApi.SetShape(M, N, K); tilingApi.SetBias(isBias); - tilingApi.SetTraverse(MatrixTraverse::FIRSTM); + tilingApi.SetTraverse(matmul_tiling::MatrixTraverse::FIRSTM); tilingApi.SetFixSplit(baseM, baseN, -1); tilingApi.SetBufferSpace(-1, -1, -1);