From 0d7f6b8f7a229b762fc5540367218b186100b5f3 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Tue, 13 Aug 2024 11:37:29 +0000 Subject: [PATCH 1/8] fix namespace problem Signed-off-by: jiangchengcheng-on --- .../op_kernel/matmul_custom.cpp | 31 +++++++------- .../op_kernel/matmul_custom.cpp | 33 +++++++-------- .../MatMulInvocationNeo/matmul_custom.cpp | 24 +++++------ .../op_kernel/matmul_leakyrelu_custom.cpp | 40 ++++++++++--------- 4 files changed, 66 insertions(+), 62 deletions(-) diff --git a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp index 24e657461..6422f2114 100644 --- a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp +++ b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp @@ -23,19 +23,20 @@ public: __aicore__ inline MatmulKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, const TCubeTiling &tiling); - template __aicore__ inline void Process(TPipe *pipe); + template __aicore__ inline void Process(AscendC::TPipe *pipe); __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> - matmulObj; + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - GlobalTensor biasGlobal; + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; TCubeTiling tiling; }; @@ -53,7 +54,7 @@ __aicore__ inline void MatmulKernel::Init(GM_ADDR int32_t offsetB = 0; int32_t offsetC = 0; int32_t offsetBias = 0; - CalcOffset(GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); + CalcOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); aGlobal = aGlobal[offsetA]; bGlobal = bGlobal[offsetB]; cGlobal = cGlobal[offsetC]; @@ -65,13 +66,13 @@ __aicore__ inline void MatmulKernel::Init(GM_ADDR template template -__aicore__ inline void MatmulKernel::Process(TPipe *pipe) +__aicore__ inline void MatmulKernel::Process(AscendC::TPipe *pipe) { if constexpr (setTmpSpace) { - TBuf<> tmpMMFormatUb; - LocalTensor mmformatUb; - pipe->InitBuffer(tmpMMFormatUb, TOTAL_VEC_LOCAL_SIZE); - mmformatUb = tmpMMFormatUb.Get(TOTAL_VEC_LOCAL_SIZE); + AscendC::TBuf<> tmpMMFormatUb; + AscendC::LocalTensor mmformatUb; + pipe->InitBuffer(tmpMMFormatUb, AscendC::TOTAL_VEC_LOCAL_SIZE); + mmformatUb = tmpMMFormatUb.Get(AscendC::TOTAL_VEC_LOCAL_SIZE); matmulObj.SetLocalWorkspace(mmformatUb); } @@ -102,7 +103,7 @@ extern "C" __global__ __aicore__ void matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADD { GET_TILING_DATA(tilingData, tiling); MatmulKernel matmulKernel; - TPipe pipe; + AscendC::TPipe pipe; REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulKernel.matmulObj, &tilingData.cubeTilingData); matmulKernel.Init(a, b, bias, c, workspace, tilingData.cubeTilingData); if (TILING_KEY_IS(1)) { diff --git a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp index c4261f25c..c483558e1 100644 --- 
a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp +++ b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp @@ -23,19 +23,20 @@ public: __aicore__ inline MatmulKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, const TCubeTiling &tiling); - template __aicore__ inline void Process(TPipe *pipe); + template __aicore__ inline void Process(AscendC::TPipe *pipe); __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> - matmulObj; + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - GlobalTensor biasGlobal; + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; TCubeTiling tiling; }; @@ -53,7 +54,7 @@ __aicore__ inline void MatmulKernel::Init(GM_ADDR int32_t offsetB = 0; int32_t offsetC = 0; int32_t offsetBias = 0; - CalcOffset(GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); + CalcOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); aGlobal = aGlobal[offsetA]; bGlobal = bGlobal[offsetB]; cGlobal = cGlobal[offsetC]; @@ -65,16 +66,16 @@ __aicore__ inline void MatmulKernel::Init(GM_ADDR template template -__aicore__ inline void MatmulKernel::Process(TPipe *pipe) +__aicore__ inline void MatmulKernel::Process(AscendC::TPipe *pipe) { - if (GetBlockIdx() >= 1) { + if (AscendC::GetBlockIdx() >= 1) { return; } if constexpr (setTmpSpace) { - TBuf<> tmpMMFormatUb; - LocalTensor mmformatUb; - pipe->InitBuffer(tmpMMFormatUb, TOTAL_VEC_LOCAL_SIZE); - mmformatUb = tmpMMFormatUb.Get(TOTAL_VEC_LOCAL_SIZE); + AscendC::TBuf<> tmpMMFormatUb; + AscendC::LocalTensor mmformatUb; + pipe->InitBuffer(tmpMMFormatUb, AscendC::TOTAL_VEC_LOCAL_SIZE); + mmformatUb = tmpMMFormatUb.Get(AscendC::TOTAL_VEC_LOCAL_SIZE); matmulObj.SetLocalWorkspace(mmformatUb); } @@ -105,7 +106,7 @@ extern "C" __global__ __aicore__ void matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADD { GET_TILING_DATA(tilingData, tiling); MatmulKernel matmulKernel; - TPipe pipe; + AscendC::TPipe pipe; REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulKernel.matmulObj, &tilingData.cubeTilingData); matmulKernel.Init(a, b, bias, c, workspace, tilingData.cubeTilingData); if (TILING_KEY_IS(1)) { diff --git a/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom.cpp b/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom.cpp index 41daf97be..b34687cbf 100644 --- a/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom.cpp +++ b/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom.cpp @@ -60,13 +60,13 @@ extern "C" __global__ __aicore__ void matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADD using B_T = half; using C_T = float; - TPipe pipe; + AscendC::TPipe pipe; TCubeTiling tiling; CopyTiling(&tiling, tilingGm); - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ A_T *>(a), tiling.M * tiling.Ka); bGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ B_T *>(b), tiling.Ka * tiling.N); 
cGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ C_T *>(c), tiling.M * tiling.N); @@ -79,21 +79,21 @@ extern "C" __global__ __aicore__ void matmul_custom(GM_ADDR a, GM_ADDR b, GM_ADD int tailM = 0; int tailN = 0; - CalcGMOffset(GetBlockIdx(), tiling, offsetA, offsetB, offsetC, tailM, tailN, isTransA, isTransB); + CalcGMOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, tailM, tailN, isTransA, isTransB); auto gmA = aGlobal[offsetA]; auto gmB = bGlobal[offsetB]; auto gmC = cGlobal[offsetC]; - Matmul, - MatmulType, - MatmulType> mm; + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType> mm; REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), mm, &tiling); #ifdef CUSTOM_ASCEND310P - TBuf<> tmpMMFormatUb; - LocalTensor mmFormatUb; - pipe.InitBuffer(tmpMMFormatUb, TOTAL_VEC_LOCAL_SIZE); - mmFormatUb = tmpMMFormatUb.Get(TOTAL_VEC_LOCAL_SIZE); + AscendC::TBuf<> tmpMMFormatUb; + AscendC::LocalTensor mmFormatUb; + pipe.InitBuffer(tmpMMFormatUb, AscendC::TOTAL_VEC_LOCAL_SIZE); + mmFormatUb = tmpMMFormatUb.Get(AscendC::TOTAL_VEC_LOCAL_SIZE); mm.SetLocalWorkspace(mmFormatUb); #endif mm.SetTensorA(gmA, isTransA); diff --git a/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp index a9020c959..5fa11eda7 100644 --- a/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp @@ -22,8 +22,8 @@ template cla public: __aicore__ inline MatmulLeakyKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - const TCubeTiling &tiling, float alpha, TPipe *pipe); - template __aicore__ inline void Process(TPipe *pipe); + const TCubeTiling &tiling, float alpha, AscendC::TPipe *pipe); + template __aicore__ inline void Process(AscendC::TPipe *pipe); __aicore__ inline void MatmulCompute(); __aicore__ inline void LeakyReluCompute(); @@ -31,24 +31,25 @@ public: __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> - matmulObj; + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - GlobalTensor biasGlobal; - LocalTensor reluOutLocal; + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; + AscendC::LocalTensor reluOutLocal; float alpha; TCubeTiling tiling; - TQue reluOutQueue_; + AscendC::TQue reluOutQueue_; }; template __aicore__ inline void MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - const TCubeTiling &tiling, float alpha, TPipe *pipe) + const TCubeTiling &tiling, float alpha, AscendC::TPipe *pipe) { this->tiling = tiling; this->alpha = alpha; @@ -61,7 +62,7 @@ MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ int offsetB = 0; int offsetC = 0; int offsetBias = 0; - CalcOffset(GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); + CalcOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); aGlobal = aGlobal[offsetA]; bGlobal = bGlobal[offsetB]; cGlobal = cGlobal[offsetC]; @@ -74,12 +75,12 @@ 
MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ template template -__aicore__ inline void MatmulLeakyKernel::Process(TPipe *pipe) +__aicore__ inline void MatmulLeakyKernel::Process(AscendC::TPipe *pipe) { uint32_t computeRound = 0; if constexpr (setTmpSpace) { - TBuf<> tmpMMFormatUb; - LocalTensor mmformatUb; + AscendC::TBuf<> tmpMMFormatUb; + AscendC::LocalTensor mmformatUb; pipe->InitBuffer(tmpMMFormatUb, tiling.baseM * tiling.baseN * sizeof(cType)); mmformatUb = tmpMMFormatUb.Get(tiling.baseM * tiling.baseN * sizeof(cType)); matmulObj.SetLocalWorkspace(mmformatUb); @@ -117,8 +118,9 @@ __aicore__ inline void MatmulLeakyKernel::CopyOut const uint32_t roundM = tiling.singleCoreM / tiling.baseM; const uint32_t roundN = tiling.singleCoreN / tiling.baseN; uint32_t startOffset = (count % roundM * tiling.baseM * tiling.N + count / roundM * tiling.baseN); - DataCopyParams copyParam = {(uint16_t)tiling.baseM, (uint16_t)(tiling.baseN * sizeof(cType) / DEFAULT_C0_SIZE), 0, - (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / DEFAULT_C0_SIZE)}; + AscendC::DataCopyParams copyParam = {(uint16_t)tiling.baseM, + (uint16_t)(tiling.baseN * sizeof(cType) / AscendC::DEFAULT_C0_SIZE), 0, + (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / AscendC::DEFAULT_C0_SIZE)}; DataCopy(cGlobal[startOffset], reluOutLocal, copyParam); reluOutQueue_.FreeTensor(reluOutLocal); } @@ -144,7 +146,7 @@ extern "C" __global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR { GET_TILING_DATA(tilingData, tiling); MatmulLeakyKernel matmulLeakyKernel; - TPipe pipe; + AscendC::TPipe pipe; REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulLeakyKernel.matmulObj, &tilingData.cubeTilingData); matmulLeakyKernel.Init(a, b, bias, c, workspace, tilingData.cubeTilingData, tilingData.alpha, &pipe); if (TILING_KEY_IS(1)) { -- Gitee From b2d331bd27ea2003b107c05f04e22f1a61d30dbb Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Tue, 13 Aug 2024 11:39:47 +0000 Subject: [PATCH 2/8] reduce namespace Signed-off-by: jiangchengcheng-on --- .../MatmulCustomMultiCore/op_kernel/matmul_custom.cpp | 3 --- .../MatmulCustomSingleCore/op_kernel/matmul_custom.cpp | 3 --- .../KernelLaunch/MatMulInvocationNeo/matmul_custom.cpp | 3 --- .../op_kernel/matmul_leakyrelu_custom.cpp | 3 --- 4 files changed, 12 deletions(-) diff --git a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp index 6422f2114..e47f9916d 100644 --- a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp +++ b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_kernel/matmul_custom.cpp @@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; diff --git a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp index c483558e1..7b146958f 100644 --- a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp +++ b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp @@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline 
uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; diff --git a/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom.cpp b/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom.cpp index b34687cbf..ab694b77b 100644 --- a/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom.cpp +++ b/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom.cpp @@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; diff --git a/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp index 5fa11eda7..bec055f7f 100644 --- a/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_kernel/matmul_leakyrelu_custom.cpp @@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; -- Gitee From 9d8ff9f8ccbb3e95a9be82152567f41fc406dc48 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Mon, 26 Aug 2024 02:07:33 +0000 Subject: [PATCH 3/8] fix using namespace Signed-off-by: jiangchengcheng-on --- .../op_host/matmul_custom.cpp | 13 ++++--- .../op_kernel/matmul_custom.cpp | 2 +- .../matmul_custom_tiling.cpp | 21 ++++++----- .../op_host/matmul_leakyrelu_custom.cpp | 15 ++++---- .../matmul_leakyrelu_custom.cpp | 35 +++++++++--------- .../matmul_leakyrelu_custom_tiling.cpp | 27 +++++++------- .../matmul_leakyrelu_custom.cpp | 36 +++++++++---------- .../matmul_leakyrelu_custom_tiling.cpp | 27 +++++++------- 8 files changed, 87 insertions(+), 89 deletions(-) diff --git a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp index 93bbde8bc..5be017579 100644 --- a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp +++ b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp @@ -11,7 +11,6 @@ #include "register/op_def_registry.h" #include "tiling/platform/platform_ascendc.h" #include "tiling/tiling_api.h" -using namespace matmul_tiling; namespace optiling { static ge::graphStatus TilingFunc(gert::TilingContext *context) @@ -24,18 +23,18 @@ static ge::graphStatus TilingFunc(gert::TilingContext *context) int32_t K = shape_a.GetDim(1); int32_t baseM = 128; int32_t baseN = 128; - MultiCoreMatmulTiling cubeTiling(ascendcPlatform); + matmul_tiling::MultiCoreMatmulTiling cubeTiling(ascendcPlatform); cubeTiling.SetDim(2); - cubeTiling.SetAType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - cubeTiling.SetBType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - cubeTiling.SetCType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); - cubeTiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + cubeTiling.SetAType(AscendC::TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + 
cubeTiling.SetBType(AscendC::TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + cubeTiling.SetCType(AscendC::TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + cubeTiling.SetBiasType(AscendC::TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); cubeTiling.SetShape(M, N, K); cubeTiling.SetOrgShape(M, N, K); cubeTiling.SetFixSplit(baseM, baseN, -1); cubeTiling.SetBias(true); cubeTiling.SetBufferSpace(-1, -1, -1); - MatmulCustomTilingData tiling; + matmul_tiling::MatmulCustomTilingData tiling; if (cubeTiling.GetTiling(tiling.cubeTilingData) == -1) { return ge::GRAPH_FAILED; } diff --git a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp index 7b146958f..f9ce62b18 100644 --- a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp +++ b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp @@ -25,7 +25,7 @@ public: __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - matmul::Matmul, + Matmul, matmul::MatmulType, matmul::MatmulType, matmul::MatmulType> matmulObj; diff --git a/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom_tiling.cpp b/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom_tiling.cpp index ce527d41f..281bc4f07 100644 --- a/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom_tiling.cpp +++ b/operator/MatMulCustomSample/KernelLaunch/MatMulInvocationNeo/matmul_custom_tiling.cpp @@ -15,7 +15,6 @@ #include "tiling/tiling_api.h" #include "tiling/platform/platform_ascendc.h" -using namespace matmul_tiling; using namespace std; uint8_t *GetTilingBuf(optiling::TCubeTiling *tilingData) @@ -32,19 +31,19 @@ uint8_t *GenerateTiling(const char *socVersion) int N = 1024; int K = 512; - TPosition leftPosition = TPosition::GM; - CubeFormat leftFormat = CubeFormat::ND; - DataType leftDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition leftPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat leftFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType leftDtype = matmul_tiling::DataType::DT_FLOAT16; bool isTransA = false; - TPosition rightPosition = TPosition::GM; - CubeFormat rightFormat = CubeFormat::ND; - DataType rightDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition rightPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat rightFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType rightDtype = matmul_tiling::DataType::DT_FLOAT16; bool isTransB = false; - TPosition resultPosition = TPosition::GM; - CubeFormat resultFormat = CubeFormat::ND; - DataType resultDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition resultPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat resultFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType resultDtype = matmul_tiling::DataType::DT_FLOAT; bool isBias = false; @@ -54,7 +53,7 @@ uint8_t *GenerateTiling(const char *socVersion) optiling::TCubeTiling tilingData; auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance(socVersion); - MultiCoreMatmulTiling tilingApi(*ascendcPlatform); + matmul_tiling::MultiCoreMatmulTiling tilingApi(*ascendcPlatform); tilingApi.SetDim(usedCoreNum); 
tilingApi.SetAType(leftPosition, leftFormat, leftDtype, isTransA); diff --git a/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp index 82e64b5f4..506c2e116 100644 --- a/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/FrameworkLaunch/MatmulLeakyReluCustom/op_host/matmul_leakyrelu_custom.cpp @@ -10,7 +10,6 @@ #include "matmul_leakyrelu_custom_tiling.h" #include "register/op_def_registry.h" #include "tiling/tiling_api.h" -using namespace matmul_tiling; namespace optiling { static ge::graphStatus TilingFunc(gert::TilingContext *context) @@ -21,12 +20,16 @@ static ge::graphStatus TilingFunc(gert::TilingContext *context) int32_t baseM = 128; int32_t baseN = 128; auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo()); - MultiCoreMatmulTiling cubeTiling(ascendcPlatform); + matmul_tiling::MultiCoreMatmulTiling cubeTiling(ascendcPlatform); cubeTiling.SetDim(2); - cubeTiling.SetAType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - cubeTiling.SetBType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - cubeTiling.SetCType(TPosition::VECIN, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); - cubeTiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + cubeTiling.SetAType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT16); + cubeTiling.SetBType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT16); + cubeTiling.SetCType(matmul_tiling::TPosition::VECIN, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT); + cubeTiling.SetBiasType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT); cubeTiling.SetShape(M, N, K); cubeTiling.SetOrgShape(M, N, K); cubeTiling.SetFixSplit(baseM, baseN, -1); diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom.cpp index d7620c358..e220d9ceb 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom.cpp @@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; @@ -33,8 +30,8 @@ template cla public: __aicore__ inline MatmulLeakyKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - const TCubeTiling &tiling, TPipe *pipe); - __aicore__ inline void Process(TPipe *pipe); + const TCubeTiling &tiling, AscendC::TPipe *pipe); + __aicore__ inline void Process(AscendC::TPipe *pipe); __aicore__ inline void MatmulCompute(); __aicore__ inline void LeakyReluCompute(); @@ -42,23 +39,25 @@ public: __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> + matmul::Matmul, + 
matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - GlobalTensor biasGlobal; - LocalTensor reluOutLocal; + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; + AscendC::LocalTensor reluOutLocal; TCubeTiling tiling; - TQue reluOutQueue_; + AscendC::TQue reluOutQueue_; }; template __aicore__ inline void MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - const TCubeTiling &tiling, TPipe *pipe) + const TCubeTiling &tiling,AscendC::TPipe *pipe) { this->tiling = tiling; aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ aType *>(a), tiling.M * tiling.Ka); @@ -76,13 +75,13 @@ __aicore__ inline void MatmulLeakyKernel::Init(GM } template -__aicore__ inline void MatmulLeakyKernel::Process(TPipe *pipe) +__aicore__ inline void MatmulLeakyKernel::Process(AscendC::TPipe *pipe) { uint32_t computeRound = 0; #ifdef CUSTOM_ASCEND310P - TBuf<> tmpMMFormatUb; - LocalTensor mmformatUb; + AscendC::TBuf<> tmpMMFormatUb; + AscendC::LocalTensor mmformatUb; pipe->InitBuffer(tmpMMFormatUb, tiling.baseM * tiling.baseN * sizeof(cType)); mmformatUb = tmpMMFormatUb.Get(tiling.baseM * tiling.baseN * sizeof(cType)); matmulObj.SetLocalWorkspace(mmformatUb); @@ -145,7 +144,7 @@ MatmulLeakyKernel::CalcOffset(int32_t blockIdx, c extern "C" __global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, GM_ADDR tilingGm) { - TPipe pipe; + AscendC::TPipe pipe; TCubeTiling tiling; CopyTiling(&tiling, tilingGm); diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom_tiling.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom_tiling.cpp index 287777e53..775aec320 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom_tiling.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom_tiling.cpp @@ -15,7 +15,6 @@ #include "tiling/tiling_api.h" #include "tiling/platform/platform_ascendc.h" -using namespace matmul_tiling; using namespace std; uint8_t *GetTilingBuf(optiling::TCubeTiling *tilingData) @@ -32,23 +31,23 @@ uint8_t *GenerateTiling(const char *socVersion) int N = 640; int K = 256; - TPosition leftPosition = TPosition::GM; - CubeFormat leftFormat = CubeFormat::ND; - DataType leftDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition leftPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat leftFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType leftDtype = matmul_tiling::DataType::DT_FLOAT16; bool isTransA = false; - TPosition rightPosition = TPosition::GM; - CubeFormat rightFormat = CubeFormat::ND; - DataType rightDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition rightPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat rightFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType rightDtype = matmul_tiling::DataType::DT_FLOAT16; bool isTransB = false; - TPosition resultPosition = TPosition::GM; - CubeFormat resultFormat = CubeFormat::ND; - DataType resultDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition resultPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat resultFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType resultDtype = 
matmul_tiling::DataType::DT_FLOAT; - TPosition biasPosition = TPosition::GM; - CubeFormat biasFormat = CubeFormat::ND; - DataType biasDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition biasPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat biasFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType biasDtype = matmul_tiling::DataType::DT_FLOAT; bool isBias = true; int usedCoreNum = 2; @@ -57,7 +56,7 @@ uint8_t *GenerateTiling(const char *socVersion) optiling::TCubeTiling tilingData; auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance(socVersion); - MultiCoreMatmulTiling tilingApi(*ascendcPlatform); + matmul_tiling::MultiCoreMatmulTiling tilingApi(*ascendcPlatform); tilingApi.SetDim(usedCoreNum); tilingApi.SetAType(leftPosition, leftFormat, leftDtype, isTransA); diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom.cpp index 2a729acc7..27c480aad 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom.cpp @@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; @@ -33,7 +30,7 @@ template cla public: __aicore__ inline MatmulLeakyKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - const TCubeTiling &tiling, TPipe *pipe); + const TCubeTiling &tiling, AscendC::TPipe *pipe); __aicore__ inline void Process(); __aicore__ inline void MatmulCompute(); @@ -42,19 +39,21 @@ public: __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - GlobalTensor biasGlobal; - GlobalTensor workspaceGlobal; - LocalTensor reluInLocal; + AscendC::GlobalTensor aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; + AscendC::GlobalTensor workspaceGlobal; + AscendC::LocalTensor reluInLocal; TCubeTiling tiling; - TQue reluInQueue; - TQue reluOutQueue; + AscendC::TQue reluInQueue; + AscendC::TQue reluOutQueue; uint32_t splitRowNums = 0; uint32_t splitRowSize = 0; }; @@ -62,7 +61,7 @@ public: template __aicore__ inline void MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - const TCubeTiling &tiling, TPipe *pipe) + const TCubeTiling &tiling, AscendC::TPipe *pipe) { this->tiling = tiling; splitRowNums = 4; @@ -127,8 +126,9 @@ __aicore__ inline void MatmulLeakyKernel::CopyOut const uint32_t roundM = tiling.singleCoreM / splitRowSize; const uint32_t roundN = tiling.singleCoreN / tiling.baseN; uint32_t startOffset = (count % roundM * splitRowSize * tiling.N + count / roundM * tiling.baseN); - DataCopyParams copyParam = {(uint16_t)splitRowSize, (uint16_t)(tiling.baseN * sizeof(cType) / DEFAULT_C0_SIZE), 0, - (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / DEFAULT_C0_SIZE)}; + DataCopyParams copyParam = 
{(uint16_t)splitRowSize, (uint16_t)(tiling.baseN * sizeof(cType) / + AscendC::DEFAULT_C0_SIZE), 0, (uint16_t)((tiling.N - tiling.baseN) * + sizeof(cType) / AscendC::DEFAULT_C0_SIZE)}; DataCopy(cGlobal[startOffset], reluOutLocal, copyParam); reluOutQueue.FreeTensor(reluOutLocal); } @@ -152,7 +152,7 @@ MatmulLeakyKernel::CalcOffset(int32_t blockIdx, c extern "C" __global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, GM_ADDR tilingGm) { - TPipe pipe; + AscendC::TPipe pipe; TCubeTiling tiling; CopyTiling(&tiling, tilingGm); diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom_tiling.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom_tiling.cpp index 332002216..5b433f679 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom_tiling.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom_tiling.cpp @@ -15,7 +15,6 @@ #include "tiling/tiling_api.h" #include "tiling/platform/platform_ascendc.h" -using namespace matmul_tiling; using namespace std; // typically tiling size wont be greater than 32k #define TILING_MAX_LEN 32768 @@ -37,23 +36,23 @@ uint8_t *GenerateTiling(const char *socVersion) int N = 640; int K = 256; - TPosition leftPosition = TPosition::GM; - CubeFormat leftFormat = CubeFormat::ND; - DataType leftDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition leftPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat leftFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType leftDtype = matmul_tiling::DataType::DT_FLOAT16; bool isTransA = false; - TPosition rightPosition = TPosition::GM; - CubeFormat rightFormat = CubeFormat::ND; - DataType rightDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition rightPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat rightFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType rightDtype = matmul_tiling::DataType::DT_FLOAT16; bool isTransB = false; - TPosition resultPosition = TPosition::GM; - CubeFormat resultFormat = CubeFormat::ND; - DataType resultDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition resultPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat resultFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType resultDtype = matmul_tiling::DataType::DT_FLOAT; - TPosition biasPosition = TPosition::GM; - CubeFormat biasFormat = CubeFormat::ND; - DataType biasDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition biasPosition = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat biasFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType biasDtype = matmul_tiling::DataType::DT_FLOAT; bool isBias = true; int usedCoreNum = 2; @@ -62,7 +61,7 @@ uint8_t *GenerateTiling(const char *socVersion) optiling::TCubeTiling tilingData; auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance(socVersion); - MultiCoreMatmulTiling tilingApi(*ascendcPlatform); + matmul_tiling::MultiCoreMatmulTiling tilingApi(*ascendcPlatform); tilingApi.SetDim(usedCoreNum); tilingApi.SetAType(leftPosition, leftFormat, leftDtype, isTransA); -- Gitee From 83b95ae07eb44a33bb34f873efee0ba9e062eefd Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Mon, 26 Aug 2024 02:09:29 +0000 Subject: [PATCH 4/8] fix wrong using Signed-off-by: jiangchengcheng-on --- 
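Note below the cut line (illustrative, not part of the commit): on the host side the tiling-configuration enums live in the matmul_tiling namespace (matmul_tiling::TPosition, matmul_tiling::CubeFormat, matmul_tiling::DataType) rather than the device-side AscendC namespace, and MatmulCustomTilingData is the tiling-data struct generated for this op, so it takes no matmul_tiling:: qualifier. The sketch below gathers the corrected pattern in one place; the helper name ConfigureCubeTiling, the hard-coded shapes, and the "matmul_custom_tiling.h" header name are assumptions for illustration, while the API calls follow the sample code.

#include "matmul_custom_tiling.h"              // assumed name of the generated tiling header
#include "tiling/tiling_api.h"
#include "tiling/platform/platform_ascendc.h"

namespace optiling {
// Hypothetical helper: fills the cube tiling using fully qualified matmul_tiling names.
static bool ConfigureCubeTiling(platform_ascendc::PlatformAscendC &ascendcPlatform,
                                MatmulCustomTilingData &tiling)
{
    matmul_tiling::MultiCoreMatmulTiling cubeTiling(ascendcPlatform);
    cubeTiling.SetDim(2);
    cubeTiling.SetAType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND,
                        matmul_tiling::DataType::DT_FLOAT16);
    cubeTiling.SetBType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND,
                        matmul_tiling::DataType::DT_FLOAT16);
    cubeTiling.SetCType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND,
                        matmul_tiling::DataType::DT_FLOAT);
    cubeTiling.SetBiasType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND,
                           matmul_tiling::DataType::DT_FLOAT);
    cubeTiling.SetShape(512, 1024, 512);       // placeholder M, N, K
    cubeTiling.SetOrgShape(512, 1024, 512);
    cubeTiling.SetFixSplit(128, 128, -1);      // baseM, baseN as in the sample
    cubeTiling.SetBias(true);
    cubeTiling.SetBufferSpace(-1, -1, -1);
    // The generated TilingData type is used without a namespace qualifier.
    return cubeTiling.GetTiling(tiling.cubeTilingData) != -1;
}
} // namespace optiling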
.../op_host/matmul_custom.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp index 5be017579..68c0c6550 100644 --- a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp +++ b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomMultiCore/op_host/matmul_custom.cpp @@ -25,16 +25,20 @@ static ge::graphStatus TilingFunc(gert::TilingContext *context) int32_t baseN = 128; matmul_tiling::MultiCoreMatmulTiling cubeTiling(ascendcPlatform); cubeTiling.SetDim(2); - cubeTiling.SetAType(AscendC::TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - cubeTiling.SetBType(AscendC::TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - cubeTiling.SetCType(AscendC::TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); - cubeTiling.SetBiasType(AscendC::TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + cubeTiling.SetAType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT16); + cubeTiling.SetBType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT16); + cubeTiling.SetCType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT); + cubeTiling.SetBiasType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, + matmul_tiling::DataType::DT_FLOAT); cubeTiling.SetShape(M, N, K); cubeTiling.SetOrgShape(M, N, K); cubeTiling.SetFixSplit(baseM, baseN, -1); cubeTiling.SetBias(true); cubeTiling.SetBufferSpace(-1, -1, -1); - matmul_tiling::MatmulCustomTilingData tiling; + MatmulCustomTilingData tiling; if (cubeTiling.GetTiling(tiling.cubeTilingData) == -1) { return ge::GRAPH_FAILED; } -- Gitee From 7530b3befdfb6d2f2a241d19a3a2162f203108c9 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Mon, 26 Aug 2024 03:12:53 +0000 Subject: [PATCH 5/8] fix error Signed-off-by: jiangchengcheng-on --- .../MatmulCustomSingleCore/op_kernel/matmul_custom.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp index f9ce62b18..7b146958f 100644 --- a/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp +++ b/operator/MatMulCustomSample/FrameworkLaunch/MatmulCustomSingleCore/op_kernel/matmul_custom.cpp @@ -25,7 +25,7 @@ public: __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, + matmul::Matmul, matmul::MatmulType, matmul::MatmulType, matmul::MatmulType> matmulObj; -- Gitee From c61ef4fe872b8889b3910ca6045d0394dca1e3da Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Mon, 26 Aug 2024 03:44:12 +0000 Subject: [PATCH 6/8] fix error Signed-off-by: jiangchengcheng-on --- .../MatMulLeakyReluInvocation/matmul_leakyrelu_custom.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom.cpp index 
e220d9ceb..650510721 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom.cpp @@ -66,7 +66,7 @@ __aicore__ inline void MatmulLeakyKernel::Init(GM biasGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ biasType *>(bias), tiling.N); int32_t offsetA, offsetB, offsetC, offsetBias; - CalcOffset(GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); + CalcOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); aGlobal = aGlobal[offsetA]; bGlobal = bGlobal[offsetB]; cGlobal = cGlobal[offsetC]; @@ -119,8 +119,9 @@ __aicore__ inline void MatmulLeakyKernel::CopyOut const uint32_t roundM = tiling.singleCoreM / tiling.baseM; const uint32_t roundN = tiling.singleCoreN / tiling.baseN; uint32_t startOffset = (count % roundM * tiling.baseM * tiling.N + count / roundM * tiling.baseN); - DataCopyParams copyParam = {(uint16_t)tiling.baseM, (uint16_t)(tiling.baseN * sizeof(cType) / DEFAULT_C0_SIZE), 0, - (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / DEFAULT_C0_SIZE)}; + AscendC::DataCopyParams copyParam = {(uint16_t)tiling.baseM, (uint16_t)(tiling.baseN * sizeof(cType) / + AscendC::DEFAULT_C0_SIZE), 0, (uint16_t)((tiling.N - tiling.baseN) * + sizeof(cType) / AscendC::DEFAULT_C0_SIZE)}; DataCopy(cGlobal[startOffset], reluOutLocal, copyParam); reluOutQueue_.FreeTensor(reluOutLocal); } -- Gitee From 6bbd3ef4565e98c06abb2eacbcd84d3e85545ec6 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Mon, 26 Aug 2024 06:36:03 +0000 Subject: [PATCH 7/8] fix error Signed-off-by: jiangchengcheng-on --- .../CppExtensions/matmul_leakyrelu_custom.cpp | 36 +++++++++---------- .../matmul_leakyrelu_custom_tiling.cpp | 29 ++++++++------- .../matmul_leakyrelu_custom_tiling.cpp | 2 +- .../matmul_leakyrelu_custom_tiling.cpp | 2 +- 4 files changed, 33 insertions(+), 36 deletions(-) diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom.cpp index 559aa0787..3382daac5 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom.cpp @@ -10,9 +10,6 @@ #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace AscendC; -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; @@ -22,8 +19,8 @@ template cla public: __aicore__ inline MatmulLeakyKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, GM_ADDR tiling, - TPipe *pipe); - __aicore__ inline void Process(TPipe *pipe); + AscendC::TPipe *pipe); + __aicore__ inline void Process(AscendC::TPipe *pipe); __aicore__ inline void MatmulCompute(); __aicore__ inline void LeakyReluCompute(); @@ -31,23 +28,24 @@ public: __aicore__ inline void CalcOffset(int32_t blockIdx, int32_t usedCoreNum, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> - matmulObj; + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; - GlobalTensor aGlobal; - GlobalTensor bGlobal; - GlobalTensor cGlobal; - GlobalTensor biasGlobal; - LocalTensor reluOutLocal; + AscendC::GlobalTensor 
aGlobal; + AscendC::GlobalTensor bGlobal; + AscendC::GlobalTensor cGlobal; + AscendC::GlobalTensor biasGlobal; + AscendC::LocalTensor reluOutLocal; TCubeTiling tiling; - TQue reluOutQueue_; + AscendC::TQue reluOutQueue_; }; template __aicore__ inline void MatmulLeakyKernel::Init(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, GM_ADDR workspace, - GM_ADDR tilingGM, TPipe *pipe) + GM_ADDR tilingGM, AscendC::TPipe *pipe) { auto tempTilingGM = (__gm__ uint32_t *)tilingGM; auto tempTiling = (uint32_t *)&tiling; @@ -72,13 +70,13 @@ __aicore__ inline void MatmulLeakyKernel::Init(GM } template -__aicore__ inline void MatmulLeakyKernel::Process(TPipe *pipe) +__aicore__ inline void MatmulLeakyKernel::Process(AscendC::TPipe *pipe) { uint32_t computeRound = 0; #ifdef CUSTOM_ASCEND310P - TBuf<> tmpMMFormatUb; - LocalTensor mmformatUb; + AscendC::TBuf<> tmpMMFormatUb; + AscendC::LocalTensor mmformatUb; pipe->InitBuffer(tmpMMFormatUb, tiling.baseM * tiling.baseN * sizeof(cType)); mmformatUb = tmpMMFormatUb.Get(tiling.baseM * tiling.baseN * sizeof(cType)); matmulObj.SetLocalWorkspace(mmformatUb); @@ -142,7 +140,7 @@ extern "C" __global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR GM_ADDR workspace, GM_ADDR tiling) { MatmulLeakyKernel matmulLeakyKernel; - TPipe pipe; + AscendC::TPipe pipe; matmulLeakyKernel.Init(a, b, bias, c, workspace, tiling, &pipe); REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulLeakyKernel.matmulObj, &matmulLeakyKernel.tiling); matmulLeakyKernel.Process(&pipe); diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom_tiling.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom_tiling.cpp index d0b8b19a3..5f1390a55 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom_tiling.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom_tiling.cpp @@ -14,7 +14,6 @@ #include #include "tiling/tiling_api.h" -using namespace matmul_tiling; using namespace std; uint8_t *GetTilingBuf(optiling::TCubeTiling *tilingData) @@ -32,28 +31,28 @@ uint8_t *GenerateTiling() int K = 256; int baseM = 256; int baseN = 128; - TPosition leftPos = TPosition::GM; - CubeFormat leftFormat = CubeFormat::ND; - DataType leftDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition leftPos = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat leftFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType leftDtype = matmul_tiling::DataType::DT_FLOAT16; int transposeA = 0; - TPosition rightPos = TPosition::GM; - CubeFormat rightFormat = CubeFormat::ND; - DataType rightDtype = DataType::DT_FLOAT16; + matmul_tiling::TPosition rightPos = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat rightFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType rightDtype = matmul_tiling::DataType::DT_FLOAT16; int transposeB = 0; - TPosition resPos = TPosition::GM; - CubeFormat resFormat = CubeFormat::ND; - DataType resDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition resPos = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat resFormat = matmul_tiling::CubeFormat::ND; + matmul_tiling::DataType resDtype = matmul_tiling::DataType::DT_FLOAT; - TPosition biasPos = TPosition::GM; - CubeFormat biasFormat = CubeFormat::ND; - DataType biasDtype = DataType::DT_FLOAT; + matmul_tiling::TPosition biasPos = matmul_tiling::TPosition::GM; + matmul_tiling::CubeFormat biasFormat = matmul_tiling::CubeFormat::ND; +
matmul_tiling::DataType biasDtype = matmul_tiling::DataType::DT_FLOAT; int isBias = 1; int usedCoreNum = 2; optiling::TCubeTiling tilingData; tilingData.set_usedCoreNum(usedCoreNum); - MultiCoreMatmulTiling tilingApi; + matmul_tiling::MultiCoreMatmulTiling tilingApi; tilingApi.SetDim(usedCoreNum); tilingApi.SetAType(leftPos, leftFormat, leftDtype, bool(transposeA)); tilingApi.SetBType(rightPos, rightFormat, rightDtype, bool(transposeB)); @@ -63,7 +62,7 @@ uint8_t *GenerateTiling() tilingApi.SetOrgShape(M, N, K); tilingApi.SetShape(M, N, K); tilingApi.SetBias(bool(isBias)); - tilingApi.SetTraverse(MatrixTraverse::FIRSTM); + tilingApi.SetTraverse(matmul_tiling::MatrixTraverse::FIRSTM); tilingApi.SetFixSplit(baseM, baseN, -1); tilingApi.SetBufferSpace(-1, -1, -1); int64_t res = tilingApi.GetTiling(tilingData); diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom_tiling.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom_tiling.cpp index 775aec320..0ce3d2326 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom_tiling.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocation/matmul_leakyrelu_custom_tiling.cpp @@ -67,7 +67,7 @@ uint8_t *GenerateTiling(const char *socVersion) tilingApi.SetOrgShape(M, N, K); tilingApi.SetShape(M, N, K); tilingApi.SetBias(isBias); - tilingApi.SetTraverse(MatrixTraverse::FIRSTM); + tilingApi.SetTraverse(matmul_tiling::MatrixTraverse::FIRSTM); tilingApi.SetFixSplit(baseM, baseN, -1); tilingApi.SetBufferSpace(-1, -1, -1); diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom_tiling.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom_tiling.cpp index 5b433f679..8e5b0f7cd 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom_tiling.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom_tiling.cpp @@ -72,7 +72,7 @@ uint8_t *GenerateTiling(const char *socVersion) tilingApi.SetOrgShape(M, N, K); tilingApi.SetShape(M, N, K); tilingApi.SetBias(isBias); - tilingApi.SetTraverse(MatrixTraverse::FIRSTM); + tilingApi.SetTraverse(matmul_tiling::MatrixTraverse::FIRSTM); tilingApi.SetFixSplit(baseM, baseN, -1); tilingApi.SetBufferSpace(-1, -1, -1); -- Gitee From 9b7f7d67512f7fe3bc99ad9177ada87f6d33f6f7 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Mon, 26 Aug 2024 07:11:45 +0000 Subject: [PATCH 8/8] fix error Signed-off-by: jiangchengcheng-on --- .../KernelLaunch/CppExtensions/matmul_leakyrelu_custom.cpp | 4 ++-- .../matmul_leakyrelu_custom.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom.cpp index 3382daac5..dbab44ef1 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/CppExtensions/matmul_leakyrelu_custom.cpp @@ -58,7 +58,7 @@ __aicore__ inline void MatmulLeakyKernel::Init(GM biasGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ biasType *>(bias), tiling.N); int32_t offsetA, offsetB, offsetC, offsetBias; - 
CalcOffset(GetBlockIdx(), tiling.usedCoreNum, tiling, offsetA, offsetB, offsetC, offsetBias); + CalcOffset(AscendC::GetBlockIdx(), tiling.usedCoreNum, tiling, offsetA, offsetB, offsetC, offsetBias); aGlobal = aGlobal[offsetA]; bGlobal = bGlobal[offsetB]; cGlobal = cGlobal[offsetC]; @@ -114,7 +114,7 @@ __aicore__ inline void MatmulLeakyKernel::CopyOut const uint32_t roundM = tiling.singleCoreM / tiling.baseM; const uint32_t roundN = tiling.singleCoreN / tiling.baseN; uint32_t startOffset = (count % roundM * tiling.baseM * tiling.N + count / roundM * tiling.baseN); - DataCopyParams copyParam = {(uint16_t)tiling.baseM, (uint16_t)(tiling.baseN * sizeof(cType) / DEFAULT_C0_SIZE), 0, + AscendC::DataCopyParams copyParam = {(uint16_t)tiling.baseM, (uint16_t)(tiling.baseN * sizeof(cType) / DEFAULT_C0_SIZE), 0, (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / DEFAULT_C0_SIZE)}; DataCopy(cGlobal[startOffset], reluOutLocal, copyParam); reluOutQueue_.FreeTensor(reluOutLocal); diff --git a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom.cpp b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom.cpp index 27c480aad..6bea011af 100644 --- a/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom.cpp +++ b/operator/MatMulLeakyReluCustomSample/KernelLaunch/MatMulLeakyReluInvocationAsync/matmul_leakyrelu_custom.cpp @@ -73,12 +73,12 @@ __aicore__ inline void MatmulLeakyKernel::Init(GM workspaceGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ cType *>(workspace), tiling.M * tiling.N); int32_t offsetA, offsetB, offsetC, offsetBias; - CalcOffset(GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); + CalcOffset(AscendC::GetBlockIdx(), tiling, offsetA, offsetB, offsetC, offsetBias); aGlobal = aGlobal[offsetA]; bGlobal = bGlobal[offsetB]; cGlobal = cGlobal[offsetC]; biasGlobal = biasGlobal[offsetBias]; - workspaceGlobal = workspaceGlobal[GetBlockIdx() * tiling.singleCoreM * tiling.singleCoreN]; + workspaceGlobal = workspaceGlobal[AscendC::GetBlockIdx() * tiling.singleCoreM * tiling.singleCoreN]; pipe->InitBuffer(reluInQueue, 1, tiling.baseM * tiling.baseN * sizeof(cType)); pipe->InitBuffer(reluOutQueue, 1, splitRowSize * tiling.baseN * sizeof(cType)); } @@ -126,7 +126,7 @@ __aicore__ inline void MatmulLeakyKernel::CopyOut const uint32_t roundM = tiling.singleCoreM / splitRowSize; const uint32_t roundN = tiling.singleCoreN / tiling.baseN; uint32_t startOffset = (count % roundM * splitRowSize * tiling.N + count / roundM * tiling.baseN); - DataCopyParams copyParam = {(uint16_t)splitRowSize, (uint16_t)(tiling.baseN * sizeof(cType) / + AscendC::DataCopyParams copyParam = {(uint16_t)splitRowSize, (uint16_t)(tiling.baseN * sizeof(cType) / AscendC::DEFAULT_C0_SIZE), 0, (uint16_t)((tiling.N - tiling.baseN) * sizeof(cType) / AscendC::DEFAULT_C0_SIZE)}; DataCopy(cGlobal[startOffset], reluOutLocal, copyParam); -- Gitee
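Series note with an illustrative sketch (not part of any patch): once the using namespace AscendC; and using namespace matmul; directives are gone, kernel-side code spells the namespaces out explicitly. The skeleton below condenses the pattern from the MatMulInvocationNeo sample: bias handling and the per-core offset computation are omitted, the kernel name and its parameter list are placeholders, and the API calls (SetGlobalBuffer, REGIST_MATMUL_OBJ, SetTensorA, SetTensorB, IterateAll, End) are used as they appear in the samples above.

#include "kernel_operator.h"
#include "lib/matmul_intf.h"

extern "C" __global__ __aicore__ void matmul_namespace_sketch(GM_ADDR a, GM_ADDR b, GM_ADDR c,
                                                              GM_ADDR workspace, GM_ADDR tilingGm)
{
    using A_T = half;
    using B_T = half;
    using C_T = float;

    AscendC::TPipe pipe;

    // Copy the tiling from global memory, as the samples' CopyTiling helper does.
    TCubeTiling tiling;
    auto tempTilingGm = (__gm__ uint32_t *)tilingGm;
    auto tempTiling = (uint32_t *)&tiling;
    for (uint32_t i = 0; i < sizeof(TCubeTiling) / sizeof(uint32_t); ++i, ++tempTilingGm, ++tempTiling) {
        *tempTiling = *tempTilingGm;
    }

    // Global tensors are qualified with AscendC:: now that the using-directive is gone.
    AscendC::GlobalTensor<A_T> aGlobal;
    AscendC::GlobalTensor<B_T> bGlobal;
    AscendC::GlobalTensor<C_T> cGlobal;
    aGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ A_T *>(a), tiling.M * tiling.Ka);
    bGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ B_T *>(b), tiling.Ka * tiling.N);
    cGlobal.SetGlobalBuffer(reinterpret_cast<__gm__ C_T *>(c), tiling.M * tiling.N);

    // The matmul object and its type descriptors are qualified with matmul:: ; positions come
    // from AscendC::TPosition, and CubeFormat is visible without qualification in the samples.
    matmul::Matmul<matmul::MatmulType<AscendC::TPosition::GM, CubeFormat::ND, A_T>,
                   matmul::MatmulType<AscendC::TPosition::GM, CubeFormat::ND, B_T>,
                   matmul::MatmulType<AscendC::TPosition::GM, CubeFormat::ND, C_T>> mm;
    REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), mm, &tiling);

    mm.SetTensorA(aGlobal);
    mm.SetTensorB(bGlobal);
    mm.IterateAll(cGlobal);
    mm.End();
}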