diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocationNaive/CMakeLists.txt b/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocationNaive/CMakeLists.txt
deleted file mode 100644
index e749cff827e98e999c6d393d6791e1b72c0a7f87..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocationNaive/CMakeLists.txt
+++ /dev/null
@@ -1,63 +0,0 @@
-# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved.
-
-# CMake lowest version requirement
-cmake_minimum_required(VERSION 3.5.1)
-
-# project information
-project(acl_execute_add)
-
-# Compile options
-add_compile_options(-std=c++11)
-
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./")
-
-set(INC_PATH $ENV{DDK_PATH})
-
-if (NOT DEFINED ENV{DDK_PATH})
-    set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest")
-    message(STATUS "set default INC_PATH: ${INC_PATH}")
-else ()
-    message(STATUS "env INC_PATH: ${INC_PATH}")
-endif()
-
-set(CUST_PKG_PATH "${INC_PATH}/opp/vendors/customize/op_api")
-
-set(LIB_PATH $ENV{NPU_HOST_LIB})
-
-# Dynamic libraries in the stub directory can only be used for compilation
-if (NOT DEFINED ENV{NPU_HOST_LIB})
-    set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/")
-    set(LIB_PATH1 "/usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub/")
-    message(STATUS "set default LIB_PATH: ${LIB_PATH}")
-else ()
-    message(STATUS "env LIB_PATH: ${LIB_PATH}")
-endif()
-
-# Header path
-include_directories(
-    ${INC_PATH}/runtime/include
-    ${INC_PATH}/atc/include
-    ${CUST_PKG_PATH}/include
-)
-
-# add host lib path
-link_directories(
-    ${LIB_PATH}
-    ${LIB_PATH1}
-    ${CUST_PKG_PATH}/lib
-)
-
-add_executable(execute_add_op
-    main.cpp
-)
-
-target_link_libraries(execute_add_op
-    ascendcl
-    cust_opapi
-    acl_op_compiler
-    nnopbase
-    stdc++
-)
-
-install(TARGETS execute_add_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
-
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocationNaive/README.md b/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocationNaive/README.md
deleted file mode 100644
index 7adf53649252b360d9e6d5c5e3ded515c4c7e0a1..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocationNaive/README.md
+++ /dev/null
@@ -1,60 +0,0 @@
-## Overview
-Compared with the AclNNInvocation sample project, this sample simplifies the project configuration.
-## Directory structure
-```
-├── AclNNInvocationNaive
-│   ├── CMakeLists.txt  // Build rules file
-│   ├── main.cpp        // Entry point of the single-operator invocation application
-│   └── run.sh          // Script that builds and runs the operator
-```
-## Code walkthrough
-After the custom operator has been developed and deployed, its functionality can be verified through single-operator invocation. main.cpp demonstrates single-operator API execution: the operator is executed through C-language APIs, and no single-operator description file or offline model conversion is needed; the single-operator API is called directly.
-
-After the custom operator is built and deployed, single-operator APIs are generated automatically and can be called directly from an application. These APIs generally take the form of a "two-stage interface", such as:
-   ```cpp
-   // Obtain the workspace size required by the operator
-   aclnnStatus aclnnAddCustomGetWorkspaceSize(const aclTensor *x, const aclTensor *y, const aclTensor *out, uint64_t *workspaceSize, aclOpExecutor **executor);
-   // Execute the operator
-   aclnnStatus aclnnAddCustom(void *workspace, uint64_t workspaceSize, aclOpExecutor *executor, aclrtStream stream);
-   ```
-aclnnAddCustomGetWorkspaceSize is the first-stage interface; it computes how much workspace memory this API call needs. After obtaining the workspace size, allocate device memory of workspaceSize bytes, then call the second-stage interface aclnnAddCustom to perform the computation. For details, see the "Single-operator API execution" section of [AscendCL single-operator invocation](https://hiascend.com/document/redirect/CannCommunityAscendCInVorkSingleOp).
-## Running the sample operator
-### 1. Build the operator project
-Before running this sample, complete the preparation described in [Build the operator project](../README.md#operatorcompile).
-### 2. Run the aclnn invocation sample
-
-  - Enter the sample directory.
-    Download the sample code from the command line, taking the master branch as an example.
-    ```bash
-    cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocationNaive
-    ```
-  - Modify the sample build file.
-
-    Replace "/usr/local/Ascend/ascend-toolkit/latest" in the CMakeLists.txt file with the actual path of the installed CANN package,
-    e.g. /home/HwHiAiUser/Ascend/ascend-toolkit/latest
-
-  - Configure environment variables.
-
-    The NPU_HOST_LIB environment variable must be set; taking x86 as an example:
-    ```bash
-    export NPU_HOST_LIB=/home/HwHiAiUser/Ascend/ascend-toolkit/latest/x86_64-linux/lib64
-    ```
-  - Run the sample.
-
-    During execution the sample automatically generates test data, then builds and runs the aclnn sample, and finally prints the result.
-    ```bash
-    mkdir -p build
-    cd build
-    cmake .. && make
-    ./execute_add_op
-    ```
-    Alternatively, refer to the run.sh script to build and run.
-    ```bash
-    bash run.sh
-    ```
-
-## Change log
-| Date       | Change                                       |
-|------------|----------------------------------------------|
-| 2024/10/25 | Added the template-parameter operator sample |
\ No newline at end of file
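To make the two-stage flow concrete before the full program, here is a minimal sketch of one invocation round trip. It is a sketch under this sample's assumptions only: the signatures are the ones shown in the README above, and `x`, `y`, `out`, and `stream` are already-created tensors and a stream, as main.cpp sets up below; error handling is elided.

```cpp
// Two-stage invocation: query the workspace first, then launch on a stream.
uint64_t workspaceSize = 0;
aclOpExecutor *executor = nullptr;

// Stage 1: ask the operator how much device-side scratch memory it needs.
aclnnStatus st = aclnnAddCustomGetWorkspaceSize(x, y, out, &workspaceSize, &executor);

// Allocate the workspace only if the operator actually requested one.
void *workspace = nullptr;
if (st == ACL_SUCCESS && workspaceSize > 0) {
    aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST);
}

// Stage 2: enqueue the computation, then wait for the stream to drain.
st = aclnnAddCustom(workspace, workspaceSize, executor, stream);
aclrtSynchronizeStream(stream);
```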
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocationNaive/main.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocationNaive/main.cpp
deleted file mode 100644
index 7f4290a77654ea938f984ea4ebedbd33639ce33e..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocationNaive/main.cpp
+++ /dev/null
@@ -1,184 +0,0 @@
-/**
- * @file main.cpp
- *
- * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#include <algorithm>
-#include <cstdint>
-#include <cstdio>
-#include <vector>
-
-#include "acl/acl.h"
-#include "aclnn_add_custom.h"
-
-#define SUCCESS 0
-#define FAILED 1
-
-#define CHECK_RET(cond, return_expr) \
-    do {                             \
-        if (!(cond)) {               \
-            return_expr;             \
-        }                            \
-    } while (0)
-
-#define LOG_PRINT(message, ...)         \
-    do {                                \
-        printf(message, ##__VA_ARGS__); \
-    } while (0)
-
-int64_t GetShapeSize(const std::vector<int64_t> &shape)
-{
-    int64_t shapeSize = 1;
-    for (auto i : shape) {
-        shapeSize *= i;
-    }
-    return shapeSize;
-}
-
-int Init(int32_t deviceId, aclrtStream *stream)
-{
-    // Fixed code, acl initialization
-    auto ret = aclInit(nullptr);
-    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return FAILED);
-    ret = aclrtSetDevice(deviceId);
-    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return FAILED);
-    ret = aclrtCreateStream(stream);
-    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. ERROR: %d\n", ret); return FAILED);
-
-    return SUCCESS;
-}
-
-template <typename T>
-int CreateAclTensor(const std::vector<T> &hostData, const std::vector<int64_t> &shape, void **deviceAddr,
-                    aclDataType dataType, aclTensor **tensor)
-{
-    auto size = GetShapeSize(shape) * sizeof(T);
-    // Call aclrtMalloc to allocate device memory
-    auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST);
-    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return FAILED);
-
-    // Call aclrtMemcpy to copy host data to device memory
-    ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE);
-    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return FAILED);
-
-    // Call aclCreateTensor to create an aclTensor object
-    *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, nullptr, 0, aclFormat::ACL_FORMAT_ND, shape.data(),
-                              shape.size(), *deviceAddr);
-    return SUCCESS;
-}
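// The aclCreateTensor call above packs several pieces of tensor metadata into
// one call; argument by argument it reads:
//
//   aclCreateTensor(shape.data(), shape.size(),   // view shape and dim count
//                   dataType,                     // element type (ACL_FLOAT16 here)
//                   nullptr, 0,                   // strides (nullptr = contiguous) and storage offset
//                   aclFormat::ACL_FORMAT_ND,     // data layout
//                   shape.data(), shape.size(),   // storage shape (same as the view here)
//                   *deviceAddr);                 // device memory backing the tensor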
-
-void DestroyResources(std::vector<aclTensor *> tensors, std::vector<void *> deviceAddrs, aclrtStream stream,
-                      int32_t deviceId, void *workspaceAddr = nullptr)
-{
-    // Release aclTensor and device memory
-    for (uint32_t i = 0; i < tensors.size(); i++) {
-        if (tensors[i] != nullptr) {
-            aclDestroyTensor(reinterpret_cast<aclTensor *>(tensors[i]));
-        }
-        if (deviceAddrs[i] != nullptr) {
-            aclrtFree(deviceAddrs[i]);
-        }
-    }
-    if (workspaceAddr != nullptr) {
-        aclrtFree(workspaceAddr);
-    }
-    // Destroy stream and reset device
-    aclrtDestroyStream(stream);
-    aclrtResetDevice(deviceId);
-    aclFinalize();
-}
-
-int main(int argc, char **argv)
-{
-    // 1. (Fixed code) Initialize device / stream, refer to the list of external interfaces of acl
-    // Update deviceId to your own device id
-    int32_t deviceId = 0;
-    aclrtStream stream;
-    auto ret = Init(deviceId, &stream);
-    CHECK_RET(ret == 0, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return FAILED);
-
-    // 2. Create input and output, need to customize according to the interface of the API
-    std::vector<int64_t> inputXShape = {8, 2048};
-    std::vector<int64_t> inputYShape = {8, 2048};
-    std::vector<int64_t> outputZShape = {8, 2048};
-    void *inputXDeviceAddr = nullptr;
-    void *inputYDeviceAddr = nullptr;
-    void *outputZDeviceAddr = nullptr;
-    aclTensor *inputX = nullptr;
-    aclTensor *inputY = nullptr;
-    aclTensor *outputZ = nullptr;
-    std::vector<aclFloat16> inputXHostData(inputXShape[0] * inputXShape[1]);
-    std::vector<aclFloat16> inputYHostData(inputYShape[0] * inputYShape[1]);
-    std::vector<aclFloat16> outputZHostData(outputZShape[0] * outputZShape[1]);
-    for (int i = 0; i < inputXShape[0] * inputXShape[1]; ++i) {
-        inputXHostData[i] = aclFloatToFloat16(1.0);
-        inputYHostData[i] = aclFloatToFloat16(2.0);
-        outputZHostData[i] = aclFloatToFloat16(0.0);
-    }
-    std::vector<aclTensor *> tensors = {inputX, inputY, outputZ};
-    std::vector<void *> deviceAddrs = {inputXDeviceAddr, inputYDeviceAddr, outputZDeviceAddr};
-    // Create inputX aclTensor
-    ret = CreateAclTensor(inputXHostData, inputXShape, &inputXDeviceAddr, aclDataType::ACL_FLOAT16, &inputX);
-    // Refresh the cleanup lists: they were built before the tensors existed,
-    // so without this the real tensors and device buffers would never be freed
-    tensors[0] = inputX;
-    deviceAddrs[0] = inputXDeviceAddr;
-    CHECK_RET(ret == ACL_SUCCESS, DestroyResources(tensors, deviceAddrs, stream, deviceId); return FAILED);
-    // Create inputY aclTensor
-    ret = CreateAclTensor(inputYHostData, inputYShape, &inputYDeviceAddr, aclDataType::ACL_FLOAT16, &inputY);
-    tensors[1] = inputY;
-    deviceAddrs[1] = inputYDeviceAddr;
-    CHECK_RET(ret == ACL_SUCCESS, DestroyResources(tensors, deviceAddrs, stream, deviceId); return FAILED);
-    // Create outputZ aclTensor
-    ret = CreateAclTensor(outputZHostData, outputZShape, &outputZDeviceAddr, aclDataType::ACL_FLOAT16, &outputZ);
-    tensors[2] = outputZ;
-    deviceAddrs[2] = outputZDeviceAddr;
-    CHECK_RET(ret == ACL_SUCCESS, DestroyResources(tensors, deviceAddrs, stream, deviceId); return FAILED);
-
-    // 3. Call the API of the custom operator library
-    uint64_t workspaceSize = 0;
-    aclOpExecutor *executor;
-    // Calculate the workspace size and allocate memory for it
-    ret = aclnnAddCustomGetWorkspaceSize(inputX, inputY, outputZ, &workspaceSize, &executor);
-    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnAddCustomGetWorkspaceSize failed. ERROR: %d\n", ret);
-              DestroyResources(tensors, deviceAddrs, stream, deviceId); return FAILED);
-
-    void *workspaceAddr = nullptr;
-    if (workspaceSize > 0) {
-        ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST);
-        CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret);
-                  DestroyResources(tensors, deviceAddrs, stream, deviceId, workspaceAddr); return FAILED);
-    }
-    // Execute the custom operator
-    ret = aclnnAddCustom(workspaceAddr, workspaceSize, executor, stream);
-    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnAddCustom failed. ERROR: %d\n", ret);
-              DestroyResources(tensors, deviceAddrs, stream, deviceId, workspaceAddr); return FAILED);
-
-    // 4. (Fixed code) Synchronize and wait for the task to complete
-    ret = aclrtSynchronizeStream(stream);
-    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret);
-              DestroyResources(tensors, deviceAddrs, stream, deviceId, workspaceAddr); return FAILED);
-
-    // 5. Get the output value, copy the result from device memory to host memory, need to modify according to the
-    // interface of the API
-    auto size = GetShapeSize(outputZShape);
-    std::vector<aclFloat16> resultData(size, 0);
-    ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outputZDeviceAddr,
-                      size * sizeof(aclFloat16), ACL_MEMCPY_DEVICE_TO_HOST);
-    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret);
-              DestroyResources(tensors, deviceAddrs, stream, deviceId, workspaceAddr); return FAILED);
-
-    // 6. Destroy resources, need to modify according to the interface of the API
-    DestroyResources(tensors, deviceAddrs, stream, deviceId, workspaceAddr);
-
-    // print the output result
-    std::vector<aclFloat16> goldenData(size, aclFloatToFloat16(3.0));
-
-    LOG_PRINT("result is:\n");
-    for (int64_t i = 0; i < 10; i++) {
-        LOG_PRINT("%.1f ", aclFloat16ToFloat(resultData[i]));
-    }
-    LOG_PRINT("\n");
-    if (std::equal(resultData.begin(), resultData.end(), goldenData.begin())) {
-        LOG_PRINT("test pass\n");
-    } else {
-        LOG_PRINT("test failed\n");
-    }
-    return SUCCESS;
-}
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocationNaive/run.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocationNaive/run.sh
deleted file mode 100644
index c30cf7d23f9cd3387327781129e0fb2ed7a69c99..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocationNaive/run.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-if [ -n "$ASCEND_INSTALL_PATH" ]; then
-    _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH
-elif [ -n "$ASCEND_HOME_PATH" ]; then
-    _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH
-else
-    if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then
-        _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest
-    else
-        _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest
-    fi
-fi
-source $_ASCEND_INSTALL_PATH/bin/setenv.bash
-export DDK_PATH=$_ASCEND_INSTALL_PATH
-export NPU_HOST_LIB=$_ASCEND_INSTALL_PATH/lib64
-
-set -e
-rm -rf build
-mkdir -p build
-cmake -B build
-cmake --build build -j
-(
-    cd build
-    ./execute_add_op
-)
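One detail of the verification above is worth noting: main.cpp can compare with an exact `std::equal` only because every element is 1.0 + 2.0, which is exactly representable in float16. For randomly generated inputs such as those the other samples' gen_data.py produces, a tolerance check in the spirit of verify_result.py would be needed on the C++ side too. A minimal sketch; `AlmostEqual` is an illustrative helper, not part of the sample, using the same rtol/atol bounds as verify_result.py:

```cpp
#include <cmath>

// Tolerance compare for float16 results, mirroring verify_result.py's
// np.isclose(rtol=1e-3, atol=1e-5) check on the host side.
bool AlmostEqual(float actual, float expected, float rtol = 1e-3f, float atol = 1e-5f)
{
    return std::fabs(actual - expected) <= atol + rtol * std::fabs(expected);
}

// Usage against the sample's buffers:
//   pass = pass && AlmostEqual(aclFloat16ToFloat(resultData[i]), aclFloat16ToFloat(goldenData[i]));
```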
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/README.md b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/README.md
deleted file mode 100644
index 2236bae925e5396f6d0cf3ba95be891c934fac0a..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-## Invoking the AddTemplateCustom operator project through aclopExecuteV2 model invocation
-This sample does not yet support Atlas 200/500 A2 inference products.
-
-## Running the sample operator
-### 1. Build the operator project
-Before running this sample, complete the preparation described in [Build the operator project](../README.md#operatorcompile).
-### 2. Run the sample via aclopExecuteV2 invocation
-
-  - Enter the sample directory.
-    Download the sample code from the command line, taking the master branch as an example.
-    ```bash
-    cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel
-    ```
-
-  - Run the sample.
-
-    During execution the sample automatically generates test data, then builds and runs the ACL offline-model invocation sample, and finally verifies the result. See the run.sh script for details.
-    ```bash
-    bash run.sh
-    ```
-## Change log
-| Date       | Change                                       |
-|------------|----------------------------------------------|
-| 2024/10/25 | Added the template-parameter operator sample |
\ No newline at end of file
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/inc/common.h b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/inc/common.h
deleted file mode 100644
index cd9fd4d074237c99b39b171e5d8f9be828b5edd1..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/inc/common.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * @file common.h
- *
- * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#ifndef COMMON_H
-#define COMMON_H
-
-#include <cstdint>
-#include <cstdio>
-#include <iostream>
-#include <string>
-#include <vector>
-
-#include "acl/acl.h"
-
-#define SUCCESS 0
-#define FAILED 1
-
-#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args)
-#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args)
-#define ERROR_LOG(fmt, args...) fprintf(stderr, "[ERROR] " fmt "\n", ##args)
-
-/**
- * @brief Read data from file
- * @param [in] filePath: file path
- * @param [out] fileSize: file size
- * @param [out] buffer: destination buffer
- * @param [in] bufferSize: size of the destination buffer
- * @return read result
- */
-bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize);
-
-/**
- * @brief Write data to file
- * @param [in] filePath: file path
- * @param [in] buffer: data to write to file
- * @param [in] size: size to write
- * @return write result
- */
-bool WriteFile(const std::string &filePath, const void *buffer, size_t size);
-
-#endif // COMMON_H
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/inc/op_runner.h b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/inc/op_runner.h
deleted file mode 100644
index 94ef9a8d84c64eab343ac1b13caa471794ec0b10..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/inc/op_runner.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/**
- * @file op_runner.h
- *
- * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#ifndef OP_RUNNER_H
-#define OP_RUNNER_H
-
-#include "acl/acl.h"
-#include "common.h"
-#include "operator_desc.h"
-
-/**
- * Op Runner
- */
-class OpRunner {
-public:
-    /**
-     * @brief Constructor
-     * @param [in] opDesc: op description
-     */
-    explicit OpRunner(OperatorDesc *opDesc);
-
-    /**
-     * @brief Destructor
-     */
-    virtual ~OpRunner();
-
-    /**
-     * @brief Init op runner
-     */
-    bool Init();
-
-    /**
-     * @brief Get number of inputs
-     * @return number of inputs
-     */
-    const size_t NumInputs();
-
-    /**
-     * @brief Get number of outputs
-     * @return number of outputs
-     */
-    const size_t NumOutputs();
-
-    /**
-     * @brief Get input size by index
-     * @param [in] index: input index
-     * @return size of the input
-     */
-    const size_t GetInputSize(size_t index) const;
-
-    /**
-     * @brief Get output size by index
-     * @param [in] index: output index
-     * @return size of the output
-     */
-    size_t GetOutputSize(size_t index) const;
-
-    /**
-     * @brief Get input element count by index
-     * @param [in] index: input index
-     * @return element count of the input
-     */
-    size_t GetInputElementCount(size_t index) const;
-
-    /**
-     * @brief Get output element count by index
-     * @param [in] index: output index
-     * @return element count of the output
-     */
-    size_t GetOutputElementCount(size_t index) const;
-
-    /**
-     * @brief Get input shape by index
-     * @param [in] index: input index
-     * @return shape of the input
-     */
-    std::vector<int64_t> GetInputShape(size_t index) const;
-
-    /**
-     * @brief Get output shape by index
-     * @param [in] index: output index
-     * @return shape of the output
-     */
-    std::vector<int64_t> GetOutputShape(size_t index) const;
-
-    /**
-     * @brief Get input buffer (host memory) by index
-     * @tparam T: data type
-     * @param [in] index: input index
-     * @return host address of the input
-     */
-    template <typename T> T *GetInputBuffer(size_t index)
-    {
-        if (index >= numInputs_) {
-            ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
-            return nullptr;
-        }
-        return reinterpret_cast<T *>(hostInputs_[index]);
-    }
-
-    /**
-     * @brief Get output buffer (host memory) by index
-     * @tparam T: data type
-     * @param [in] index: output index
-     * @return host address of the output
-     */
-    template <typename T> const T *GetOutputBuffer(size_t index)
-    {
-        if (index >= numOutputs_) {
-            ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
-            return nullptr;
-        }
-
-        return reinterpret_cast<const T *>(hostOutputs_[index]);
-    }
-
-    /**
-     * @brief Print readable input by index
-     * @param [in] index: input index
-     * @param [in] elementsPerRow: number of elements per row
-     */
-    void PrintInput(size_t index, size_t elementsPerRow = 16);
-
-    /**
-     * @brief Print readable output by index
-     * @param [in] index: output index
-     * @param [in] elementsPerRow: number of elements per row
-     */
-    void PrintOutput(size_t index, size_t elementsPerRow = 16);
-
-    /**
-     * @brief Compile static op
-     * @return compile result
-     */
-    bool CompileStaticOp();
-
-    /**
-     * @brief Compile dynamic op
-     * @return compile result
-     */
-    bool CompileDynamicOp();
-
-    /**
-     * @brief Run op
-     * @return run result
-     */
-    bool RunOp();
-
-private:
-    size_t numInputs_;
-    size_t numOutputs_;
-
-    std::vector<aclDataBuffer *> inputBuffers_;
-    std::vector<aclDataBuffer *> outputBuffers_;
-
-    std::vector<void *> devInputs_;
-    std::vector<void *> devOutputs_;
-
-    std::vector<void *> hostInputs_;
-    std::vector<void *> hostOutputs_;
-    OperatorDesc *opDesc_;
-};
-
-#endif // OP_RUNNER_H
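Together with operator_desc.h below, this header implies a fixed calling order; the sequence src/main.cpp actually follows can be condensed to the sketch below (error handling elided, file paths are the sample's own):

```cpp
// Condensed OpRunner lifecycle as driven by src/main.cpp.
OperatorDesc desc = CreateOpDesc();     // AddCustom: dtype, shapes, format
OpRunner runner(&desc);
runner.Init();                          // allocates host + device buffers

// Load the generated binaries into the host-side input buffers.
size_t fileSize = 0;
ReadFile("../input/input_x.bin", fileSize, runner.GetInputBuffer<void>(0), runner.GetInputSize(0));
ReadFile("../input/input_y.bin", fileSize, runner.GetInputBuffer<void>(1), runner.GetInputSize(1));

// Copies inputs to the device, calls aclopExecuteV2, copies outputs back.
runner.RunOp();

// Persist the host-side view of the result.
WriteFile("../output/output_z.bin", runner.GetOutputBuffer<void>(0), runner.GetOutputSize(0));
```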
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/inc/operator_desc.h b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/inc/operator_desc.h
deleted file mode 100644
index e53b91cd9866a7737d29ff2eabf44425e7b9a76e..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/inc/operator_desc.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * @file operator_desc.h
- *
- * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#ifndef OPERATOR_DESC_H
-#define OPERATOR_DESC_H
-
-#include <string>
-#include <vector>
-
-#include "acl/acl.h"
-
-/**
- * Op description
- */
-struct OperatorDesc {
-    /**
-     * Constructor
-     * @param [in] opType: op type
-     */
-    explicit OperatorDesc(std::string opType);
-
-    /**
-     * Destructor
-     */
-    virtual ~OperatorDesc();
-
-    /**
-     * Add an input tensor description
-     * @param [in] dataType: data type
-     * @param [in] numDims: number of dims
-     * @param [in] dims: dims
-     * @param [in] format: format
-     * @return OperatorDesc
-     */
-    OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format);
-
-    /**
-     * Add an output tensor description
-     * @param [in] dataType: data type
-     * @param [in] numDims: number of dims
-     * @param [in] dims: dims
-     * @param [in] format: format
-     * @return OperatorDesc
-     */
-    OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format);
-
-    std::string opType;
-    std::vector<aclTensorDesc *> inputDesc;
-    std::vector<aclTensorDesc *> outputDesc;
-    aclopAttr *opAttr;
-};
-
-#endif // OPERATOR_DESC_H
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/run.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/run.sh
deleted file mode 100755
index 2b66cb2f2130f5295a142b3f8e4b5678ecaba675..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/run.sh
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/bin/bash
-export ASCEND_SLOG_PRINT_TO_STDOUT=0
-export ASCEND_GLOBAL_LOG_LEVEL=0
-
-CURRENT_DIR=$(
-    cd $(dirname ${BASH_SOURCE:-$0})
-    pwd
-)
-cd $CURRENT_DIR
-
-# Export environment variables
-DTYPE="float16"
-JSON_NAME=add_custom
-SHORT=m:,v:,
-LONG=is-dynamic:,dtype:,
-OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@")
-eval set -- "$OPTS"
-while :; do
-    case "$1" in
-    # IS_DYNAMIC 0: static op
-    # IS_DYNAMIC 1: dynamic op
-    -m | --is-dynamic)
-        IS_DYNAMIC="$2"
-        shift 2
-        ;;
-    # float16, float, int32
-    -v | --dtype)
-        DTYPE="$2"
-        shift 2
-        ;;
-    --)
-        shift
-        break
-        ;;
-    *)
-        echo "[ERROR] Unexpected option: $1"
-        break
-        ;;
-    esac
-done
-if [ ! $IS_DYNAMIC ]; then
-    IS_DYNAMIC=1
-fi
-
-if [ -n "$ASCEND_INSTALL_PATH" ]; then
-    _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH
-elif [ -n "$ASCEND_HOME_PATH" ]; then
-    _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH
-else
-    if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then
-        _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest
-    else
-        _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest
-    fi
-fi
-source $_ASCEND_INSTALL_PATH/bin/setenv.bash
-export DDK_PATH=$_ASCEND_INSTALL_PATH
-export NPU_HOST_LIB=$_ASCEND_INSTALL_PATH/lib64
-
-# Check the type of the current Ascend SoC
-function check_soc_version() {
-    SOC_VERSION_CONCAT=$(python3 -c '''
-import ctypes, os
-def get_soc_version():
-    max_len = 256
-    rtsdll = ctypes.CDLL(f"libruntime.so")
-    c_char_t = ctypes.create_string_buffer(b"\xff" * max_len, max_len)
-    rtsdll.rtGetSocVersion.restype = ctypes.c_uint64
-    rt_error = rtsdll.rtGetSocVersion(c_char_t, ctypes.c_uint32(max_len))
-    if rt_error:
-        print("rt_error:", rt_error)
-        return ""
-    soc_full_name = c_char_t.value.decode("utf-8")
-    find_str = "Short_SoC_version="
-    ascend_home_dir = os.environ.get("DDK_PATH")
-    with open(f"{ascend_home_dir}/compiler/data/platform_config/{soc_full_name}.ini", "r") as f:
-        for line in f:
-            if find_str in line:
-                start_index = line.find(find_str)
-                result = line[start_index + len(find_str):].strip()
-                return "{},{}".format(soc_full_name, result.lower())
-    return ""
-print(get_soc_version())
-    ''')
-    if [[ ${SOC_VERSION_CONCAT}"x" = "x" ]]; then
-        echo "ERROR: SOC_VERSION_CONCAT is invalid!"
-        return 1
-    fi
-    SOC_FULL_VERSION=$(echo $SOC_VERSION_CONCAT | cut -d ',' -f 1)
-    SOC_SHORT_VERSION=$(echo $SOC_VERSION_CONCAT | cut -d ',' -f 2)
-}
-
-function main {
-    if [[ ${IS_DYNAMIC}"x" = "x" ]]; then
-        echo "ERROR: IS_DYNAMIC is invalid!"
-        return 1
-    fi
-
-    # 1. Clean up leftover generated files and log files
-    rm -rf $HOME/ascend/log/*
-    rm -rf op_models/*.om
-
-    # 2. Build the offline .om model
-    cd $CURRENT_DIR
-    if [ $IS_DYNAMIC == 1 ]; then
-        atc --singleop=scripts/${JSON_NAME}_dynamic_shape.json --output=op_models/ --soc_version=${SOC_FULL_VERSION}
-    else
-        atc --singleop=scripts/${JSON_NAME}_static_shape.json --output=op_models/ --soc_version=${SOC_FULL_VERSION}
-    fi
-
-    # 3. Generate the input data and golden data
-    cd $CURRENT_DIR
-    python3 scripts/gen_data.py
-    if [ $? -ne 0 ]; then
-        echo "ERROR: generate input data failed!"
-        return 1
-    fi
-    echo "INFO: generate input data success!"
-
-    # 4. Build the ACL executable
-    cd $CURRENT_DIR
-    rm -rf build
-    mkdir -p build
-    cd build
-    cmake ../src
-    if [ $? -ne 0 ]; then
-        echo "ERROR: cmake failed!"
-        return 1
-    fi
-    echo "INFO: cmake success!"
-    make
-    if [ $? -ne 0 ]; then
-        echo "ERROR: make failed!"
-        return 1
-    fi
-    echo "INFO: make success!"
-
-    # 5. Run the executable
-    cd $CURRENT_DIR/output
-    if [ $IS_DYNAMIC == 1 ]; then
-        echo "INFO: execute dynamic op!"
-        ./execute_add_op $IS_DYNAMIC 2048
-    else
-        echo "INFO: execute static op!"
-        ./execute_add_op
-    fi
-    if [ $? -ne 0 ]; then
-        echo "ERROR: acl executable run failed! please check your project!"
-        return 1
-    fi
-    echo "INFO: acl executable run success!"
-
-    # 6. Compare against the golden file
-    cd $CURRENT_DIR
-    python3 scripts/verify_result.py output/output_z.bin output/golden.bin
-    if [ $? -ne 0 ]; then
-        echo "ERROR: verify result failed!"
-        return 1
-    fi
-}
-
-check_soc_version
-main
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/acl.json b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/acl.json
deleted file mode 100644
index 9e26dfeeb6e641a33dae4961196235bdb965b21b..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/acl.json
+++ /dev/null
@@ -1 +0,0 @@
-{}
\ No newline at end of file
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/add_custom_dynamic_shape.json b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/add_custom_dynamic_shape.json
deleted file mode 100644
index abba89d740a90d65c69844c947436173f716dec6..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/add_custom_dynamic_shape.json
+++ /dev/null
@@ -1,33 +0,0 @@
-[
-    {
-        "op": "AddCustom",
-        "input_desc": [
-            {
-                "name": "x",
-                "param_type": "required",
-                "format": "ND",
-                "shape": [-1, -1],
-                "shape_range": [[1,-1],[1,-1]],
-                "type": "float16"
-            },
-            {
-                "name": "y",
-                "param_type": "required",
-                "format": "ND",
-                "shape": [-1, -1],
-                "shape_range": [[1,-1],[1,-1]],
-                "type": "float16"
-            }
-        ],
-        "output_desc": [
-            {
-                "name": "z",
-                "param_type": "required",
-                "format": "ND",
-                "shape": [-1, -1],
-                "shape_range": [[1,-1],[1,-1]],
-                "type": "float16"
-            }
-        ]
-    }
-]
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/add_custom_static_shape.json b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/add_custom_static_shape.json
deleted file mode 100644
index 0deaae0f741f1b5e27195fcb472ca606b17f9e24..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/add_custom_static_shape.json
+++ /dev/null
@@ -1,30 +0,0 @@
-[
-    {
-        "op": "AddCustom",
-        "input_desc": [
-            {
-                "name": "x",
-                "param_type": "required",
-                "format": "ND",
-                "shape": [8, 2048],
-                "type": "float16"
-            },
-            {
-                "name": "y",
-                "param_type": "required",
-                "format": "ND",
-                "shape": [8, 2048],
-                "type": "float16"
-            }
-        ],
-        "output_desc": [
-            {
-                "name": "z",
-                "param_type": "required",
-                "format": "ND",
-                "shape": [8, 2048],
-                "type": "float16"
-            }
-        ]
-    }
-]
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/gen_data.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/gen_data.py
deleted file mode 100644
index aa692b72a8b1818c5ac2e7fafb2dfb7e5d28001f..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/gen_data.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/python3
-# coding=utf-8
-#
-# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# ===============================================================================
-
-import numpy as np
-
-
-def gen_golden_data_simple():
-    # input_x = np.random.uniform(-100, 100, [8, 2048]).astype(np.float32)
-    # input_y = np.random.uniform(-100, 100, [8, 2048]).astype(np.float32)
-    input_x = np.random.uniform(1, 100, [8, 2048]).astype(np.float16)
-    input_y = np.random.uniform(1, 100, [8, 2048]).astype(np.float16)
-    golden = (input_x + input_y).astype(np.float16)
-
-    input_x.tofile("./input/input_x.bin")
-    input_y.tofile("./input/input_y.bin")
-    golden.tofile("./output/golden.bin")
-
-
-if __name__ == "__main__":
-    gen_golden_data_simple()
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/verify_result.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/verify_result.py
deleted file mode 100644
index 1a21d809ab206a65bc952ca4cb06c345edcd3e7a..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/scripts/verify_result.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/python3
-# coding=utf-8
-#
-# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# ===============================================================================
-
-import sys
-import numpy as np
-
-# for float16
-relative_tol = 1e-3
-absolute_tol = 1e-5
-error_tol = 1e-3
-
-
-def verify_result(output, golden):
-    output = np.fromfile(output, dtype=np.float16).reshape(-1)
-    golden = np.fromfile(golden, dtype=np.float16).reshape(-1)
-    different_element_results = np.isclose(output,
-                                           golden,
-                                           rtol=relative_tol,
-                                           atol=absolute_tol,
-                                           equal_nan=True)
-    different_element_indexes = np.where(different_element_results == False)[0]
-    for index in range(len(different_element_indexes)):
-        real_index = different_element_indexes[index]
-        golden_data = golden[real_index]
-        output_data = output[real_index]
-        print(
-            "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" %
-            (real_index, golden_data, output_data,
-             abs(output_data - golden_data) / golden_data))
-        if index == 100:
-            break
-    error_ratio = float(different_element_indexes.size) / golden.size
-    print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol))
-    return error_ratio <= error_tol
-
-
-if __name__ == '__main__':
-    try:
-        res = verify_result(sys.argv[1], sys.argv[2])
-        if not res:
-            raise ValueError("[ERROR] result error")
-        else:
-            print("test pass")
-    except Exception as e:
-        print(e)
-        sys.exit(1)
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/CMakeLists.txt b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/CMakeLists.txt
deleted file mode 100644
index a43827510099f6e0994f8d449d3be908e4de3fd6..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/CMakeLists.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved.
-
-# CMake lowest version requirement
-cmake_minimum_required(VERSION 3.5.1)
-
-# project information
-project(acl_execute_add)
-
-# Compile options
-add_compile_options(-std=c++11)
-
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../output")
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../output")
-
-set(INC_PATH $ENV{DDK_PATH})
-
-if (NOT DEFINED ENV{DDK_PATH})
-    set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest")
-    message(STATUS "set default INC_PATH: ${INC_PATH}")
-else ()
-    message(STATUS "env INC_PATH: ${INC_PATH}")
-endif()
-
-set(LIB_PATH $ENV{NPU_HOST_LIB})
-
-# Dynamic libraries in the stub directory can only be used for compilation
-if (NOT DEFINED ENV{NPU_HOST_LIB})
-    set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/")
-    set(LIB_PATH1 "/usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub/")
-    message(STATUS "set default LIB_PATH: ${LIB_PATH}")
-else ()
-    message(STATUS "env LIB_PATH: ${LIB_PATH}")
-endif()
-
-# Header path
-include_directories(
-    ${INC_PATH}/runtime/include
-    ${INC_PATH}/atc/include
-    ../inc
-)
-
-# add host lib path
-link_directories(
-    ${LIB_PATH}
-    ${LIB_PATH1}
-)
-
-add_executable(execute_add_op
-    operator_desc.cpp
-    op_runner.cpp
-    main.cpp
-    common.cpp
-)
-
-target_link_libraries(execute_add_op
-    ascendcl
-    acl_op_compiler
-    stdc++
-)
-
-install(TARGETS execute_add_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/common.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/common.cpp
deleted file mode 100644
index 2fb3a18da3aea70016ddc47e5a766dbb4168d1b2..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/common.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-/**
- * @file common.cpp
- *
- * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#include "common.h"
-
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include <fstream>
-
-extern bool g_isDevice;
-
-bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize)
-{
-    struct stat sBuf;
-    int fileStatus = stat(filePath.data(), &sBuf);
-    if (fileStatus == -1) {
-        ERROR_LOG("failed to get file %s", filePath.c_str());
-        return false;
-    }
-    if (S_ISREG(sBuf.st_mode) == 0) {
-        ERROR_LOG("%s is not a file, please enter a file", filePath.c_str());
-        return false;
-    }
-
-    std::ifstream file;
-    file.open(filePath, std::ios::binary);
-    if (!file.is_open()) {
-        ERROR_LOG("Open file failed. path = %s", filePath.c_str());
-        return false;
-    }
-
-    std::filebuf *buf = file.rdbuf();
-    size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in);
-    if (size == 0) {
-        ERROR_LOG("file size is 0");
-        file.close();
-        return false;
-    }
-    if (size > bufferSize) {
-        ERROR_LOG("file size is larger than buffer size");
-        file.close();
-        return false;
-    }
-    buf->pubseekpos(0, std::ios::in);
-    buf->sgetn(static_cast<char *>(buffer), size);
-    fileSize = size;
-    file.close();
-    return true;
-}
-
-bool WriteFile(const std::string &filePath, const void *buffer, size_t size)
-{
-    if (buffer == nullptr) {
-        ERROR_LOG("Write file failed. buffer is nullptr");
-        return false;
-    }
-
-    int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE);
-    if (fd < 0) {
-        ERROR_LOG("Open file failed. path = %s", filePath.c_str());
-        return false;
-    }
-
-    auto writeSize = write(fd, buffer, size);
-    (void)close(fd);
-    if (writeSize < 0 || static_cast<size_t>(writeSize) != size) {
-        ERROR_LOG("Write file Failed.");
-        return false;
-    }
-
-    return true;
-}
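For reference, this is how the two helpers above are meant to be driven; a condensed sketch of what src/main.cpp does, with the buffer here sized for the sample's fixed 8 x 2048 float16 tensors:

```cpp
// Read a binary input into a pre-sized buffer, then persist a result buffer.
size_t fileSize = 0;
std::vector<char> buffer(8 * 2048 * sizeof(aclFloat16));

// ReadFile refuses files larger than bufferSize and reports the bytes read.
bool ok = ReadFile("../input/input_x.bin", fileSize, buffer.data(), buffer.size());

// WriteFile creates/truncates the target and writes exactly `size` bytes.
ok = ok && WriteFile("../output/output_z.bin", buffer.data(), fileSize);
```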
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/main.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/main.cpp
deleted file mode 100644
index b977fd4d8cee8e9aabab267f7bba4be90282d0be..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/main.cpp
+++ /dev/null
@@ -1,189 +0,0 @@
-/**
- * @file main.cpp
- *
- * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#include <cstdlib>
-#include <iostream>
-#include <string>
-
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include "acl/acl.h"
-#include "common.h"
-#include "op_runner.h"
-
-bool g_isDevice = false;
-int deviceId = 0;
-int isDynamic = 0;
-int length = 0;
-int dynamicType = 3;
-int lengthParam = 2;
-
-OperatorDesc CreateOpDesc()
-{
-    // define operator
-    std::vector<int64_t> shape{8, 2048};
-    std::string opType = "AddCustom";
-    if (isDynamic) {
-        shape = {8, length};
-    }
-    aclDataType dataType = ACL_FLOAT16;
-    aclFormat format = ACL_FORMAT_ND;
-    OperatorDesc opDesc(opType);
-    opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format);
-    opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format);
-    opDesc.AddOutputTensorDesc(dataType, shape.size(), shape.data(), format);
-    return opDesc;
-}
-
-bool SetInputData(OpRunner &runner)
-{
-    size_t fileSize = 0;
-    ReadFile("../input/input_x.bin", fileSize, runner.GetInputBuffer<void>(0), runner.GetInputSize(0));
-    ReadFile("../input/input_y.bin", fileSize, runner.GetInputBuffer<void>(1), runner.GetInputSize(1));
-    INFO_LOG("Set input success");
-    return true;
-}
-
-bool ProcessOutputData(OpRunner &runner)
-{
-    WriteFile("../output/output_z.bin", runner.GetOutputBuffer<void>(0), runner.GetOutputSize(0));
-    INFO_LOG("Write output success");
-    return true;
-}
-
-bool RunOp()
-{
-    // Create op desc
-    OperatorDesc opDesc = CreateOpDesc();
-
-    // Create Runner
-    OpRunner opRunner(&opDesc);
-    if (!opRunner.Init()) {
-        ERROR_LOG("Init OpRunner failed");
-        return false;
-    }
-
-    // Load inputs
-    if (!SetInputData(opRunner)) {
-        ERROR_LOG("Set input data failed");
-        return false;
-    }
-
-    // Run op
-    if (!opRunner.RunOp()) {
-        ERROR_LOG("Run op failed");
-        return false;
-    }
-
-    // Process output data
-    if (!ProcessOutputData(opRunner)) {
-        ERROR_LOG("Process output data failed");
-        return false;
-    }
-
-    INFO_LOG("Run op success");
-    return true;
-}
-
-void DestroyResource()
-{
-    bool flag = false;
-    if (aclrtResetDevice(deviceId) != ACL_SUCCESS) {
-        ERROR_LOG("Reset device %d failed", deviceId);
-        flag = true;
-    }
-    INFO_LOG("Reset Device success");
-    if (aclFinalize() != ACL_SUCCESS) {
-        ERROR_LOG("Finalize acl failed");
-        flag = true;
-    }
-    if (flag) {
-        ERROR_LOG("Destroy resource failed");
-    } else {
-        INFO_LOG("Destroy resource success");
-    }
-}
-
-bool InitResource()
-{
-    std::string output = "./output";
-    if (access(output.c_str(), 0) == -1) {
-        int ret = mkdir(output.c_str(), 0700);
-        if (ret == 0) {
-            INFO_LOG("Make output directory successfully");
-        } else {
-            ERROR_LOG("Make output directory fail");
-            return false;
-        }
-    }
-
-    // acl.json is the dump or profiling config file
-    if (aclInit("../scripts/acl.json") != ACL_SUCCESS) {
-        ERROR_LOG("acl init failed");
-        return false;
-    }
-
-    if (aclrtSetDevice(deviceId) != ACL_SUCCESS) {
-        ERROR_LOG("Set device failed. deviceId is %d", deviceId);
-        (void)aclFinalize();
-        return false;
-    }
-    INFO_LOG("Set device[%d] success", deviceId);
-
-    // runMode is ACL_HOST which represents app is running in host
-    // runMode is ACL_DEVICE which represents app is running in device
-    aclrtRunMode runMode;
-    if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) {
-        ERROR_LOG("Get run mode failed");
-        DestroyResource();
-        return false;
-    }
-    g_isDevice = (runMode == ACL_DEVICE);
-    INFO_LOG("Get RunMode[%d] success", runMode);
-
-    // set model path
-    if (aclopSetModelDir("../op_models") != ACL_SUCCESS) {
-        std::cerr << "Load single op model failed" << std::endl;
-        (void)aclFinalize();
-        return false;
-    }
-    INFO_LOG("aclopSetModelDir op model success");
-
-    return true;
-}
-
-int main(int argc, char **argv)
-{
-    if (argc == dynamicType) {
-        INFO_LOG("dynamic op will be called");
-        isDynamic = atoi(argv[1]);
-        length = atoi(argv[lengthParam]);
-    } else if (argc == 1) {
-        INFO_LOG("static op will be called");
-    } else {
-        ERROR_LOG("wrong input parameter number");
-        return -1;
-    }
-
-    if (!InitResource()) {
-        ERROR_LOG("Init resource failed");
-        return FAILED;
-    }
-    INFO_LOG("Init resource success");
-
-    if (!RunOp()) {
-        DestroyResource();
-        return FAILED;
-    }
-
-    DestroyResource();
-
-    return SUCCESS;
-}
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/op_runner.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/op_runner.cpp
deleted file mode 100644
index b05f14b34b92672b67448eba1883d078e93dcfd5..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/op_runner.cpp
+++ /dev/null
@@ -1,360 +0,0 @@
-/**
- * @file op_runner.cpp
- *
- * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#include "op_runner.h"
-
-#include <cassert>
-#include <iomanip>
-
-#include "acl/acl_op_compiler.h"
-#include "common.h"
-
-using namespace std;
-
-extern bool g_isDevice;
-
-OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc)
-{
-    numInputs_ = opDesc->inputDesc.size();
-    numOutputs_ = opDesc->outputDesc.size();
-}
-
-OpRunner::~OpRunner()
-{
-    for (size_t i = 0; i < numInputs_; ++i) {
-        (void)aclDestroyDataBuffer(inputBuffers_[i]);
-        (void)aclrtFree(devInputs_[i]);
-        if (g_isDevice) {
-            (void)aclrtFree(hostInputs_[i]);
-        } else {
-            (void)aclrtFreeHost(hostInputs_[i]);
-        }
-    }
-
-    for (size_t i = 0; i < numOutputs_; ++i) {
-        (void)aclDestroyDataBuffer(outputBuffers_[i]);
-        (void)aclrtFree(devOutputs_[i]);
-        if (g_isDevice) {
-            (void)aclrtFree(hostOutputs_[i]);
-        } else {
-            (void)aclrtFreeHost(hostOutputs_[i]);
-        }
-    }
-}
-
-bool OpRunner::Init()
-{
-    for (size_t i = 0; i < numInputs_; ++i) {
-        auto size = GetInputSize(i);
-        void *devMem = nullptr;
-        if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) {
-            ERROR_LOG("Malloc device memory for input[%zu] failed", i);
-            return false;
-        }
-        devInputs_.emplace_back(devMem);
-        inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size));
-
-        void *hostMem = nullptr;
-        if (g_isDevice) {
-            if (aclrtMalloc(&hostMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) {
-                ERROR_LOG("Malloc device memory for input[%zu] failed", i);
-                return false;
-            }
-        } else {
-            if (aclrtMallocHost(&hostMem, size) != ACL_SUCCESS) {
-                ERROR_LOG("Malloc host memory for input[%zu] failed", i);
-                return false;
-            }
-        }
-        if (hostMem == nullptr) {
-            ERROR_LOG("Malloc memory for input[%zu] failed", i);
-            return false;
-        }
-        hostInputs_.emplace_back(hostMem);
-    }
-
-    for (size_t i = 0; i < numOutputs_; ++i) {
-        auto size = GetOutputSize(i);
-        void *devMem = nullptr;
-        if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) {
-            ERROR_LOG("Malloc device memory for output[%zu] failed", i);
-            return false;
-        }
-        devOutputs_.emplace_back(devMem);
-        outputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size));
-
-        void *hostOutput = nullptr;
-        if (g_isDevice) {
-            if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) {
-                ERROR_LOG("Malloc device memory for output[%zu] failed", i);
-                return false;
-            }
-        } else {
-            if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) {
-                ERROR_LOG("Malloc host memory for output[%zu] failed", i);
-                return false;
-            }
-        }
-        if (hostOutput == nullptr) {
-            ERROR_LOG("Malloc host memory for output[%zu] failed", i);
-            return false;
-        }
-        hostOutputs_.emplace_back(hostOutput);
-    }
-
-    return true;
-}
-
-const size_t OpRunner::NumInputs()
-{
-    return numInputs_;
-}
-
-const size_t OpRunner::NumOutputs()
-{
-    return numOutputs_;
-}
-
-const size_t OpRunner::GetInputSize(size_t index) const
-{
-    if (index >= numInputs_) {
-        ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
-        return 0;
-    }
-
-    return aclGetTensorDescSize(opDesc_->inputDesc[index]);
-}
-
-std::vector<int64_t> OpRunner::GetInputShape(size_t index) const
-{
-    std::vector<int64_t> ret;
-    if (index >= numInputs_) {
-        ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
-        return ret;
-    }
-
-    auto desc = opDesc_->inputDesc[index];
-    for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) {
-        int64_t dimSize;
-        if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) {
-            ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i);
-            ret.clear();
-            return ret;
-        }
-        ret.emplace_back(dimSize);
-    }
-
-    return ret;
-}
-
-std::vector<int64_t> OpRunner::GetOutputShape(size_t index) const
-{
-    std::vector<int64_t> ret;
-    if (index >= opDesc_->outputDesc.size()) {
-        ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
-        return ret;
-    }
-
-    auto desc = opDesc_->outputDesc[index];
-    for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) {
-        int64_t dimSize;
-        if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) {
-            ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i);
-            ret.clear();
-            return ret;
-        }
-        ret.emplace_back(dimSize);
-    }
-    return ret;
-}
-
-size_t OpRunner::GetInputElementCount(size_t index) const
-{
-    if (index >= opDesc_->inputDesc.size()) {
-        ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
-        return 0;
-    }
-
-    return aclGetTensorDescElementCount(opDesc_->inputDesc[index]);
-}
-
-size_t OpRunner::GetOutputElementCount(size_t index) const
-{
-    if (index >= opDesc_->outputDesc.size()) {
-        ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
-        return 0;
-    }
-
-    return aclGetTensorDescElementCount(opDesc_->outputDesc[index]);
-}
-
-size_t OpRunner::GetOutputSize(size_t index) const
-{
-    if (index >= opDesc_->outputDesc.size()) {
-        ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
-        return 0;
-    }
-
-    return aclGetTensorDescSize(opDesc_->outputDesc[index]);
-}
-
-bool OpRunner::RunOp()
-{
-    for (size_t i = 0; i < numInputs_; ++i) {
-        auto size = GetInputSize(i);
-        aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE;
-        if (g_isDevice) {
-            kind = ACL_MEMCPY_DEVICE_TO_DEVICE;
-        }
-        if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) {
-            ERROR_LOG("Copy input[%zu] failed", i);
-            return false;
-        }
-        INFO_LOG("Copy input[%zu] success", i);
-    }
-
-    aclrtStream stream = nullptr;
-    if (aclrtCreateStream(&stream) != ACL_SUCCESS) {
-        ERROR_LOG("Create stream failed");
-        return false;
-    }
-    INFO_LOG("Create stream success");
-
-    auto ret = aclopExecuteV2(opDesc_->opType.c_str(), numInputs_, opDesc_->inputDesc.data(), inputBuffers_.data(),
-                              numOutputs_, opDesc_->outputDesc.data(), outputBuffers_.data(), opDesc_->opAttr, stream);
-    if (ret == ACL_ERROR_OP_TYPE_NOT_MATCH || ret == ACL_ERROR_OP_INPUT_NOT_MATCH ||
-        ret == ACL_ERROR_OP_OUTPUT_NOT_MATCH || ret == ACL_ERROR_OP_ATTR_NOT_MATCH) {
-        ERROR_LOG("[%s] op with the given description is not compiled. Please run atc first, errorCode is %d",
-                  opDesc_->opType.c_str(), static_cast<int32_t>(ret));
-        (void)aclrtDestroyStream(stream);
-        return false;
-    } else if (ret != ACL_SUCCESS) {
-        (void)aclrtDestroyStream(stream);
-        ERROR_LOG("Execute %s failed. errorCode is %d", opDesc_->opType.c_str(), static_cast<int32_t>(ret));
-        return false;
-    }
-    INFO_LOG("Execute %s success", opDesc_->opType.c_str());
-
-    if (aclrtSynchronizeStream(stream) != ACL_SUCCESS) {
-        ERROR_LOG("Synchronize stream failed");
-        (void)aclrtDestroyStream(stream);
-        return false;
-    }
-    INFO_LOG("Synchronize stream success");
-
-    for (size_t i = 0; i < numOutputs_; ++i) {
-        auto size = GetOutputSize(i);
-        aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST;
-        if (g_isDevice) {
-            kind = ACL_MEMCPY_DEVICE_TO_DEVICE;
-        }
-        if (aclrtMemcpy(hostOutputs_[i], size, devOutputs_[i], size, kind) != ACL_SUCCESS) {
-            ERROR_LOG("Copy output[%zu] failed", i);
-            (void)aclrtDestroyStream(stream);
-            return false;
-        }
-        INFO_LOG("Copy output[%zu] success", i);
-    }
-
-    (void)aclrtDestroyStream(stream);
-    return true;
-}
-
-template <typename T> void DoPrintData(const T *data, size_t count, size_t elementsPerRow)
-{
-    assert(elementsPerRow != 0);
-    for (size_t i = 0; i < count; ++i) {
-        std::cout << std::setw(10) << data[i];
-        if (i % elementsPerRow == elementsPerRow - 1) {
-            std::cout << std::endl;
-        }
-    }
-}
-
-void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow)
-{
-    assert(elementsPerRow != 0);
-    for (size_t i = 0; i < count; ++i) {
-        std::cout << std::setw(10) << std::setprecision(4) << aclFloat16ToFloat(data[i]);
-        if (i % elementsPerRow == elementsPerRow - 1) {
-            std::cout << std::endl;
-        }
-    }
-}
-
-void PrintData(const void *data, size_t count, aclDataType dataType, size_t elementsPerRow)
-{
-    if (data == nullptr) {
-        ERROR_LOG("Print data failed. data is nullptr");
-        return;
-    }
-
-    switch (dataType) {
-        case ACL_BOOL:
-            DoPrintData(reinterpret_cast<const bool *>(data), count, elementsPerRow);
-            break;
-        case ACL_INT8:
-            DoPrintData(reinterpret_cast<const int8_t *>(data), count, elementsPerRow);
-            break;
-        case ACL_UINT8:
-            DoPrintData(reinterpret_cast<const uint8_t *>(data), count, elementsPerRow);
-            break;
-        case ACL_INT16:
-            DoPrintData(reinterpret_cast<const int16_t *>(data), count, elementsPerRow);
-            break;
-        case ACL_UINT16:
-            DoPrintData(reinterpret_cast<const uint16_t *>(data), count, elementsPerRow);
-            break;
-        case ACL_INT32:
-            DoPrintData(reinterpret_cast<const int32_t *>(data), count, elementsPerRow);
-            break;
-        case ACL_UINT32:
-            DoPrintData(reinterpret_cast<const uint32_t *>(data), count, elementsPerRow);
-            break;
-        case ACL_INT64:
-            DoPrintData(reinterpret_cast<const int64_t *>(data), count, elementsPerRow);
-            break;
-        case ACL_UINT64:
-            DoPrintData(reinterpret_cast<const uint64_t *>(data), count, elementsPerRow);
-            break;
-        case ACL_FLOAT16:
-            DoPrintFp16Data(reinterpret_cast<const aclFloat16 *>(data), count, elementsPerRow);
-            break;
-        case ACL_FLOAT:
-            DoPrintData(reinterpret_cast<const float *>(data), count, elementsPerRow);
-            break;
-        case ACL_DOUBLE:
-            DoPrintData(reinterpret_cast<const double *>(data), count, elementsPerRow);
-            break;
-        default:
-            ERROR_LOG("Unsupported type: %d", dataType);
-    }
-}
-
-void OpRunner::PrintInput(size_t index, size_t numElementsPerRow)
-{
-    if (index >= numInputs_) {
-        ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_);
-        return;
-    }
-
-    auto desc = opDesc_->inputDesc[index];
-    PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow);
-}
-
-void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow)
-{
-    if (index >= numOutputs_) {
-        ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
-        return;
-    }
-
-    auto desc = opDesc_->outputDesc[index];
-    PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow);
-}
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/operator_desc.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/operator_desc.cpp
deleted file mode 100644
index b830e8b2ec2b176c6c4d8b41bb887a5c2f4403d4..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/src/operator_desc.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * @file operator_desc.cpp
- *
- * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#include "operator_desc.h"
-
-#include "common.h"
-
-using namespace std;
-
-OperatorDesc::OperatorDesc(std::string opType) : opType(std::move(opType))
-{
-    opAttr = aclopCreateAttr();
-}
-
-OperatorDesc::~OperatorDesc()
-{
-    for (auto *desc : inputDesc) {
-        aclDestroyTensorDesc(desc);
-    }
-
-    for (auto *desc : outputDesc) {
-        aclDestroyTensorDesc(desc);
-    }
-
-    aclopDestroyAttr(opAttr);
-}
-
-OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format)
-{
-    aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format);
-    if (desc == nullptr) {
-        ERROR_LOG("create tensor failed");
-        return *this;
-    }
-
-    inputDesc.emplace_back(desc);
-    return *this;
-}
-
-OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims,
-                                                aclFormat format)
-{
-    aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format);
-    if (desc == nullptr) {
-        ERROR_LOG("create tensor failed");
-        return *this;
-    }
-
-    outputDesc.emplace_back(desc);
-    return *this;
-}
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/README.md b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/README.md
deleted file mode 100644
index cd06b7d5ebdd8873a412edb9449bfc2ef48e16a0..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-## Invoking the AddTemplateCustom operator project through aclopCompile model invocation
-This sample does not yet support Atlas 200/500 A2 inference products.
-
-## Running the sample operator
-### 1. Build the operator project
-Before running this sample, complete the preparation described in [Build the operator project](../README.md#operatorcompile).
-### 2. Run the sample via aclopCompile invocation
-
-  - Enter the sample directory.
-    Download the sample code from the command line, taking the master branch as an example.
-    ```bash
-    cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel
-    ```
-
-  - Run the sample.
-
-    During execution the sample automatically generates test data, then builds and runs the aclopCompile-based model invocation sample, and finally verifies the result. See the run.sh script for details.
-    ```bash
-    bash run.sh
-    ```
-## Change log
-| Date       | Change                                       |
-|------------|----------------------------------------------|
-| 2024/10/25 | Added the template-parameter operator sample |
\ No newline at end of file
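Unlike the offline sample, this path compiles the single operator at run time instead of loading a prebuilt .om. The sketch below shows the intended pairing of aclopCompile with aclopExecuteV2; it assumes the aclopCompile signature from acl_op_compiler.h and reuses descriptor/buffer vectors built as in the offline sample's OpRunner, so treat it as an outline rather than this sample's literal code:

```cpp
// Compile the single op online, then launch it with aclopExecuteV2.
aclError rc = aclopCompile("AddCustom",
                           numInputs, inputDesc.data(),   // aclTensorDesc *[]
                           numOutputs, outputDesc.data(),
                           opAttr,
                           ACL_ENGINE_SYS,                // let the runtime choose the engine
                           ACL_COMPILE_SYS,               // use the built-in online compiler
                           nullptr);                      // no extra single-op model path
if (rc == ACL_SUCCESS) {
    rc = aclopExecuteV2("AddCustom",
                        numInputs, inputDesc.data(), inputBuffers.data(),
                        numOutputs, outputDesc.data(), outputBuffers.data(),
                        opAttr, stream);
}
```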
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/inc/common.h b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/inc/common.h
deleted file mode 100644
index 11bb4aeca57d04c8adcd6f215674ee4ba1553f21..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/inc/common.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * @file common.h
- *
- * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#ifndef COMMON_H
-#define COMMON_H
-
-#include <cstdint>
-#include <cstdio>
-#include <iostream>
-#include <string>
-#include <vector>
-
-#include "acl/acl.h"
-
-#define SUCCESS 0
-#define FAILED 1
-
-#define INFO_LOG(fmt, args...) fprintf(stdout, "[INFO] " fmt "\n", ##args)
-#define WARN_LOG(fmt, args...) fprintf(stdout, "[WARN] " fmt "\n", ##args)
-#define ERROR_LOG(fmt, args...) fprintf(stderr, "[ERROR] " fmt "\n", ##args)
-
-/**
- * @brief Read data from file
- * @param [in] filePath: file path
- * @param [out] fileSize: file size
- * @return read result
- */
-bool ReadFile(const std::string &filePath, size_t fileSize, void *buffer, size_t bufferSize);
-
-/**
- * @brief Write data to file
- * @param [in] filePath: file path
- * @param [in] buffer: data to write to file
- * @param [in] size: size to write
- * @return write result
- */
-bool WriteFile(const std::string &filePath, const void *buffer, size_t size);
-
-#endif // COMMON_H
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/inc/op_runner.h b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/inc/op_runner.h
deleted file mode 100644
index 66b6ea2c3c403946f14d588fd6f63a9aeb215960..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/inc/op_runner.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/**
- * @file op_runner.h
- *
- * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */ -#ifndef OP_RUNNER_H -#define OP_RUNNER_H - -#include "acl/acl.h" -#include "aclnn/acl_meta.h" -#include "common.h" -#include "operator_desc.h" - -/** - * Op Runner - */ -class OpRunner { -public: - /** - * @brief Constructor - * @param [in] opDesc: op description - */ - explicit OpRunner(OperatorDesc *opDesc); - - /** - * @brief Destructor - */ - virtual ~OpRunner(); - - /** - * @brief Init op runner - */ - bool Init(); - - /** - * @brief Get number of inputs - * @return number of inputs - */ - const size_t NumInputs(); - - /** - * @brief Get number of outputs - * @return number of outputs - */ - const size_t NumOutputs(); - - /** - * @brief Get input size by index - * @param [in] index: input index - * @return size of the input - */ - const size_t GetInputSize(size_t index) const; - const size_t GetInputNumDims(size_t index) const; - aclDataType GetInputDataType(size_t index) const; - aclFormat GetInputFormat(size_t index) const; - - /** - * @brief Get output size by index - * @param [in] index: output index - * @return size of the output - */ - size_t GetOutputSize(size_t index) const; - const size_t GetOutputNumDims(size_t index) const; - aclDataType GetOutputDataType(size_t index) const; - aclFormat GetOutputFormat(size_t index) const; - - /** - * @brief Get input element count by index - * @param i[in] ndex: input index - * @return element count of the input - */ - size_t GetInputElementCount(size_t index) const; - - /** - * @brief Get output element count by index - * @param [in] index: output index - * @return element count of the output - */ - size_t GetOutputElementCount(size_t index) const; - - /** - * @brief Get input shape by index - * @param [in] index: input index - * @return shape of the output - */ - std::vector GetInputShape(size_t index) const; - - /** - * @brief Get output shape by index - * @param [in] index: output index - * @return shape of the output - */ - std::vector GetOutputShape(size_t index) const; - - /** - * @brief Get input buffer(host memory) by index - * @tparam T: data type - * @param [in] index: input index - * @return host address of the input - */ - template T *GetInputBuffer(size_t index) - { - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return nullptr; - } - return reinterpret_cast(hostInputs_[index]); - } - - /** - * @brief Get output buffer(host memory) by index - * @tparam T: data type - * @param [in] index: output index - * @return host address of the output - */ - template const T *GetOutputBuffer(size_t index) - { - if (index >= numOutputs_) { - ERROR_LOG("index out of range. 
-
-    /**
-     * @brief Get output buffer(host memory) by index
-     * @tparam T: data type
-     * @param [in] index: output index
-     * @return host address of the output
-     */
-    template <typename T> const T *GetOutputBuffer(size_t index)
-    {
-        if (index >= numOutputs_) {
-            ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
-            return nullptr;
-        }
-
-        return reinterpret_cast<const T *>(hostOutputs_[index]);
-    }
-
-    /**
-     * @brief Print readable input by index
-     * @param [in] index: input index
-     * @param [in] elementsPerRow: number of elements per row
-     */
-    void PrintInput(size_t index, size_t elementsPerRow = 16);
-
-    /**
-     * @brief Print readable output by index
-     * @param [in] index: output index
-     * @param [in] elementsPerRow: number of elements per row
-     */
-    void PrintOutput(size_t index, size_t elementsPerRow = 16);
-
-    /**
-     * @brief Compile static op
-     * @return compile result
-     */
-    bool CompileStaticOp();
-
-    /**
-     * @brief Compile dynamic op
-     * @return compile result
-     */
-    bool CompileDynamicOp();
-
-    /**
-     * @brief Run op
-     * @return run result
-     */
-    bool RunOp();
-
-private:
-    size_t numInputs_;
-    size_t numOutputs_;
-
-    std::vector<aclDataBuffer *> inputBuffers_;
-    std::vector<aclDataBuffer *> outputBuffers_;
-
-    std::vector<void *> devInputs_;
-    std::vector<void *> devOutputs_;
-
-    std::vector<void *> hostInputs_;
-    std::vector<void *> hostOutputs_;
-
-    std::vector<aclTensor *> inputTensor_;
-    std::vector<aclTensor *> outputTensor_;
-    OperatorDesc *opDesc_;
-};
-
-#endif // OP_RUNNER_H
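Taken together with the OperatorDesc declared in the next file, the runner is meant to be driven in a fixed sequence: describe the op, Init() to allocate device and host buffers, fill the host inputs, RunOp() to copy, compile, launch, and synchronize, then read the host outputs. A condensed, hypothetical sketch of that sequence; `RunAddCustom` and the fill step are illustrative, while the shapes and dtype come from the sample's AddCustom operator:

```cpp
// Sketch of the OpRunner lifecycle; mirrors what src/main.cpp does below.
#include "op_runner.h"
#include "operator_desc.h"

bool RunAddCustom()
{
    std::vector<int64_t> shape{8, 2048};
    OperatorDesc desc;
    desc.opType = "AddCustom";
    desc.AddInputTensorDesc(ACL_FLOAT16, shape.size(), shape.data(), ACL_FORMAT_ND);
    desc.AddInputTensorDesc(ACL_FLOAT16, shape.size(), shape.data(), ACL_FORMAT_ND);
    desc.AddOutputTensorDesc(ACL_FLOAT16, shape.size(), shape.data(), ACL_FORMAT_ND);

    OpRunner runner(&desc);
    if (!runner.Init()) { // allocates device/host buffers, builds aclTensors
        return false;
    }
    aclFloat16 *x = runner.GetInputBuffer<aclFloat16>(0); // host staging buffers
    aclFloat16 *y = runner.GetInputBuffer<aclFloat16>(1);
    // ... fill x and y with GetInputSize(i) bytes of data ...
    (void)x;
    (void)y;
    if (!runner.RunOp()) { // H2D copy, compile + launch, sync, D2H copy
        return false;
    }
    runner.PrintOutput(0); // readable dump of the result
    return true;
}
```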
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/inc/operator_desc.h b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/inc/operator_desc.h
deleted file mode 100644
index 8d14e39530d7555f8b512e1fdbb9da78cb096e39..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/inc/operator_desc.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * @file operator_desc.h
- *
- * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#ifndef OPERATOR_DESC_H
-#define OPERATOR_DESC_H
-
-#include <string>
-#include <vector>
-
-#include "acl/acl.h"
-
-/**
- * Op description
- */
-struct OperatorDesc {
-    /**
-     * Constructor
-     */
-    explicit OperatorDesc();
-
-    /**
-     * Destructor
-     */
-    virtual ~OperatorDesc();
-
-    /**
-     * Add an input tensor description
-     * @param [in] dataType: data type
-     * @param [in] numDims: number of dims
-     * @param [in] dims: dims
-     * @param [in] format: format
-     * @return OperatorDesc
-     */
-    OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format);
-
-    /**
-     * Add an output tensor description
-     * @param [in] dataType: data type
-     * @param [in] numDims: number of dims
-     * @param [in] dims: dims
-     * @param [in] format: format
-     * @return OperatorDesc
-     */
-    OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format);
-
-    std::string opType;
-    std::vector<aclTensorDesc *> inputDesc;
-    std::vector<aclTensorDesc *> outputDesc;
-    aclopAttr *opAttr;
-};
-
-#endif // OPERATOR_DESC_H
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/input/.keep b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/input/.keep
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/scripts/acl.json b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/scripts/acl.json
deleted file mode 100644
index 9e26dfeeb6e641a33dae4961196235bdb965b21b..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/scripts/acl.json
+++ /dev/null
@@ -1 +0,0 @@
-{}
\ No newline at end of file
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/scripts/gen_data.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/scripts/gen_data.py
deleted file mode 100644
index ea8ce828aea146c9ab462290be403c4cfd483b75..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/scripts/gen_data.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/python3
-# coding=utf-8
-#
-# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# ===============================================================================
-
-import numpy as np
-
-
-def gen_golden_data_simple():
-    input_x = np.random.uniform(1, 100, [8, 2048]).astype(np.float16)
-    input_y = np.random.uniform(1, 100, [8, 2048]).astype(np.float16)
-    golden = (input_x + input_y).astype(np.float16)
-
-    input_x.tofile("./input/input_x.bin")
-    input_y.tofile("./input/input_y.bin")
-    golden.tofile("./output/golden.bin")
-
-
-if __name__ == "__main__":
-    gen_golden_data_simple()
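Note the contract this script establishes: the `.bin` files are raw, header-less float16 arrays, so on the C++ side they map byte-for-byte onto `aclFloat16` host buffers. A small hypothetical loader showing that correspondence; `LoadFp16Input` is illustrative, while the file name and the 8x2048 shape come from gen_data.py:

```cpp
// Sketch: read gen_data.py's raw fp16 output into an aclFloat16 buffer.
#include <vector>
#include "acl/acl.h"
#include "common.h"

bool LoadFp16Input(std::vector<aclFloat16> &host)
{
    const size_t elemCount = 8 * 2048; // shape written by gen_data.py
    host.resize(elemCount);
    size_t bytesRead = 0;
    if (!ReadFile("./input/input_x.bin", bytesRead, host.data(),
                  elemCount * sizeof(aclFloat16))) {
        return false;
    }
    // Each element is a 2-byte IEEE-754 half; convert one to float to inspect.
    INFO_LOG("first element = %f", aclFloat16ToFloat(host[0]));
    return bytesRead == elemCount * sizeof(aclFloat16);
}
```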
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/scripts/verify_result.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/scripts/verify_result.py
deleted file mode 100644
index 1a21d809ab206a65bc952ca4cb06c345edcd3e7a..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/scripts/verify_result.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/python3
-# coding=utf-8
-#
-# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# ===============================================================================
-
-import sys
-import numpy as np
-
-# for float16
-relative_tol = 1e-3
-absolute_tol = 1e-5
-error_tol = 1e-3
-
-
-def verify_result(output, golden):
-    output = np.fromfile(output, dtype=np.float16).reshape(-1)
-    golden = np.fromfile(golden, dtype=np.float16).reshape(-1)
-    different_element_results = np.isclose(output,
-                                           golden,
-                                           rtol=relative_tol,
-                                           atol=absolute_tol,
-                                           equal_nan=True)
-    different_element_indexes = np.where(~different_element_results)[0]
-    for index in range(len(different_element_indexes)):
-        real_index = different_element_indexes[index]
-        golden_data = golden[real_index]
-        output_data = output[real_index]
-        print(
-            "data index: %06d, expected: %-.9f, actual: %-.9f, rdiff: %-.6f" %
-            (real_index, golden_data, output_data,
-             abs(output_data - golden_data) / golden_data))
-        if index == 100:
-            break
-    error_ratio = float(different_element_indexes.size) / golden.size
-    print("error ratio: %.4f, tolerance: %.4f" % (error_ratio, error_tol))
-    return error_ratio <= error_tol
-
-
-if __name__ == '__main__':
-    try:
-        res = verify_result(sys.argv[1], sys.argv[2])
-        if not res:
-            raise ValueError("[ERROR] result error")
-        else:
-            print("test pass")
-    except Exception as e:
-        print(e)
-        sys.exit(1)
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/CMakeLists.txt b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/CMakeLists.txt
deleted file mode 100644
index 7c75175838130155e43a6b0dfddf85a8f07c8c45..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/CMakeLists.txt
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved.
-
-# CMake lowest version requirement
-cmake_minimum_required(VERSION 3.5.1)
-
-# project information
-project(acl_execute_add)
-
-# Compile options
-add_compile_options(-std=c++11)
-
-set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "../output")
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "../output")
-
-set(INC_PATH $ENV{DDK_PATH})
-
-if (NOT DEFINED ENV{DDK_PATH})
-    set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest")
-    message(STATUS "set default INC_PATH: ${INC_PATH}")
-else ()
-    message(STATUS "env INC_PATH: ${INC_PATH}")
-endif()
-
-set(LIB_PATH $ENV{NPU_HOST_LIB})
-
-# Dynamic libraries in the stub directory can only be used for compilation
-if (NOT DEFINED ENV{NPU_HOST_LIB})
-    set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/")
-    set(LIB_PATH1 "/usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub/")
-    message(STATUS "set default LIB_PATH: ${LIB_PATH}")
-else ()
-    message(STATUS "env LIB_PATH: ${LIB_PATH}")
-endif()
-
-# Header path
-include_directories(
-    ${INC_PATH}/runtime/include
-    ${INC_PATH}/atc/include
-    ../inc
-)
-
-# add host lib path
-link_directories(
-    ${LIB_PATH}
-    ${LIB_PATH1}
-)
-
-add_executable(execute_add_op
-    operator_desc.cpp
-    op_runner.cpp
-    main.cpp
-    common.cpp
-)
-
-target_link_libraries(execute_add_op
-    ascendcl
-    acl_op_compiler
-    nnopbase
-    stdc++
-)
-
-install(TARGETS execute_add_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/common.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/common.cpp
deleted file mode 100644
index 992759c95af685fba85838acbe188a3533928128..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/common.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-/**
- * @file common.cpp
- *
- * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#include "common.h"
-
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include <fstream>
-
-extern bool g_isDevice;
-
-bool ReadFile(const std::string &filePath, size_t &fileSize, void *buffer, size_t bufferSize)
-{
-    struct stat sBuf;
-    int fileStatus = stat(filePath.data(), &sBuf);
-    if (fileStatus == -1) {
-        ERROR_LOG("failed to get file %s", filePath.c_str());
-        return false;
-    }
-    if (S_ISREG(sBuf.st_mode) == 0) {
-        ERROR_LOG("%s is not a file, please enter a file", filePath.c_str());
-        return false;
-    }
-
-    std::ifstream file;
-    file.open(filePath, std::ios::binary);
-    if (!file.is_open()) {
-        ERROR_LOG("Open file failed. path = %s", filePath.c_str());
-        return false;
-    }
-
-    std::filebuf *buf = file.rdbuf();
-    size_t size = buf->pubseekoff(0, std::ios::end, std::ios::in);
-    if (size == 0) {
-        ERROR_LOG("file size is 0");
-        file.close();
-        return false;
-    }
-    if (size > bufferSize) {
-        ERROR_LOG("file size is larger than buffer size");
-        file.close();
-        return false;
-    }
-    buf->pubseekpos(0, std::ios::in);
-    buf->sgetn(static_cast<char *>(buffer), size);
-    fileSize = size;
-    file.close();
-    return true;
-}
-
-bool WriteFile(const std::string &filePath, const void *buffer, size_t size)
-{
-    if (buffer == nullptr) {
-        ERROR_LOG("Write file failed. buffer is nullptr");
-        return false;
-    }
-
-    int fd = open(filePath.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWRITE);
-    if (fd < 0) {
-        ERROR_LOG("Open file failed.
path = %s", filePath.c_str()); - return false; - } - - size_t writeSize = write(fd, buffer, size); - (void)close(fd); - if (writeSize != size) { - ERROR_LOG("Write file Failed."); - return false; - } - - return true; -} diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/main.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/main.cpp deleted file mode 100644 index 69331769cc1519cfdd01cd10d0d97f9ec78e4fc2..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/main.cpp +++ /dev/null @@ -1,151 +0,0 @@ -/** - * @file main.cpp - * - * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ -#include -#include -#include - -#include -#include - -#include "acl/acl.h" -#include "common.h" -#include "op_runner.h" - -bool g_isDevice = false; -int deviceId = 0; - -OperatorDesc CreateOpDesc() -{ - // define operator - std::vector shape{8, 2048}; - aclDataType dataType = ACL_FLOAT16; - aclFormat format = ACL_FORMAT_ND; - OperatorDesc opDesc; - opDesc.opType = "AddCustom"; - opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); - opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); - opDesc.AddOutputTensorDesc(dataType, shape.size(), shape.data(), format); - return opDesc; -} - -bool SetInputData(OpRunner &runner) -{ - size_t fileSize = 0; - ReadFile("../input/input_x.bin", fileSize, runner.GetInputBuffer(0), runner.GetInputSize(0)); - ReadFile("../input/input_y.bin", fileSize, runner.GetInputBuffer(1), runner.GetInputSize(1)); - INFO_LOG("Set input success"); - return true; -} - -bool ProcessOutputData(OpRunner &runner) -{ - WriteFile("../output/output_z.bin", runner.GetOutputBuffer(0), runner.GetOutputSize(0)); - INFO_LOG("Write output success"); - return true; -} - -void DestroyResource() -{ - bool flag = false; - if (aclrtResetDevice(deviceId) != ACL_SUCCESS) { - ERROR_LOG("Reset device %d failed", deviceId); - flag = true; - } - INFO_LOG("Reset Device success"); - if (aclFinalize() != ACL_SUCCESS) { - ERROR_LOG("Finalize acl failed"); - flag = true; - } - if (flag) { - ERROR_LOG("Destroy resource failed"); - } else { - INFO_LOG("Destroy resource success"); - } -} - -bool InitResource() -{ - // acl.json is dump or profiling config file - if (aclInit("../scripts/acl.json") != ACL_SUCCESS) { - ERROR_LOG("acl init failed"); - return false; - } - - if (aclrtSetDevice(deviceId) != ACL_SUCCESS) { - ERROR_LOG("Set device failed. 
deviceId is %d", deviceId); - (void)aclFinalize(); - return false; - } - INFO_LOG("Set device[%d] success", deviceId); - - // runMode is ACL_HOST which represents app is running in host - // runMode is ACL_DEVICE which represents app is running in device - aclrtRunMode runMode; - if (aclrtGetRunMode(&runMode) != ACL_SUCCESS) { - ERROR_LOG("Get run mode failed"); - DestroyResource(); - return false; - } - g_isDevice = (runMode == ACL_DEVICE); - INFO_LOG("Get RunMode[%d] success", runMode); - - return true; -} - -bool RunOp() -{ - // create op desc - OperatorDesc opDesc = CreateOpDesc(); - - // create Runner - OpRunner opRunner(&opDesc); - if (!opRunner.Init()) { - ERROR_LOG("Init OpRunner failed"); - return false; - } - - // Load inputs - if (!SetInputData(opRunner)) { - ERROR_LOG("Set input data failed"); - return false; - } - - // Run op - if (!opRunner.RunOp()) { - ERROR_LOG("Run op failed"); - return false; - } - - // process output data - if (!ProcessOutputData(opRunner)) { - ERROR_LOG("Process output data failed"); - return false; - } - INFO_LOG("Run op success"); - return true; -} - -int main(int argc, char **argv) -{ - if (!InitResource()) { - ERROR_LOG("Init resource failed"); - return FAILED; - } - INFO_LOG("Init resource success"); - - if (!RunOp()) { - DestroyResource(); - return FAILED; - } - - DestroyResource(); - - return SUCCESS; -} diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/op_runner.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/op_runner.cpp deleted file mode 100644 index 094c34def9d6446b6db4e010ecf2016267bed6bd..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/op_runner.cpp +++ /dev/null @@ -1,437 +0,0 @@ -/** - * @file op_runner.cpp - * - * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- */ -#include "op_runner.h" - -#include -#include - -#include "acl/acl_op_compiler.h" -#include "common.h" - -using namespace std; - -extern bool g_isDevice; - -OpRunner::OpRunner(OperatorDesc *opDesc) : opDesc_(opDesc) -{ - numInputs_ = opDesc->inputDesc.size(); - numOutputs_ = opDesc->outputDesc.size(); -} - -OpRunner::~OpRunner() -{ - for (size_t i = 0; i < numInputs_; ++i) { - (void)aclDestroyTensor(inputTensor_[i]); - (void)aclDestroyDataBuffer(inputBuffers_[i]); - (void)aclrtFree(devInputs_[i]); - if (g_isDevice) { - (void)aclrtFree(hostInputs_[i]); - } else { - (void)aclrtFreeHost(hostInputs_[i]); - } - } - - for (size_t i = 0; i < numOutputs_; ++i) { - (void)aclDestroyTensor(outputTensor_[i]); - (void)aclDestroyDataBuffer(outputBuffers_[i]); - (void)aclrtFree(devOutputs_[i]); - if (g_isDevice) { - (void)aclrtFree(hostOutputs_[i]); - } else { - (void)aclrtFreeHost(hostOutputs_[i]); - } - } -} - -bool OpRunner::Init() -{ - for (size_t i = 0; i < numInputs_; ++i) { - auto size = GetInputSize(i); - void *devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - devInputs_.emplace_back(devMem); - inputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - - void *hostInput = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostInput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - } else { - if (aclrtMallocHost(&hostInput, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for input[%zu] failed", i); - return false; - } - } - if (hostInput == nullptr) { - ERROR_LOG("Malloc memory for input[%zu] failed", i); - return false; - } - hostInputs_.emplace_back(hostInput); - - aclTensor *inputTensor = - aclCreateTensor(GetInputShape(i).data(), GetInputNumDims(i), GetInputDataType(i), nullptr, 0, - GetInputFormat(i), GetInputShape(i).data(), GetInputNumDims(i), devInputs_[i]); - if (inputTensor == nullptr) { - ERROR_LOG("Create Tensor for input[%zu] failed", i); - return false; - } - inputTensor_.emplace_back(inputTensor); - } - - for (size_t i = 0; i < numOutputs_; ++i) { - auto size = GetOutputSize(i); - void *devMem = nullptr; - if (aclrtMalloc(&devMem, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - devOutputs_.emplace_back(devMem); - outputBuffers_.emplace_back(aclCreateDataBuffer(devMem, size)); - - void *hostOutput = nullptr; - if (g_isDevice) { - if (aclrtMalloc(&hostOutput, size, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - } else { - if (aclrtMallocHost(&hostOutput, size) != ACL_SUCCESS) { - ERROR_LOG("Malloc device memory for output[%zu] failed", i); - return false; - } - } - if (hostOutput == nullptr) { - ERROR_LOG("Malloc host memory for output[%zu] failed", i); - return false; - } - hostOutputs_.emplace_back(hostOutput); - - aclTensor *outputTensor = - aclCreateTensor(GetOutputShape(i).data(), GetOutputNumDims(i), GetOutputDataType(i), nullptr, 0, - GetOutputFormat(i), GetOutputShape(i).data(), GetOutputNumDims(i), devOutputs_[i]); - if (outputTensor == nullptr) { - ERROR_LOG("Create Tensor for output[%zu] failed", i); - return false; - } - outputTensor_.emplace_back(outputTensor); - } - - return true; -} - -const size_t OpRunner::NumInputs() -{ - return numInputs_; -} - -const size_t 
OpRunner::NumOutputs() -{ - return numOutputs_; -} - -const size_t OpRunner::GetInputSize(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - - return aclGetTensorDescSize(opDesc_->inputDesc[index]); -} - -const size_t OpRunner::GetInputNumDims(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return 0; - } - - return aclGetTensorDescNumDims(opDesc_->inputDesc[index]); -} - -aclDataType OpRunner::GetInputDataType(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ACL_DT_UNDEFINED; - } - - return aclGetTensorDescType(opDesc_->inputDesc[index]); -} - -aclFormat OpRunner::GetInputFormat(size_t index) const -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ACL_FORMAT_UNDEFINED; - } - - return aclGetTensorDescFormat(opDesc_->inputDesc[index]); -} - -std::vector OpRunner::GetInputShape(size_t index) const -{ - std::vector ret; - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numInputs = %zu", index, numInputs_); - return ret; - } - - auto desc = opDesc_->inputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); - ret.clear(); - return ret; - } - ret.emplace_back(dimSize); - } - - return ret; -} - -size_t OpRunner::GetOutputSize(size_t index) const -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - - return aclGetTensorDescSize(opDesc_->outputDesc[index]); -} - -const size_t OpRunner::GetOutputNumDims(size_t index) const -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return 0; - } - - return aclGetTensorDescNumDims(opDesc_->outputDesc[index]); -} - -aclDataType OpRunner::GetOutputDataType(size_t index) const -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return ACL_DT_UNDEFINED; - } - - return aclGetTensorDescType(opDesc_->outputDesc[index]); -} - -aclFormat OpRunner::GetOutputFormat(size_t index) const -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return ACL_FORMAT_UNDEFINED; - } - - return aclGetTensorDescFormat(opDesc_->outputDesc[index]); -} - -std::vector OpRunner::GetOutputShape(size_t index) const -{ - std::vector ret; - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return ret; - } - - auto desc = opDesc_->outputDesc[index]; - for (size_t i = 0; i < aclGetTensorDescNumDims(desc); ++i) { - int64_t dimSize; - if (aclGetTensorDescDimV2(desc, i, &dimSize) != ACL_SUCCESS) { - ERROR_LOG("get dims from tensor desc failed. dims index = %zu", i); - ret.clear(); - return ret; - } - ret.emplace_back(dimSize); - } - return ret; -} - -size_t OpRunner::GetInputElementCount(size_t index) const -{ - if (index >= opDesc_->inputDesc.size()) { - ERROR_LOG("index out of range. 
index = %zu, numInputs = %zu", index, numInputs_);
-        return 0;
-    }
-
-    return aclGetTensorDescElementCount(opDesc_->inputDesc[index]);
-}
-
-size_t OpRunner::GetOutputElementCount(size_t index) const
-{
-    if (index >= opDesc_->outputDesc.size()) {
-        ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_);
-        return 0;
-    }
-
-    return aclGetTensorDescElementCount(opDesc_->outputDesc[index]);
-}
-
-bool OpRunner::RunOp()
-{
-    for (size_t i = 0; i < numInputs_; ++i) {
-        auto size = GetInputSize(i);
-        aclrtMemcpyKind kind = ACL_MEMCPY_HOST_TO_DEVICE;
-        if (g_isDevice) {
-            kind = ACL_MEMCPY_DEVICE_TO_DEVICE;
-        }
-        if (aclrtMemcpy(devInputs_[i], size, hostInputs_[i], size, kind) != ACL_SUCCESS) {
-            ERROR_LOG("Copy input[%zu] failed", i);
-            return false;
-        }
-        INFO_LOG("Copy input[%zu] success", i);
-    }
-
-    aclrtStream stream = nullptr;
-    if (aclrtCreateStream(&stream) != ACL_SUCCESS) {
-        ERROR_LOG("Create stream failed");
-        return false;
-    }
-    INFO_LOG("Create stream success");
-
-    aclOpExecutor *handle = nullptr;
-
-    auto ret =
-        aclopCompileAndExecuteV2(opDesc_->opType.c_str(), numInputs_, opDesc_->inputDesc.data(), inputBuffers_.data(),
-                                 numOutputs_, opDesc_->outputDesc.data(), outputBuffers_.data(), opDesc_->opAttr,
-                                 ACL_ENGINE_SYS, ACL_COMPILE_SYS, nullptr, stream);
-    if (ret != ACL_SUCCESS) {
-        (void)aclrtDestroyStream(stream);
-        ERROR_LOG("aclopCompileAndExecuteV2 Operator failed. error code is %d", static_cast<int32_t>(ret));
-        return false;
-    }
-    ret = aclrtSynchronizeStreamWithTimeout(stream, 5000);
-    if (ret != ACL_SUCCESS) {
-        ERROR_LOG("Synchronize stream failed. error code is %d", static_cast<int32_t>(ret));
-        (void)aclrtDestroyStream(stream);
-        return false;
-    }
-    INFO_LOG("Synchronize stream success");
-
-    for (size_t i = 0; i < numOutputs_; ++i) {
-        auto size = GetOutputSize(i);
-        aclrtMemcpyKind kind = ACL_MEMCPY_DEVICE_TO_HOST;
-        if (g_isDevice) {
-            kind = ACL_MEMCPY_DEVICE_TO_DEVICE;
-        }
-        if (aclrtMemcpy(hostOutputs_[i], size, devOutputs_[i], size, kind) != ACL_SUCCESS) {
-            ERROR_LOG("Copy output[%zu] failed", i);
-            (void)aclrtDestroyStream(stream);
-            return false;
-        }
-        INFO_LOG("Copy output[%zu] success", i);
-    }
-
-    (void)aclrtDestroyStream(stream);
-    return true;
-}
-
-template <typename T> void DoPrintData(const T *data, size_t count, size_t elementsPerRow)
-{
-    assert(elementsPerRow != 0);
-    for (size_t i = 0; i < count; ++i) {
-        std::cout << std::setw(10) << data[i];
-        if (i % elementsPerRow == elementsPerRow - 1) {
-            std::cout << std::endl;
-        }
-    }
-}
-
-void DoPrintFp16Data(const aclFloat16 *data, size_t count, size_t elementsPerRow)
-{
-    assert(elementsPerRow != 0);
-    for (size_t i = 0; i < count; ++i) {
-        std::cout << std::setw(10) << std::setprecision(4) << aclFloat16ToFloat(data[i]);
-        if (i % elementsPerRow == elementsPerRow - 1) {
-            std::cout << std::endl;
-        }
-    }
-}
data is nullptr"); - return; - } - - switch (dataType) { - case ACL_BOOL: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT8: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT16: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT32: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_INT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_UINT64: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT16: - DoPrintFp16Data(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_FLOAT: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - case ACL_DOUBLE: - DoPrintData(reinterpret_cast(data), count, elementsPerRow); - break; - default: - ERROR_LOG("Unsupported type: %d", dataType); - } -} - -void OpRunner::PrintInput(size_t index, size_t numElementsPerRow) -{ - if (index >= numInputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numInputs_); - return; - } - - auto desc = opDesc_->inputDesc[index]; - PrintData(hostInputs_[index], GetInputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); -} - -void OpRunner::PrintOutput(size_t index, size_t numElementsPerRow) -{ - if (index >= numOutputs_) { - ERROR_LOG("index out of range. index = %zu, numOutputs = %zu", index, numOutputs_); - return; - } - - auto desc = opDesc_->outputDesc[index]; - PrintData(hostOutputs_[index], GetOutputElementCount(index), aclGetTensorDescType(desc), numElementsPerRow); -} diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/operator_desc.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/operator_desc.cpp deleted file mode 100644 index 921926cc12e2093992758652eb1c2b6608f4d603..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/src/operator_desc.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/** - * @file operator_desc.cpp - * - * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- */ -#include "operator_desc.h" - -#include "common.h" - -using namespace std; - -OperatorDesc::OperatorDesc() -{ - opAttr = aclopCreateAttr(); -} - -OperatorDesc::~OperatorDesc() -{ - for (auto *desc : inputDesc) { - aclDestroyTensorDesc(desc); - } - - for (auto *desc : outputDesc) { - aclDestroyTensorDesc(desc); - } - aclopDestroyAttr(opAttr); -} - -OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format) -{ - aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); - if (desc == nullptr) { - ERROR_LOG("create tensor failed"); - return *this; - } - inputDesc.emplace_back(desc); - return *this; -} - -OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, - aclFormat format) -{ - aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); - if (desc == nullptr) { - ERROR_LOG("create tensor failed"); - return *this; - } - - outputDesc.emplace_back(desc); - return *this; -} diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/CMakeLists.txt b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/CMakeLists.txt deleted file mode 100755 index 584132d80993d309434fb1303de83910a1989aba..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/CMakeLists.txt +++ /dev/null @@ -1,69 +0,0 @@ -cmake_minimum_required(VERSION 3.16.0) -project(opp) -if(ENABLE_CROSS_COMPILE) - if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64) - set(CROSS_COMPILE_PLATFORM aarch64) - else() - set(CROSS_COMPILE_PLATFORM x86_64) - endif() - set(PLATFORM ${CMAKE_SYSTEM_PROCESSOR}) - set(CMAKE_COMPILE_COMPILER_LIBRARY ${ASCEND_CANN_PACKAGE_PATH}/${PLATFORM}-linux/devlib/linux/${CROSS_COMPILE_PLATFORM}/) - set(CMAKE_COMPILE_RUNTIME_LIBRARY ${ASCEND_CANN_PACKAGE_PATH}/${PLATFORM}-linux/devlib/${CROSS_COMPILE_PLATFORM}/) - set(CMAKE_SYSTEM_PROCESSOR ${CROSS_COMPILE_PLATFORM}) - set(CMAKE_COMPILE ${CMAKE_CXX_COMPILER}) - set(CMAKE_CXX_COMPILER ${CMAKE_CROSS_PLATFORM_COMPILER}) -else() - set(CMAKE_COMPILE ${CMAKE_CXX_COMPILER}) -endif() - -include(cmake/config.cmake) -include(cmake/func.cmake) -include(cmake/intf.cmake) - -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/framework) - add_subdirectory(framework) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_host) - add_subdirectory(op_host) -endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel) - add_subdirectory(op_kernel) -endif() -if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) - add_subdirectory(testcases) -endif() - -# modify vendor_name in install.sh and upgrade.sh -add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/scripts/install.sh ${CMAKE_BINARY_DIR}/scripts/upgrade.sh - COMMAND mkdir -p ${CMAKE_BINARY_DIR}/scripts - COMMAND cp -r ${CMAKE_SOURCE_DIR}/scripts/* ${CMAKE_BINARY_DIR}/scripts/ - COMMAND sed -i "s/vendor_name=customize/vendor_name=${vendor_name}/g" ${CMAKE_BINARY_DIR}/scripts/* -) -add_custom_target(modify_vendor ALL DEPENDS ${CMAKE_BINARY_DIR}/scripts/install.sh ${CMAKE_BINARY_DIR}/scripts/upgrade.sh) -install(DIRECTORY ${CMAKE_BINARY_DIR}/scripts/ DESTINATION . 
FILE_PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_READ) - -install(FILES ${CMAKE_SOURCE_DIR}/custom.proto DESTINATION packages OPTIONAL) - -get_system_info(SYSTEM_INFO) - -# gen version.info -add_custom_target(gen_version_info ALL - COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/cmake/util/gen_version_info.sh ${ASCEND_CANN_PACKAGE_PATH} ${CMAKE_CURRENT_BINARY_DIR} -) - -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/version.info - DESTINATION packages/vendors/${vendor_name}/) - -# CPack config -set(CPACK_PACKAGE_NAME ${CMAKE_PROJECT_NAME}) -set(CPACK_PACKAGE_VERSION ${CMAKE_PROJECT_VERSION}) -set(CPACK_PACKAGE_DESCRIPTION "CPack opp project") -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "CPack opp project") -set(CPACK_PACKAGE_DIRECTORY ${CMAKE_INSTALL_PREFIX}) -set(CPACK_PACKAGE_FILE_NAME "custom_opp_${SYSTEM_INFO}.run") -set(CPACK_GENERATOR External) -set(CPACK_CMAKE_GENERATOR "Unix Makefiles") -set(CPACK_EXTERNAL_ENABLE_STAGING TRUE) -set(CPACK_EXTERNAL_PACKAGE_SCRIPT ${CMAKE_SOURCE_DIR}/cmake/makeself.cmake) -set(CPACK_EXTERNAL_BUILT_PACKAGES ${CPACK_PACKAGE_DIRECTORY}/_CPack_Packages/Linux/External/${CPACK_PACKAGE_FILE_NAME}/${CPACK_PACKAGE_FILE_NAME}) -include(CPack) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/CMakePresets.json b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/CMakePresets.json deleted file mode 100755 index f0933976520a982d22bfb3e19833a9d5e5698e08..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/CMakePresets.json +++ /dev/null @@ -1,63 +0,0 @@ -{ - "version": 1, - "cmakeMinimumRequired": { - "major": 3, - "minor": 19, - "patch": 0 - }, - "configurePresets": [ - { - "name": "default", - "displayName": "Default Config", - "description": "Default build using Unix Makefiles generator", - "generator": "Unix Makefiles", - "binaryDir": "${sourceDir}/build_out", - "cacheVariables": { - "CMAKE_BUILD_TYPE": { - "type": "STRING", - "value": "Release" - }, - "ENABLE_SOURCE_PACKAGE": { - "type": "BOOL", - "value": "True" - }, - "ENABLE_BINARY_PACKAGE": { - "type": "BOOL", - "value": "True" - }, - "ASCEND_COMPUTE_UNIT": { - "type": "STRING", - "value": "ascend310p;ascend310b;ascend910;ascend910b" - }, - "ENABLE_TEST": { - "type": "BOOL", - "value": "True" - }, - "vendor_name": { - "type": "STRING", - "value": "customize" - }, - "ASCEND_CANN_PACKAGE_PATH": { - "type": "PATH", - "value": "/usr/local/Ascend/ascend-toolkit/latest" - }, - "ASCEND_PYTHON_EXECUTABLE": { - "type": "STRING", - "value": "python3" - }, - "CMAKE_INSTALL_PREFIX": { - "type": "PATH", - "value": "${sourceDir}/build_out" - }, - "ENABLE_CROSS_COMPILE": { - "type": "BOOL", - "value": "False" - }, - "CMAKE_CROSS_PLATFORM_COMPILER": { - "type": "PATH", - "value": "/usr/bin/aarch64-linux-gnu-g++" - } - } - } - ] -} diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/build.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/build.sh deleted file mode 100755 index 4be96d7d8b99f0dd4b8052a16d17afe9c809fb54..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/build.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -script_path=$(realpath $(dirname $0)) - - -mkdir -p build_out -rm -rf build_out/* -cd build_out - -cmake_version=$(cmake --version | grep "cmake version" | awk '{print $3}') -if [ "$cmake_version" \< "3.19.0" ] ; then - opts=$(python3 $script_path/cmake/util/preset_parse.py $script_path/CMakePresets.json) - 
echo $opts - cmake .. $opts -else - cmake .. --preset=default -fi -target=package -if [ "$1"x != ""x ]; then target=$1; fi - -cmake --build . --target $target -j16 -if [ $? -ne 0 ]; then exit 1; fi - -if [ $target = "package" ]; then - if test -d ./op_kernel/binary ; then - ./cust*.run - if [ $? -ne 0 ]; then exit 1; fi - cmake --build . --target binary -j16 - if [ $? -ne 0 ]; then exit 1; fi - cmake --build . --target $target -j16 - fi -fi - -# for debug -# cd build_out -# make -# cpack -# verbose append -v diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/config.cmake b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/config.cmake deleted file mode 100755 index 886119daadd85495676c07dfb0b629e3deab8ccf..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/config.cmake +++ /dev/null @@ -1,25 +0,0 @@ - -set(CMAKE_CXX_FLAGS_DEBUG "") -set(CMAKE_CXX_FLAGS_RELEASE "") - -if (NOT DEFINED vendor_name) - set(vendor_name customize CACHE STRING "") -endif() -if (NOT DEFINED ASCEND_CANN_PACKAGE_PATH) - set(ASCEND_CANN_PACKAGE_PATH /usr/local/Ascend/latest CACHE PATH "") -endif() -if (NOT DEFINED ASCEND_PYTHON_EXECUTABLE) - set(ASCEND_PYTHON_EXECUTABLE python3 CACHE STRING "") -endif() -if (NOT DEFINED ASCEND_COMPUTE_UNIT) - message(FATAL_ERROR "ASCEND_COMPUTE_UNIT not set in CMakePreset.json ! -") -endif() -set(ASCEND_TENSOR_COMPILER_PATH ${ASCEND_CANN_PACKAGE_PATH}/compiler) -set(ASCEND_CCEC_COMPILER_PATH ${ASCEND_TENSOR_COMPILER_PATH}/ccec_compiler/bin) -set(ASCEND_AUTOGEN_PATH ${CMAKE_BINARY_DIR}/autogen) -set(ASCEND_FRAMEWORK_TYPE tensorflow) -file(MAKE_DIRECTORY ${ASCEND_AUTOGEN_PATH}) -set(CUSTOM_COMPILE_OPTIONS "custom_compile_options.ini") -execute_process(COMMAND rm -rf ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS} - COMMAND touch ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS}) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/func.cmake b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/func.cmake deleted file mode 100755 index c9cd0fcbf15df84e7307af3c2cee2bc19d5f1c24..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/func.cmake +++ /dev/null @@ -1,229 +0,0 @@ - -function(get_system_info SYSTEM_INFO) - if (UNIX) - execute_process(COMMAND grep -i ^id= /etc/os-release OUTPUT_VARIABLE TEMP) - string(REGEX REPLACE "\n|id=|ID=|\"" "" SYSTEM_NAME ${TEMP}) - set(${SYSTEM_INFO} ${SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR} PARENT_SCOPE) - elseif (WIN32) - message(STATUS "System is Windows. 
Only for pre-build.") - else () - message(FATAL_ERROR "${CMAKE_SYSTEM_NAME} not support.") - endif () -endfunction() - -function(opbuild) - message(STATUS "Opbuild generating sources") - cmake_parse_arguments(OPBUILD "" "OUT_DIR;PROJECT_NAME;ACCESS_PREFIX" "OPS_SRC" ${ARGN}) - execute_process(COMMAND ${CMAKE_COMPILE} -g -fPIC -shared -std=c++11 ${OPBUILD_OPS_SRC} -D_GLIBCXX_USE_CXX11_ABI=0 - -I ${ASCEND_CANN_PACKAGE_PATH}/include -I ${CMAKE_CURRENT_SOURCE_DIR}/../op_kernel - -L ${ASCEND_CANN_PACKAGE_PATH}/lib64 -lexe_graph -lregister -ltiling_api - -o ${OPBUILD_OUT_DIR}/libascend_all_ops.so - RESULT_VARIABLE EXEC_RESULT - OUTPUT_VARIABLE EXEC_INFO - ERROR_VARIABLE EXEC_ERROR - ) - if (${EXEC_RESULT}) - message("build ops lib info: ${EXEC_INFO}") - message("build ops lib error: ${EXEC_ERROR}") - message(FATAL_ERROR "opbuild run failed!") - endif() - set(proj_env "") - set(prefix_env "") - if (NOT "${OPBUILD_PROJECT_NAME}x" STREQUAL "x") - set(proj_env "OPS_PROJECT_NAME=${OPBUILD_PROJECT_NAME}") - endif() - if (NOT "${OPBUILD_ACCESS_PREFIX}x" STREQUAL "x") - set(prefix_env "OPS_DIRECT_ACCESS_PREFIX=${OPBUILD_ACCESS_PREFIX}") - endif() - execute_process(COMMAND ${proj_env} ${prefix_env} ${ASCEND_CANN_PACKAGE_PATH}/toolkit/tools/opbuild/op_build - ${OPBUILD_OUT_DIR}/libascend_all_ops.so ${OPBUILD_OUT_DIR} - RESULT_VARIABLE EXEC_RESULT - OUTPUT_VARIABLE EXEC_INFO - ERROR_VARIABLE EXEC_ERROR - ) - if (${EXEC_RESULT}) - message("opbuild ops info: ${EXEC_INFO}") - message("opbuild ops error: ${EXEC_ERROR}") - endif() - message(STATUS "Opbuild generating sources - done") -endfunction() - -function(add_ops_info_target) - cmake_parse_arguments(OPINFO "" "TARGET;OPS_INFO;OUTPUT;INSTALL_DIR" "" ${ARGN}) - get_filename_component(opinfo_file_path "${OPINFO_OUTPUT}" DIRECTORY) - add_custom_command(OUTPUT ${OPINFO_OUTPUT} - COMMAND mkdir -p ${opinfo_file_path} - COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/parse_ini_to_json.py - ${OPINFO_OPS_INFO} ${OPINFO_OUTPUT} - ) - add_custom_target(${OPINFO_TARGET} ALL - DEPENDS ${OPINFO_OUTPUT} - ) - install(FILES ${OPINFO_OUTPUT} - DESTINATION ${OPINFO_INSTALL_DIR} - ) -endfunction() - -function(add_ops_compile_options OP_TYPE) - cmake_parse_arguments(OP_COMPILE "" "OP_TYPE" "COMPUTE_UNIT;OPTIONS" ${ARGN}) - file(APPEND ${ASCEND_AUTOGEN_PATH}/${CUSTOM_COMPILE_OPTIONS} - "${OP_TYPE},${OP_COMPILE_COMPUTE_UNIT},${OP_COMPILE_OPTIONS}\n") -endfunction() - -function(add_ops_impl_target) - cmake_parse_arguments(OPIMPL "" "TARGET;OPS_INFO;IMPL_DIR;OUT_DIR;INSTALL_DIR" "OPS_BATCH;OPS_ITERATE" ${ARGN}) - add_custom_command(OUTPUT ${OPIMPL_OUT_DIR}/.impl_timestamp - COMMAND mkdir -m 700 -p ${OPIMPL_OUT_DIR}/dynamic - COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py - ${OPIMPL_OPS_INFO} - \"${OPIMPL_OPS_BATCH}\" \"${OPIMPL_OPS_ITERATE}\" - ${OPIMPL_IMPL_DIR} - ${OPIMPL_OUT_DIR}/dynamic - ${ASCEND_AUTOGEN_PATH} - - COMMAND rm -rf ${OPIMPL_OUT_DIR}/.impl_timestamp - COMMAND touch ${OPIMPL_OUT_DIR}/.impl_timestamp - DEPENDS ${OPIMPL_OPS_INFO} - ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_impl_build.py - ) - add_custom_target(${OPIMPL_TARGET} ALL - DEPENDS ${OPIMPL_OUT_DIR}/.impl_timestamp) - if (${ENABLE_SOURCE_PACKAGE}) - install(DIRECTORY ${OPIMPL_OUT_DIR}/dynamic - DESTINATION ${OPIMPL_INSTALL_DIR} - ) - endif() -endfunction() - -function(add_ops_replay_targets) - cmake_parse_arguments(OPREPLAY "" "OPS_INFO;COMPUTE_UNIT;IMPL_DIR;OUT_DIR;INSTALL_DIR" "OPS_BATCH;OPS_ITERATE" ${ARGN}) - # ccec compile options - 
set(ccec_base_opts -c -O2 --cce-aicore-only -mllvm -cce-aicore-function-stack-size=16000 - -mllvm -cce-aicore-record-overflow=false -std=c++17) - set(ccec_extopts_ascend310p --cce-aicore-arch=dav-m200 -mllvm -cce-aicore-fp-ceiling=2) - set(ccec_extopts_ascend910 --cce-aicore-arch=dav-c100) - set(ccec_extopts_ascend910b --cce-aicore-arch=dav-c220-cube) - file(MAKE_DIRECTORY ${OPREPLAY_OUT_DIR}) - execute_process(COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_replay_build.py - ${OPREPLAY_OPS_INFO} - "${OPREPLAY_OPS_BATCH}" "${OPREPLAY_OPS_ITERATE}" - ${OPREPLAY_IMPL_DIR} - ${OPREPLAY_OUT_DIR} - ${OPREPLAY_COMPUTE_UNIT} - ) - file(GLOB replay_kernel_entries ${OPREPLAY_OUT_DIR}/*.cce) - if (NOT "${replay_kernel_entries}x" STREQUAL "x") - foreach(replay_kernel_file ${replay_kernel_entries}) - get_filename_component(replay_kernel_file_name "${replay_kernel_file}" NAME) - string(REPLACE "_entry.cce" "" op_kerne_name ${replay_kernel_file_name}) - file(GLOB replay_lib_src ${OPREPLAY_OUT_DIR}/${op_kerne_name}*.cpp) - set(OP_TILING_DATA_H_PATH ${OPREPLAY_OUT_DIR}/${op_kerne_name}_tiling_data.h) - add_library(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} SHARED ${replay_lib_src}) - if(EXISTS ${OP_TILING_DATA_H_PATH}) - target_compile_options(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} PRIVATE - -include ${OP_TILING_DATA_H_PATH} - ) - endif() - target_compile_definitions(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} PRIVATE - ${op_kerne_name}=${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} - ) - target_compile_options(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} PRIVATE - -D__ASCENDC_REPLAY__ - ) - target_link_libraries(replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} PRIVATE intf_pub - tikreplaylib::${OPREPLAY_COMPUTE_UNIT} - register - ) - add_custom_command(OUTPUT ${OPREPLAY_OUT_DIR}/${op_kerne_name}_entry_${OPREPLAY_COMPUTE_UNIT}.o - COMMAND ccec ${ccec_base_opts} ${ccec_extopts_${OPREPLAY_COMPUTE_UNIT}} ${replay_kernel_file} - -o ${OPREPLAY_OUT_DIR}/${op_kerne_name}_entry_${OPREPLAY_COMPUTE_UNIT}.o - DEPENDS ${replay_kernel_file} - ) - add_custom_target(replay_kernel_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} ALL - DEPENDS ${OPREPLAY_OUT_DIR}/${op_kerne_name}_entry_${OPREPLAY_COMPUTE_UNIT}.o - ) - install(TARGETS replay_${op_kerne_name}_${OPREPLAY_COMPUTE_UNIT} - LIBRARY DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_replay - ) - install(FILES ${OPREPLAY_OUT_DIR}/${op_kerne_name}_entry_${OPREPLAY_COMPUTE_UNIT}.o - DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_replay - ) - endforeach() - endif() -endfunction() - -function(add_npu_support_target) - cmake_parse_arguments(NPUSUP "" "TARGET;OPS_INFO_DIR;OUT_DIR;INSTALL_DIR" "" ${ARGN}) - get_filename_component(npu_sup_file_path "${NPUSUP_OUT_DIR}" DIRECTORY) - add_custom_command(OUTPUT ${NPUSUP_OUT_DIR}/npu_supported_ops.json - COMMAND mkdir -p ${NPUSUP_OUT_DIR} - COMMAND ${CMAKE_SOURCE_DIR}/cmake/util/gen_ops_filter.sh - ${NPUSUP_OPS_INFO_DIR} - ${NPUSUP_OUT_DIR} - ) - add_custom_target(npu_supported_ops ALL - DEPENDS ${NPUSUP_OUT_DIR}/npu_supported_ops.json - ) - install(FILES ${NPUSUP_OUT_DIR}/npu_supported_ops.json - DESTINATION ${NPUSUP_INSTALL_DIR} - ) -endfunction() - -function(add_bin_compile_target) - cmake_parse_arguments(BINCMP "" "TARGET;OPS_INFO;COMPUTE_UNIT;IMPL_DIR;ADP_DIR;OUT_DIR;INSTALL_DIR" "" ${ARGN}) - file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/src) - file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/bin) - file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/gen) - 
execute_process(COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_bin_param_build.py - ${BINCMP_OPS_INFO} ${BINCMP_OUT_DIR}/gen ${BINCMP_COMPUTE_UNIT} - RESULT_VARIABLE EXEC_RESULT - OUTPUT_VARIABLE EXEC_INFO - ERROR_VARIABLE EXEC_ERROR - ) - if (${EXEC_RESULT}) - message("ops binary compile scripts gen info: ${EXEC_INFO}") - message("ops binary compile scripts gen error: ${EXEC_ERROR}") - message(FATAL_ERROR "ops binary compile scripts gen failed!") - endif() - if (NOT TARGET binary) - add_custom_target(binary) - endif() - add_custom_target(${BINCMP_TARGET} - COMMAND cp -r ${BINCMP_IMPL_DIR}/*.* ${BINCMP_OUT_DIR}/src - ) - add_custom_target(${BINCMP_TARGET}_gen_ops_config - COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/insert_simplified_keys.py -p ${BINCMP_OUT_DIR}/bin - COMMAND ${ASCEND_PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/cmake/util/ascendc_ops_config.py -p ${BINCMP_OUT_DIR}/bin - -s ${BINCMP_COMPUTE_UNIT} - ) - add_dependencies(binary ${BINCMP_TARGET}_gen_ops_config) - file(GLOB bin_scripts ${BINCMP_OUT_DIR}/gen/*.sh) - foreach(bin_script ${bin_scripts}) - get_filename_component(bin_file ${bin_script} NAME_WE) - string(REPLACE "-" ";" bin_sep ${bin_file}) - list(GET bin_sep 0 op_type) - list(GET bin_sep 1 op_file) - list(GET bin_sep 2 op_index) - if (NOT TARGET ${BINCMP_TARGET}_${op_file}_copy) - file(MAKE_DIRECTORY ${BINCMP_OUT_DIR}/bin/${op_file}) - add_custom_target(${BINCMP_TARGET}_${op_file}_copy - COMMAND cp ${BINCMP_ADP_DIR}/${op_file}.py ${BINCMP_OUT_DIR}/src/${op_type}.py - ) - install(DIRECTORY ${BINCMP_OUT_DIR}/bin/${op_file} - DESTINATION ${BINCMP_INSTALL_DIR}/${BINCMP_COMPUTE_UNIT} OPTIONAL - ) - install(FILES ${BINCMP_OUT_DIR}/bin/${op_file}.json - DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT}/ OPTIONAL - ) - endif() - add_custom_target(${BINCMP_TARGET}_${op_file}_${op_index} - COMMAND export HI_PYTHON=${ASCEND_PYTHON_EXECUTABLE} && bash ${bin_script} ${BINCMP_OUT_DIR}/src/${op_type}.py ${BINCMP_OUT_DIR}/bin/${op_file} - WORKING_DIRECTORY ${BINCMP_OUT_DIR} - ) - add_dependencies(${BINCMP_TARGET}_${op_file}_${op_index} ${BINCMP_TARGET} ${BINCMP_TARGET}_${op_file}_copy) - add_dependencies(${BINCMP_TARGET}_gen_ops_config ${BINCMP_TARGET}_${op_file}_${op_index}) - endforeach() - install(FILES ${BINCMP_OUT_DIR}/bin/binary_info_config.json - DESTINATION ${BINCMP_INSTALL_DIR}/config/${BINCMP_COMPUTE_UNIT} OPTIONAL - ) -endfunction() diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/intf.cmake b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/intf.cmake deleted file mode 100755 index d2643bbc752aeaf339a90b0703848075d5a478f5..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/intf.cmake +++ /dev/null @@ -1,28 +0,0 @@ - -add_library(intf_pub INTERFACE) -target_compile_options(intf_pub INTERFACE - -fPIC - -fvisibility=hidden - -fvisibility-inlines-hidden - $<$:-O2> - $<$:-O0 -g> - $<$:-std=c++11> - $<$,$>:-ftrapv -fstack-check> - $<$:-pthread -Wfloat-equal -Wshadow -Wformat=2 -Wno-deprecated -Wextra> - $,-fstack-protector-strong,-fstack-protector-all> -) -target_compile_definitions(intf_pub INTERFACE - _GLIBCXX_USE_CXX11_ABI=0 - $<$:_FORTIFY_SOURCE=2> -) -target_include_directories(intf_pub INTERFACE ${ASCEND_CANN_PACKAGE_PATH}/include - ${CMAKE_CURRENT_SOURCE_DIR}/op_kernel -) -target_link_options(intf_pub INTERFACE - $<$,EXECUTABLE>:-pie> - $<$:-s> - -Wl,-z,relro - -Wl,-z,now - 
-Wl,-z,noexecstack -) -target_link_directories(intf_pub INTERFACE ${ASCEND_CANN_PACKAGE_PATH}/lib64) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/makeself.cmake b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/makeself.cmake deleted file mode 100755 index 48c565bfb4f2edc6534a81abaa8565c4cf2dfc30..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/makeself.cmake +++ /dev/null @@ -1,17 +0,0 @@ -execute_process(COMMAND chmod +x ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh) -execute_process(COMMAND ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself.sh - --header ${CMAKE_CURRENT_LIST_DIR}/util/makeself/makeself-header.sh - --help-header ./help.info - --gzip --complevel 4 --nomd5 --sha256 - ./ ${CPACK_PACKAGE_FILE_NAME} "version:1.0" ./install.sh - WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY} - RESULT_VARIABLE EXEC_RESULT - ERROR_VARIABLE EXEC_ERROR -) -if (NOT "${EXEC_RESULT}x" STREQUAL "0x") - message(FATAL_ERROR "CPack Command error: ${EXEC_RESULT}\n${EXEC_ERROR}") -endif() -execute_process(COMMAND cp ${CPACK_EXTERNAL_BUILT_PACKAGES} ${CPACK_PACKAGE_DIRECTORY}/ - COMMAND echo "Copy ${CPACK_EXTERNAL_BUILT_PACKAGES} to ${CPACK_PACKAGE_DIRECTORY}/" - WORKING_DIRECTORY ${CPACK_TEMPORARY_DIRECTORY} -) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/__init__.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/__init__.py deleted file mode 100755 index c4ddc893a9275672e046b1311c6ee2d1578f405e..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -import os - -PYF_PATH = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(PYF_PATH) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_bin_param_build.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_bin_param_build.py deleted file mode 100755 index decf34544880c68fd89e809b15d415844b9882e6..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_bin_param_build.py +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Created on Feb 28 20:56:45 2020 -Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. 
-""" - -import sys -import os -import json -import hashlib -import const_var -import opdesc_parser - -PYF_PATH = os.path.dirname(os.path.realpath(__file__)) - - -class BinParamBuilder(opdesc_parser.OpDesc): - def __init__(self: any, op_type: str): - super().__init__(op_type) - self.soc = '' - self.out_path = '' - - def set_soc_version(self: any, soc: str): - self.soc = soc - - def set_out_path(self: any, out_path: str): - self.out_path = out_path - - def gen_input_json(self: any): - key_map = {} - count = len(self.input_dtype[0].split(',')) - for i in range(0, count): - inputs = [] - outputs = [] - attrs = [] - op_node = {} - for idx in range(0, len(self.input_name)): - idtypes = self.input_dtype[idx].split(',') - ifmts = self.input_fmt[idx].split(',') - itype = self.input_type[idx] - para = {} - para['name'] = self.input_name[idx] - para['index'] = idx - para['dtype'] = idtypes[i] - para['format'] = ifmts[i] - para['paramType'] = itype - para['shape'] = [-2] - if itype == 'dynamic': - inputs.append([para]) - else: - inputs.append(para) - for idx in range(0, len(self.output_name)): - odtypes = self.output_dtype[idx].split(',') - ofmts = self.output_fmt[idx].split(',') - otype = self.output_type[idx] - para = {} - para['name'] = self.output_name[idx] - para['index'] = idx - para['dtype'] = odtypes[i] - para['format'] = ofmts[i] - para['paramType'] = otype - para['shape'] = [-2] - if otype == 'dynamic': - outputs.append([para]) - else: - outputs.append(para) - for attr in self.attr_list: - att = {} - att['name'] = attr - atype = self.attr_val.get(attr).get('type').lower() - atype = atype.replace('list', 'list_') - att['dtype'] = atype - att['value'] = const_var.ATTR_DEF_VAL.get(atype) - attrs.append(att) - op_node['bin_filename'] = '' - op_node['inputs'] = inputs - op_node['outputs'] = outputs - if len(attrs) > 0: - op_node['attrs'] = attrs - param = {} - param['op_type'] = self.op_type - param['op_list'] = [op_node] - objstr = json.dumps(param, indent=' ') - md5sum = hashlib.md5(objstr.encode('utf-8')).hexdigest() - while key_map.get(md5sum) is not None: - objstr += '1' - md5sum = hashlib.md5(objstr.encode('utf-8')).hexdigest() - key_map[md5sum] = md5sum - bin_file = self.op_type + '_' + md5sum - op_node['bin_filename'] = bin_file - param_file = os.path.join(self.out_path, bin_file + '_param.json') - param_file = os.path.realpath(param_file) - with os.fdopen(os.open(param_file, const_var.WFLAGS, const_var.WMODES), 'w') as fd: - json.dump(param, fd, indent=' ') - self._write_buld_cmd(param_file, bin_file, i) - - - def _write_buld_cmd(self: any, param_file: str, bin_file: str, index: int): - hard_soc = const_var.SOC_MAP_EXT.get(self.soc) - if not hard_soc: - hard_soc = soc.capitalize() - name_com = [self.op_type, self.op_file, str(index)] - compile_file = os.path.join(self.out_path, '-'.join(name_com) + '.sh') - compile_file = os.path.realpath(compile_file) - with os.fdopen(os.open(compile_file, const_var.WFLAGS, const_var.WMODES), 'w') as fd: - fd.write('#!/bin/bash\n') - fd.write('echo "[{}] Generating {} ..."\n'.format(hard_soc, bin_file)) - cmd = const_var.BIN_CMD.format(fun=self.op_intf, soc=hard_soc, param=param_file, impl='""') - fd.write(cmd) - chk = const_var.CHK_CMD.format(res_file=bin_file + '.json') - fd.write(chk) - chk = const_var.CHK_CMD.format(res_file=bin_file + '.o') - fd.write(chk) - fd.write('echo "[{}] Generating {} Done"\n'.format(hard_soc, bin_file)) - - -def gen_bin_param_file(cfgfile: str, out_dir: str, soc: str): - op_descs = opdesc_parser.get_op_desc(cfgfile, [], 
[], BinParamBuilder, None) - for op_desc in op_descs: - op_desc.set_soc_version(soc) - op_desc.set_out_path(out_dir) - op_desc.gen_input_json() - - -if __name__ == '__main__': - if len(sys.argv) <= 3: - raise RuntimeError('arguments must greater than 3') - gen_bin_param_file(sys.argv[1], sys.argv[2], sys.argv[3]) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_impl_build.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_impl_build.py deleted file mode 100755 index 3a973cae8acd425c365a2a075968bb740c28f4fd..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_impl_build.py +++ /dev/null @@ -1,447 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Created on Feb 28 20:56:45 2020 -Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. -""" - -import sys -import os -import re -import stat -import opdesc_parser -import const_var - -PYF_PATH = os.path.dirname(os.path.realpath(__file__)) - -IMPL_HEAD = ''' -import os, sys -import ctypes -import json -import shutil -from tbe.common.platform import get_soc_spec -from tbe.common.utils import para_check -from tbe.tikcpp import compile_op, replay_op, check_op_cap, generalize_op_params, get_code_channel, OpInfo -from tbe.common.buildcfg import get_default_build_config -from impl.util.platform_adapter import tbe_register -from tbe.common.buildcfg import get_current_build_config -PYF_PATH = os.path.dirname(os.path.realpath(__file__)) - -DTYPE_MAP = {"float32": ["DT_FLOAT", "float"], - "float16": ["DT_FLOAT16", "half"], - "int8": ["DT_INT8", "int8_t"], - "int16": ["DT_INT16", "int16_t"], - "int32": ["DT_INT32", "int32_t"], - "int64": ["DT_INT64", "int64_t"], - "uint1": ["DT_UINT1", "uint8_t"], - "uint8": ["DT_UINT8", "uint8_t"], - "uint16": ["DT_UINT16", "uint16_t"], - "uint32": ["DT_UINT32", "uint32_t"], - "uint64": ["DT_UINT64", "uint64_t"], - "bool": ["DT_BOOL", "bool"], - "double": ["DT_DOUBLE", "double"], - "dual": ["DT_DUAL", "unknown"], - "dual_sub_int8": ["DT_DUAL_SUB_INT8", "unknown"], - "dual_sub_uint8": ["DT_DUAL_SUB_UINT8", "unknown"], - "string": ["DT_STRING", "unknown"], - "complex64": ["DT_COMPLEX64", "unknown"], - "complex128": ["DT_COMPLEX128", "unknown"], - "qint8": ["DT_QINT8", "unknown"], - "qint16": ["DT_QINT16", "unknown"], - "qint32": ["DT_QINT32", "unknown"], - "quint8": ["DT_QUINT8", "unknown"], - "quint16": ["DT_QUINT16", "unknown"], - "resource": ["DT_RESOURCE", "unknown"], - "string_ref": ["DT_STRING_REF", "unknown"], - "int4": ["DT_INT4", "int8_t"], - "bfloat16": ["DT_BF16", "bfloat16_t"]} - -def get_dtype_fmt_options(__inputs__, __outputs__): - options = [] - for x in __inputs__ + __outputs__: - x_n = x.get("param_name").upper() - x_fmt = x.get("format") - x_dtype = x.get("dtype") - options.append("-DDTYPE_{n}={t}".format(n=x_n, t=DTYPE_MAP.get(x_dtype)[1])) - options.append("-DORIG_DTYPE_{n}={ot}".format(n=x_n, ot=DTYPE_MAP.get(x_dtype)[0])) - options.append("-DFORMAT_{n}=FORMAT_{f}".format(n=x_n, f=x_fmt)) - return options - -def load_dso(so_path): - try: - ctypes.CDLL(so_path) - except OSError as error : - print(error) - raise RuntimeError("cannot open %s" %(so_path)) - else: - print("load so succ ", so_path) - -def get_shortsoc_compile_option(compile_option_list: list, shortsoc:str): - compile_options = [] - if shortsoc in compile_option_list: - compile_options = compile_option_list[shortsoc] - elif '__ALLSOC__' in 
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_impl_build.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_impl_build.py
deleted file mode 100755
index 3a973cae8acd425c365a2a075968bb740c28f4fd..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_impl_build.py
+++ /dev/null
@@ -1,447 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-"""
-Created on Feb 28 20:56:45 2020
-Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
-"""
-
-import sys
-import os
-import re
-import stat
-import opdesc_parser
-import const_var
-
-PYF_PATH = os.path.dirname(os.path.realpath(__file__))
-
-IMPL_HEAD = '''
-import os, sys
-import ctypes
-import json
-import shutil
-from tbe.common.platform import get_soc_spec
-from tbe.common.utils import para_check
-from tbe.tikcpp import compile_op, replay_op, check_op_cap, generalize_op_params, get_code_channel, OpInfo
-from tbe.common.buildcfg import get_default_build_config
-from impl.util.platform_adapter import tbe_register
-from tbe.common.buildcfg import get_current_build_config
-PYF_PATH = os.path.dirname(os.path.realpath(__file__))
-
-DTYPE_MAP = {"float32": ["DT_FLOAT", "float"],
-             "float16": ["DT_FLOAT16", "half"],
-             "int8": ["DT_INT8", "int8_t"],
-             "int16": ["DT_INT16", "int16_t"],
-             "int32": ["DT_INT32", "int32_t"],
-             "int64": ["DT_INT64", "int64_t"],
-             "uint1": ["DT_UINT1", "uint8_t"],
-             "uint8": ["DT_UINT8", "uint8_t"],
-             "uint16": ["DT_UINT16", "uint16_t"],
-             "uint32": ["DT_UINT32", "uint32_t"],
-             "uint64": ["DT_UINT64", "uint64_t"],
-             "bool": ["DT_BOOL", "bool"],
-             "double": ["DT_DOUBLE", "double"],
-             "dual": ["DT_DUAL", "unknown"],
-             "dual_sub_int8": ["DT_DUAL_SUB_INT8", "unknown"],
-             "dual_sub_uint8": ["DT_DUAL_SUB_UINT8", "unknown"],
-             "string": ["DT_STRING", "unknown"],
-             "complex64": ["DT_COMPLEX64", "unknown"],
-             "complex128": ["DT_COMPLEX128", "unknown"],
-             "qint8": ["DT_QINT8", "unknown"],
-             "qint16": ["DT_QINT16", "unknown"],
-             "qint32": ["DT_QINT32", "unknown"],
-             "quint8": ["DT_QUINT8", "unknown"],
-             "quint16": ["DT_QUINT16", "unknown"],
-             "resource": ["DT_RESOURCE", "unknown"],
-             "string_ref": ["DT_STRING_REF", "unknown"],
-             "int4": ["DT_INT4", "int8_t"],
-             "bfloat16": ["DT_BF16", "bfloat16_t"]}
-
-def get_dtype_fmt_options(__inputs__, __outputs__):
-    options = []
-    for x in __inputs__ + __outputs__:
-        x_n = x.get("param_name").upper()
-        x_fmt = x.get("format")
-        x_dtype = x.get("dtype")
-        options.append("-DDTYPE_{n}={t}".format(n=x_n, t=DTYPE_MAP.get(x_dtype)[1]))
-        options.append("-DORIG_DTYPE_{n}={ot}".format(n=x_n, ot=DTYPE_MAP.get(x_dtype)[0]))
-        options.append("-DFORMAT_{n}=FORMAT_{f}".format(n=x_n, f=x_fmt))
-    return options
-
-def load_dso(so_path):
-    try:
-        ctypes.CDLL(so_path)
-    except OSError as error:
-        print(error)
-        raise RuntimeError("cannot open %s" % (so_path))
-    else:
-        print("load so succeeded: ", so_path)
-
-def get_shortsoc_compile_option(compile_option_list: dict, shortsoc: str):
-    compile_options = []
-    if shortsoc in compile_option_list:
-        compile_options = compile_option_list[shortsoc]
-    elif '__ALLSOC__' in compile_option_list:
-        compile_options = compile_option_list['__ALLSOC__']
-    return compile_options
-
-'''
-
-IMPL_API = '''
-@tbe_register.register_operator("{}")
-@para_check.check_op_params({})
-def {}({}, kernel_name="{}", impl_mode=""):
-    if get_current_build_config("enable_op_prebuild"):
-        return
-    __inputs__, __outputs__, __attrs__ = _build_args({})
-    options = get_dtype_fmt_options(__inputs__, __outputs__)
-    options += ["-x", "cce"]
-    ccec = os.environ.get('CCEC_REAL_PATH')
-    if ccec is None:
-        ccec = shutil.which("ccec")
-    if ccec is not None:
-        ccec_path = os.path.dirname(ccec)
-        tikcpp_path = os.path.realpath(os.path.join(ccec_path, "..", "..", "tikcpp"))
-    else:
-        tikcpp_path = os.path.realpath("/usr/local/Ascend/latest/compiler/tikcpp")
-    options.append("-I" + tikcpp_path)
-    options.append("-I" + os.path.join(tikcpp_path, "..", "..", "include"))
-    options.append("-I" + os.path.join(tikcpp_path, "tikcfw"))
-    options.append("-I" + os.path.join(tikcpp_path, "tikcfw", "impl"))
-    options.append("-I" + os.path.join(tikcpp_path, "tikcfw", "interface"))
-    options.append("-I" + os.path.join(PYF_PATH, "..", "ascendc", "common"))
-    if impl_mode == "high_performance":
-        options.append("-DHIGH_PERFORMANCE=1")
-    elif impl_mode == "high_precision":
-        options.append("-DHIGH_PRECISION=1")
-    if get_default_build_config("enable_deterministic_mode") == 1:
-        options.append("-DDETEMINISTIC_MODE=1")
-
-    custom_compile_options = {},
-    custom_all_compile_options = {},
-    soc_version = get_soc_spec("SOC_VERSION")
-    soc_short = get_soc_spec("SHORT_SOC_VERSION").lower()
-    custom_compile_options_soc = get_shortsoc_compile_option(custom_compile_options[0], soc_short)
-    custom_all_compile_options_soc = get_shortsoc_compile_option(custom_all_compile_options[0], soc_short)
-    options += custom_all_compile_options_soc
-    options += custom_compile_options_soc
-
-    origin_func_name = "{}"
-    ascendc_src_dir = "{}"
-    ascendc_src_file = "{}"
-    src = os.path.join(PYF_PATH, "..", "ascendc", ascendc_src_dir, ascendc_src_file)
-    if not os.path.exists(src):
-        src = os.path.join(PYF_PATH, ascendc_src_file)
-'''
-
-REPLAY_OP_API = '''
-    print("start replay Ascend C Operator {}, kernel name is {}")
-    tikreplay_codegen_path = tikcpp_path + "/tikreplaylib/lib"
-    tikreplay_stub_path = tikcpp_path + "/tikreplaylib/lib/" + soc_version
-    print("start load libtikreplaylib_codegen.so and libtikreplaylib_stub.so")
-    codegen_so_path = tikreplay_codegen_path + "/libtikreplaylib_codegen.so"
-    replaystub_so_path = tikreplay_stub_path + "/libtikreplaylib_stub.so"
-    if PYF_PATH.endswith("dynamic"):
-        op_replay_path = os.path.join(PYF_PATH, "..", "..", "op_replay")
-    else:
-        op_replay_path = os.path.join(PYF_PATH, "..", "op_replay")
-    replayapi_so_path = os.path.join(op_replay_path, "libreplay_{}_" + soc_short + ".so")
-    load_dso(codegen_so_path)
-    load_dso(replaystub_so_path)
-    load_dso(replayapi_so_path)
-    op_type = "{}"
-    entry_obj = os.path.join(op_replay_path, "{}_entry_" + soc_short + ".o")
-    code_channel = get_code_channel(src, kernel_name, op_type, options)
-    op_info = OpInfo(kernel_name = kernel_name, op_type = op_type, inputs = __inputs__, outputs = __outputs__,\\
-        attrs = __attrs__, impl_mode = impl_mode)
-    res, msg = replay_op(op_info, entry_obj, code_channel, src, options)
-    if not res:
-        print("call replay op failed for %s, falling back to compile_op" % (msg))
-        compile_op(src, origin_func_name, op_info, options, code_channel, '{}')
-'''
-
-COMPILE_OP_API = '''
-    print("start compile Ascend C operator {}, kernel name is {}")
-    op_type = "{}"
-    code_channel = get_code_channel(src, kernel_name, op_type, options)
-    op_info = OpInfo(kernel_name = kernel_name, op_type = op_type, inputs = __inputs__, outputs = __outputs__,\\
-        attrs = __attrs__, impl_mode = impl_mode, origin_inputs=[{}], origin_outputs = [{}])
-    compile_op(src, origin_func_name, op_info, options, code_channel, '{}')
-'''
-
-SUP_API = '''
-def {}({}, impl_mode=""):
-    __inputs__, __outputs__, __attrs__ = _build_args({})
-    ret_str = check_op_cap("{}", "{}", __inputs__, __outputs__, __attrs__)
-    ret_dict = json.loads(ret_str)
-    err_code = ret_dict.get("ret_code")
-    sup = "Unknown"
-    reason = "Unknown reason"
-    if err_code is not None:
-        if err_code == 0:
-            sup = "True"
-            reason = ""
-        elif err_code == 1:
-            sup = "False"
-            reason = ret_dict.get("reason")
-        else:
-            sup = "Unknown"
-            reason = ret_dict.get("reason")
-    return sup, reason
-'''
-CAP_API = '''
-def {}({}, impl_mode=""):
-    __inputs__, __outputs__, __attrs__ = _build_args({})
-    result = check_op_cap("{}", "{}", __inputs__, __outputs__, __attrs__)
-    return result.decode("utf-8")
-'''
-GLZ_API = '''
-@tbe_register.register_param_generalization("{}")
-def {}_generalization({}, generalize_config=None):
-    __inputs__, __outputs__, __attrs__ = _build_args({})
-    ret_str = generalize_op_params("{}", __inputs__, __outputs__, __attrs__, generalize_config)
-    return [json.loads(ret_str)]
-'''
-
-ATTR_DEFAULT = {'bool': 'False', 'int': '0', 'float': '0.0', 'listInt': '[]',
-                'listFloat': '[]', 'listBool': '[]', 'listListInt': '[[]]', 'str': ''}
-
-
-def optype_snake(origin_str):
-    temp_str = origin_str[0].lower() + origin_str[1:]
-    new_str = re.sub(r'([A-Z])', r'_\1', temp_str).lower()
-    return new_str
-
-
-class AdpBuilder(opdesc_parser.OpDesc):
-    def __init__(self: any, op_type: str):
-        self.argsname = []
-        self.argsdefv = []
-        self.op_compile_option: str = '{}'
-        super().__init__(op_type)
-
-
-    def write_adapt(self: any, impl_path, path: str, op_compile_option_all: list = None):
-        self._build_paradefault()
-        if impl_path != "":
-            src_file = os.path.join(impl_path, self.op_file + '.cpp')
-            if not os.path.exists(src_file):
-                return
-        out_path = os.path.abspath(path)
-        if self.dynamic_shape and not out_path.endswith('dynamic'):
-            out_path = os.path.join(path, 'dynamic')
-            os.makedirs(out_path, mode=0o700, exist_ok=True)
-        adpfile = os.path.join(out_path, self.op_file + '.py')
-        self._gen_op_compile_option(op_compile_option_all)
-        with os.fdopen(os.open(adpfile, const_var.WFLAGS, const_var.WMODES), 'w') as fd:
-            self._write_head(fd)
-            self._write_argparse(fd)
-            self._write_impl(fd)
-            if self.op_chk_support:
-                self._write_cap('check_supported', fd)
-                self._write_cap('get_op_support_info', fd)
-            if self.op_fmt_sel:
-                self._write_cap('op_select_format', fd)
-                self._write_cap('get_op_specific_info', fd)
-            if self.op_range_limit == 'limited' or self.op_range_limit == 'dynamic':
-                self._write_glz(fd)
-
-
-    def _gen_op_compile_option(self: any, op_compile_option_all: list = None):
-        if op_compile_option_all is not None:
-            if self.op_type in op_compile_option_all:
-                self.op_compile_option = op_compile_option_all[self.op_type]
-            elif "__all__" in op_compile_option_all:
-                self.op_compile_option = op_compile_option_all["__all__"]
-
-
-    def _ip_argpack(self: any, default: bool = True) -> list:
-        args = []
-        for i in range(len(self.input_name)):
-            arg = self.input_name[i]
-            if default and self.argsdefv[i] is not None:
-                arg += '=' + self.argsdefv[i]
-            args.append(arg)
-        return args
-
-    def _op_argpack(self: any, default: bool = True) -> list:
-        args = []
-        argidx = len(self.input_name)
-        for i in range(len(self.output_name)):
-            arg = self.output_name[i]
-            if default and self.argsdefv[i + argidx] is not None:
-                arg += '=' + self.argsdefv[i + argidx]
-            args.append(arg)
-        return args
-
-    def _attr_argpack(self: any, default: bool = True) -> list:
-        args = []
-        argidx = len(self.input_name) + len(self.output_name)
-        for i in range(len(self.attr_list)):
-            att = self.attr_list[i]
-            arg = att
-            if default and self.argsdefv[i + argidx] is not None:
-                if self.attr_val.get(att).get('type') == 'str':
-                    arg += '="' + self.argsdefv[i + argidx] + '"'
-                elif self.attr_val.get(att).get('type') == 'bool':
-                    arg += '=' + self.argsdefv[i + argidx].capitalize()
-                else:
-                    arg += '=' + self.argsdefv[i + argidx]
-            args.append(arg)
-        return args
-
-    def _build_paralist(self: any, default: bool = True) -> str:
-        args = []
-        args.extend(self._ip_argpack(default))
-        args.extend(self._op_argpack(default))
-        args.extend(self._attr_argpack(default))
-        return ', '.join(args)
-
-    def _io_parachk(self: any, types: list, type_name: str) -> list:
-        chk = []
-        for iot in types:
-            if iot == 'optional':
-                ptype = 'OPTION'
-            else:
-                ptype = iot.upper()
-            chk.append('para_check.{}_{}'.format(ptype, type_name))
-        return chk
-
-    def _attr_parachk(self: any) -> list:
-        chk = []
-        for att in self.attr_list:
-            if self.attr_val.get(att).get('paramType') == 'optional':
-                pt = 'OPTION'
-            else:
-                pt = self.attr_val.get(att).get('paramType').upper()
-            att_type = self.attr_val.get(att).get('type').upper()
-            att_type = att_type.replace('LIST', 'LIST_')
-            chk.append('para_check.{}_ATTR_{}'.format(pt, att_type))
-        return chk
-
-    def _build_parachk(self: any) -> str:
-        chk = []
-        chk.extend(self._io_parachk(self.input_type, 'INPUT'))
-        chk.extend(self._io_parachk(self.output_type, 'OUTPUT'))
-        chk.extend(self._attr_parachk())
-        chk.append('para_check.KERNEL_NAME')
-        return ', '.join(chk)
-
-    def _build_paradefault(self: any):
-        optional = False
-        argtypes = []
-        argtypes.extend(self.input_type)
-        argtypes.extend(self.output_type)
-        for atype in argtypes:
-            if atype == 'optional':
-                optional = True
-            if optional:
-                self.argsdefv.append('None')
-            else:
-                self.argsdefv.append(None)
-        for attr in self.attr_list:
-            atype = self.attr_val.get(attr).get('paramType')
-            if atype == 'optional':
-                optional = True
-            attrval = self.attr_val.get(attr).get('defaultValue')
-            if attrval is not None:
-                optional = True
-                val_type = self.attr_val.get(attr).get('type')
-                if val_type == "bool":
-                    attrval = attrval.capitalize()
-                elif val_type == "str":
-                    attrval = "\"" + attrval + "\""
-                self.argsdefv.append(attrval)
-                continue
-            if optional:
-                self.argsdefv.append(ATTR_DEFAULT.get(self.attr_val.get(attr).get('type')))
-            else:
-                self.argsdefv.append(None)
-
-    def _write_head(self: any, fd: object):
-        fd.write(IMPL_HEAD)
-
-    def _write_argparse(self: any, fd: object):
-        args = self._build_paralist(False)
-        fd.write('def _build_args({}):\n'.format(args))
-        fd.write('    __inputs__ = []\n')
-        fd.write('    for arg in [{}]:\n'.format(', '.join(self.input_name)))
-        fd.write('        if arg is not None:\n')
-        fd.write('            if isinstance(arg, (list, tuple)):\n')
-        fd.write('                if len(arg) == 0:\n')
-        fd.write('                    continue\n')
-        fd.write('                __inputs__.append(arg[0])\n')
-        fd.write('            else:\n')
-        fd.write('                __inputs__.append(arg)\n')
-        fd.write('    __outputs__ = []\n')
-        fd.write('    for arg in [{}]:\n'.format(', '.join(self.output_name)))
-        fd.write('        if arg is not None:\n')
-        fd.write('            if isinstance(arg, (list, tuple)):\n')
-        fd.write('                if len(arg) == 0:\n')
-        fd.write('                    continue\n')
-        fd.write('                __outputs__.append(arg[0])\n')
-        fd.write('            else:\n')
-        fd.write('                __outputs__.append(arg)\n')
-        fd.write('    __attrs__ = []\n')
-        for attr in self.attr_list:
-            fd.write('    if {} is not None:\n'.format(attr))
-            fd.write('        attr = {}\n')
-            fd.write('        attr["name"] = "{}"\n'.format(attr))
-            fd.write('        attr["dtype"] = "{}"\n'.format(self.attr_val.get(attr).get('type')))
-            fd.write('        attr["value"] = {}\n'.format(attr))
-            fd.write('        __attrs__.append(attr)\n')
-        fd.write('    return __inputs__, __outputs__, __attrs__\n')
-
-    def _write_impl(self: any, fd: object):
-        argsdef = self._build_paralist()
-        argsval = self._build_paralist(False)
-        pchk = self._build_parachk()
-        if len(self.kern_name) > 0:
-            kern_name = self.kern_name
-        else:
-            kern_name = self.op_intf
-        src = self.op_file + '.cpp'
-        fd.write(IMPL_API.format(self.op_type, pchk, self.op_intf, argsdef, kern_name, argsval,\
-            self.custom_compile_options, self.custom_all_compile_options, self.op_intf,\
-            optype_snake(self.op_type), src))
-        if self.op_replay_flag:
-            fd.write(REPLAY_OP_API.format(self.op_type, kern_name, self.op_file, self.op_type, self.op_file,\
-                self.op_compile_option))
-        else:
-            fd.write(COMPILE_OP_API.format(self.op_type, kern_name, self.op_type, ', '.join(self.input_name),\
-                ', '.join(self.output_name), self.op_compile_option))

-    def _write_cap(self: any, cap_name: str, fd: object):
-        argsdef = self._build_paralist()
-        argsval = self._build_paralist(False)
-        if cap_name == 'check_supported':
-            fd.write(SUP_API.format(cap_name, argsdef, argsval, cap_name, self.op_type))
-        else:
-            fd.write(CAP_API.format(cap_name, argsdef, argsval, cap_name, self.op_type))
-
-    def _write_glz(self: any, fd: object):
-        argsdef = self._build_paralist()
-        argsval = self._build_paralist(False)
-        fd.write(GLZ_API.format(self.op_type, self.op_intf, argsdef, argsval, self.op_type))
-
-
-def write_scripts(cfgfile: str, cfgs: dict, dirs: dict, ops: list = None, op_compile_option: list = None):
-    batch_lists = cfgs.get(const_var.REPLAY_BATCH).split(';')
-    iterator_lists = cfgs.get(const_var.REPLAY_ITERATE).split(';')
-    file_map = {}
-    op_descs = opdesc_parser.get_op_desc(cfgfile, batch_lists, iterator_lists, AdpBuilder,\
-        ops, dirs.get(const_var.AUTO_GEN_DIR))
-    for op_desc in op_descs:
-        op_desc.write_adapt(dirs.get(const_var.CFG_IMPL_DIR), dirs.get(const_var.CFG_OUT_DIR), op_compile_option)
-        file_map[op_desc.op_type] = op_desc.op_file
-    return file_map
-
-if __name__ == '__main__':
-    if len(sys.argv) <= 6:
-        raise RuntimeError('arguments must be greater than or equal to 6')
-    rep_cfg = {}
-    rep_cfg[const_var.REPLAY_BATCH] = sys.argv[2]
-    rep_cfg[const_var.REPLAY_ITERATE] = sys.argv[3]
-    cfg_dir = {}
-    cfg_dir[const_var.CFG_IMPL_DIR] = sys.argv[4]
-    cfg_dir[const_var.CFG_OUT_DIR] = sys.argv[5]
-    cfg_dir[const_var.AUTO_GEN_DIR] = sys.argv[6]
-    write_scripts(cfgfile=sys.argv[1], cfgs=rep_cfg, dirs=cfg_dir)
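One detail worth a standalone check from the file above: `optype_snake` is what links an operator type to its kernel source file name (`<snake_case>.cpp`). Its behavior, using the function copied verbatim:

```python
import re


def optype_snake(origin_str):
    # copied from ascendc_impl_build.py above
    temp_str = origin_str[0].lower() + origin_str[1:]
    return re.sub(r'([A-Z])', r'_\1', temp_str).lower()


print(optype_snake('AddTemplateCustom'))  # add_template_custom
```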
-""" - -import sys -import os -import glob -import json -import argparse -import const_var - - -def load_json(json_file: str): - with open(json_file, encoding='utf-8') as file: - json_content = json.load(file) - return json_content - - -def get_specified_suffix_file(root_dir, suffix): - specified_suffix = os.path.join(root_dir, '**/*.{}'.format(suffix)) - all_suffix_files = glob.glob(specified_suffix, recursive=True) - return all_suffix_files - - -def add_simplified_config(op_type, key, core_type, objfile, config): - simple_cfg = config.get('binary_info_config.json') - op_cfg = simple_cfg.get(op_type) - if not op_cfg: - op_cfg = {} - op_cfg['dynamicRankSupport'] = True - op_cfg['simplifiedKeyMode'] = 0 - op_cfg['binaryList'] = [] - simple_cfg[op_type] = op_cfg - bin_list = op_cfg.get('binaryList') - bin_list.append({'coreType': core_type, 'simplifiedKey': key, 'binPath': objfile}) - - -def add_op_config(op_file, bin_info, config): - op_cfg = config.get(op_file) - if not op_cfg: - op_cfg = {} - op_cfg['binList'] = [] - config[op_file] = op_cfg - op_cfg.get('binList').append(bin_info) - - -def gen_ops_config(json_file, soc, config): - core_type_map = {"MIX": 0, "AiCore": 1, "VectorCore": 2} - contents = load_json(json_file) - if ('binFileName' not in contents) or ('supportInfo' not in contents): - return - json_base_name = os.path.basename(json_file) - op_dir = os.path.basename(os.path.dirname(json_file)) - support_info = contents.get('supportInfo') - bin_name = contents.get('binFileName') - bin_suffix = contents.get('binFileSuffix') - core_type = core_type_map.get(contents.get("coreType")) - bin_file_name = bin_name + bin_suffix - op_type = bin_name.split('_')[0] - op_file = op_dir + '.json' - bin_info = {} - keys = support_info.get('simplifiedKey') - if keys: - bin_info['simplifiedKey'] = keys - for key in keys: - add_simplified_config(op_type, key, core_type, os.path.join(soc, op_dir, bin_file_name), config) - bin_info['staticKey'] = support_info.get('staticKey') - bin_info['int64Mode'] = support_info.get('int64Mode') - bin_info['inputs'] = support_info.get('inputs') - bin_info['outputs'] = support_info.get('outputs') - if support_info.get('attrs'): - bin_info['attrs'] = support_info.get('attrs') - bin_info['binInfo'] = {'jsonFilePath': os.path.join(soc, op_dir, json_base_name)} - add_op_config(op_file, bin_info, config) - - -def gen_all_config(root_dir, soc): - suffix = 'json' - config = {} - config['binary_info_config.json'] = {} - all_json_files = get_specified_suffix_file(root_dir, suffix) - for _json in all_json_files: - gen_ops_config(_json, soc, config) - for cfg_key in config.keys(): - cfg_file = os.path.join(root_dir, cfg_key) - with os.fdopen(os.open(cfg_file, const_var.WFLAGS, const_var.WMODES), 'w') as fd: - json.dump(config.get(cfg_key), fd, indent=' ') - - -def args_prase(): - parser = argparse.ArgumentParser() - parser.add_argument('-p', - '--path', - nargs='?', - required=True, - help='Parse the path of the json file.') - parser.add_argument('-s', - '--soc', - nargs='?', - required=True, - help='Parse the soc_version of ops.') - return parser.parse_args() - - -def main(): - args = args_prase() - gen_all_config(args.path, args.soc) - - -if __name__ == '__main__': - main() diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_replay_build.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_replay_build.py deleted file mode 100755 index 
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_replay_build.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_replay_build.py
deleted file mode 100755
index 1cac7d911b84df4f3ef3a83ce9cac65ce2e89e0b..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/ascendc_replay_build.py
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-"""
-Created on Feb 28 20:56:45 2020
-Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
-"""
-
-import sys
-import os
-import opdesc_parser
-import replay_codegen
-import const_var
-from replay_codegen import ReplayCodeGenParams
-
-PYF_PATH = os.path.dirname(os.path.realpath(__file__))
-
-
-class ReplayBuilder(opdesc_parser.OpDesc):
-    def __init__(self: any, op_type: str):
-        super().__init__(op_type)
-
-    def gen_replay_source(self: any, impl_path: str, out_path: str, ops_product: str):
-        if not self.op_replay_flag:
-            print('{} replay not enabled'.format(self.op_type))
-            return
-        argn = len(self.input_name) + len(self.output_name) + 1
-        if self.op_replay_batch:
-            print('{} replay in batch mode'.format(self.op_type))
-        else:
-            print('{} replay in normal mode'.format(self.op_type))
-        if impl_path.endswith('op_kernel'):
-            implf = os.path.join(impl_path, self.op_file + '.cpp')
-            tiling_file = os.path.join(impl_path, "../op_host", self.op_file + '_tiling.h')
-        else:
-            if self.dynamic_shape:
-                dyn_path = 'dynamic'
-            else:
-                dyn_path = ''
-            implf = os.path.join(impl_path, dyn_path, self.op_file + '.cpp')
-            tiling_file = os.path.join(impl_path, "../../op_tiling", self.op_file + '_tiling.h')
-        rep_conf = replay_codegen.ReplayCodeGen(ReplayCodeGenParams(self.op_type, implf, tiling_file, self.op_file, \
-            self.op_intf, argn, self.op_replay_batch, self.max_block_dim, self.max_shape_size))
-        rep_conf.set_batch(self.op_replay_batch)
-        rep_conf.set_outdir(out_path)
-        rep_conf.gen_replay(ops_product)
-
-
-def gen_replay(cfgfile: str, cfgs: dict, dirs: dict, ops_product: str, ops: list = None):
-    batch_lists = cfgs.get(const_var.REPLAY_BATCH).split(';')
-    iterator_lists = cfgs.get(const_var.REPLAY_ITERATE).split(';')
-    op_descs = opdesc_parser.get_op_desc(cfgfile, batch_lists, iterator_lists, ReplayBuilder, ops)
-    for op_desc in op_descs:
-        op_desc.gen_replay_source(dirs.get(const_var.CFG_IMPL_DIR), dirs.get(const_var.CFG_OUT_DIR), ops_product)
-
-
-if __name__ == '__main__':
-    if len(sys.argv) <= 6:
-        raise RuntimeError('this script requires at least 6 arguments')
-    rep_cfg = {}
-    rep_cfg[const_var.REPLAY_BATCH] = sys.argv[2]
-    rep_cfg[const_var.REPLAY_ITERATE] = sys.argv[3]
-    rep_dir = {}
-    rep_dir[const_var.CFG_IMPL_DIR] = sys.argv[4]
-    rep_dir[const_var.CFG_OUT_DIR] = sys.argv[5]
-    gen_replay(sys.argv[1], rep_cfg, rep_dir, sys.argv[6])
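A hypothetical driver for the module above, mirroring the argument layout of its `__main__` block; the config file name and directories are placeholders, not paths taken from this sample, and it assumes it runs from cmake/util so that the sibling modules import:

```python
# Hypothetical invocation sketch; the .ini file and directory names below are
# placeholders, and this assumes the script's own cmake/util directory is on
# the import path.
import ascendc_replay_build as replay_build
import const_var

cfgs = {const_var.REPLAY_BATCH: 'AddCustom',  # ops to replay in batch mode
        const_var.REPLAY_ITERATE: ''}         # ops to replay in iterate mode
dirs = {const_var.CFG_IMPL_DIR: './op_kernel',
        const_var.CFG_OUT_DIR: './build_out'}
replay_build.gen_replay('op_cfg.ini', cfgs, dirs, 'ascend910b', ops=None)
```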
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/batch_replay_impl.temp b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/batch_replay_impl.temp
deleted file mode 100755
index 0e88346642009514af64265b4da24c9946e3ebbf..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/batch_replay_impl.temp
+++ /dev/null
@@ -1,117 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <time.h>
-#include <sys/stat.h>
-#include "replay_def.h"
-#include "code_gen.h"
-#include "replay_fun.h"
-#include "register/op_check.h"
-#define __ASCENDC_REPLAY_CODE__
-#include "__CCE_FILE__"
-
-using namespace std;
-using namespace optiling;
-using namespace AscendCReplay;
-
-extern "C" void __KERNEL_FUN__ (__ARGS_DEF__, const char *);
-extern "C" int elf_batch_append(char *elf, uint32_t elfSize, char *jit, int kernum, char *atext[], int alen[],
-                                int atlen, const char* kernelname[]);
-
-#define KERNEL_N 1
-#define ARG_N (__ARG_NUM__)
-#define MAX_L (1024 * 1024 * 100)
-#define MAX_E (1024 * 1024)
-
-int __KERNEL_FUN___replay___OPS_PRODUCT__(ReplayFuncParam& param, const int core_type)
-{
-    // gen type 1: direct call codes, 0: load .o file
-    if (param.gentype < 0 || param.gentype > 1) {
-        printf("Error: call replay gen type is %d, should only be 1 or 0\n", param.gentype);
-        return 0;
-    } else if (param.gentype == 1 && param.objptr == nullptr) {
-        printf("Error: call replay with direct call mode, but code obj addr is null\n");
-        return 0;
-    } else if (param.gentype == 0 && param.output_kernel_file == nullptr) {
-        printf("Error: call replay with object file mode, but object file path is null\n");
-        return 0;
-    }
-    // core_type 0:MIX 1:CUBE 2:VEC
-    if (core_type < 0 || core_type > 2) {
-        printf("Error: call replay core type is %d !\n", core_type);
-        return 0;
-    }
-    g_coreType = __CORE_TYPE__;
-    g_taskRation = param.task_ration;
-    g_tilingKey = param.tiling_key;
-
-    unsigned char *buf, *jit;
-    char *kernel[KERNEL_N];
-    int len[KERNEL_N];
-    block_idx = 0;
-    block_num = param.block_dim;
-    g_ubBase = block_num;
-    uint8_t *code = (uint8_t *)malloc(MAX_L);
-    uint8_t *pos = code;
-    struct timespec tp1, tp2;
-
-    clock_gettime(CLOCK_MONOTONIC, &tp1);
-    if (block_num > 32) {
-        printf("Error: block_num > 32\n");
-        return 0;
-    }
-    //__OP_FOPEN__
-    for (int i = 0; i < KERNEL_N; i++) {
-        //__OP_SET_KERNEL__
-        for (int j = 0; j < ARG_N; j++)
-            AddArg(j, ARG_STEP * (j + 1));
-#ifdef FP_CEILING
-        SetCtrlFloatEnable();
-#else
-        SetCtrlFloatDisable();
-#endif
-        CodeInit(pos, true);
-        __KERNEL_FUN__(__KERNEL_ARGS__, param.tiling_data);
-        CodeEnd();
-        kernel[i] = (char *)pos;
-        len[i] = CodeLen();
-        pos += len[i];
-    }
-    //__OP_FCLOSE__
-    clock_gettime(CLOCK_MONOTONIC, &tp2);
-    buf = (unsigned char *)malloc(MAX_E);
-    int fd = open(param.entry_file, O_RDONLY);
-    if (fd < 0) {
-        printf("[error]: cannot find entry.o : %s\n", param.entry_file);
-        return 0;
-    }
-    ssize_t bufSize = read(fd, buf, MAX_E);
-    if (bufSize <= 0) {
-        printf("[error]: entry.o : %s is too small!\n", param.entry_file);
-        close(fd);
-        return 0;
-    }
-    close(fd);
-    jit = (unsigned char *)malloc(MAX_L);
-    printf("total code generated %ld\n", pos - code);
-    int sz = elf_batch_append((char *)buf, bufSize, (char *)jit, KERNEL_N, kernel, len, pos - code, &param.kernel_name);
-    if (tp1.tv_sec != tp2.tv_sec) {
-        printf("%ld NS\n", tp2.tv_nsec + 1000000000 - tp1.tv_nsec);
-    } else {
-        printf("%ld NS\n", tp2.tv_nsec - tp1.tv_nsec);
-    }
-    printf("new elf size %d\n", sz);
-    if (param.gentype == 0) {
-        fd = open(param.output_kernel_file, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
-        (void)write(fd, jit, sz);
-        close(fd);
-        free(jit);
-    } else if (param.gentype == 1) {
-        *param.objptr = (char*)jit;
-    }
-    free(buf);
-    free(code);
-    return sz;
-}
-
-REG_REPLAY_FUNC(__OPTYPE__, __OPS_PRODUCT__, __KERNEL_FUN___replay___OPS_PRODUCT__);
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/code_channel_infer.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/code_channel_infer.py
deleted file mode 100755
index 137c9f39fa8609cd0cf672388c1066e85aff792f..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/code_channel_infer.py
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-"""
-Created on Feb 28 20:56:45 2020
-Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
-"""
-import os
-import stat
-import ctypes
-import collections
-import shutil
-import subprocess
-import copy
-
-"""CODE_* indicates which cube/vector APIs are called in the operator code
-CODE_MIX means both cube and vector APIs are called
-CODE_CUBE means only cube APIs are called
-CODE_VEC means only vector APIs are called
-"""
-CODE_MIX = 0
-CODE_CUBE = 1
-CODE_VEC = 2
-
-
-def _is_v220(op_product: str):
-    """return if current soc version is V220
-
-    Returns:
-        res: True means V220
-    """
-    if op_product in ["ascend910b"]:
-        return True
-    return False
-
-
-InfoCodeChanelParams = collections.namedtuple('InfoCodeChanelParams',\
-    ['src_file', 'tiling_header', 'kernel_name', 'outdir', 'op_product', 'compile_options'])
-
-
-def infer_code_channel(params: InfoCodeChanelParams):
-    """get code channel for v220, return CODE_MIX if soc version is not V220
-
-    Args:
-        src_file (str): AscendC operator code file
-        tiling_header (str): AscendC operator tiling header file
-        kernel_name (str): kernel function name
-        op_product (str): product (soc) version of the operator
-        compile_options (list): compile options for the ccec cmd
-
-    Raises:
-        Exception: if L1/L0/UB does not exist in the code, it is not an aicore code
-
-    Returns:
-        res (int): CODE_MIX/CODE_CUBE/CODE_VEC
-    """
-    if not _is_v220(params.op_product):
-        return CODE_MIX
-    return CODE_VEC
-    if params.compile_options is None:
-        compile_options = []
-    else:
-        compile_options = params.compile_options
-    ccec = shutil.which("ccec")
-    if ccec is not None:
-        ccec_path = os.path.dirname(ccec)
-        tikcpp_path = os.path.realpath(os.path.join(ccec_path, "..", "..", "tikcpp"))
-    else:
-        tikcpp_path = os.path.realpath("/usr/local/Ascend/latest/compiler/tikcpp")
-    compile_options.append("-I" + tikcpp_path)
-    compile_options.append("-I" + os.path.join(tikcpp_path, "tikcfw"))
-    compile_options.append("-I" + os.path.join(tikcpp_path, "tikcfw", "impl"))
-    compile_options.append("-I" + os.path.join(tikcpp_path, "tikcfw", "interface"))
-    compile_options += ["-include", params.tiling_header]
-    arch = "dav-c220-cube"
-    sub_core_type = "AIC"
-    optional_core = "AiCore"
-    compile_cmd = [shutil.which("ccec"), '-c', '-O3']
-    compile_cmd += compile_options
-    temp_file_name_tag = "_" + str(os.getpid()) + "_temp.o"
-    dst_file = os.path.join(params.outdir, params.kernel_name + temp_file_name_tag)
-    compile_cmd += [params.src_file, "--cce-aicore-arch={}".format(arch),
-                    "--cce-aicore-only", "-o", dst_file,
-                    "-mllvm", "-cce-aicore-function-stack-size=16000",
-                    "-mllvm", "-cce-aicore-record-overflow=true",
-                    "-mllvm", "-cce-aicore-addr-transform"]
-    compile_cmd += ["-std=c++17"]
-    print('get_code_channel: ', ' '.join(compile_cmd))
-    proc = subprocess.Popen(
-        compile_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-    (out, _) = proc.communicate()
-    if proc.returncode != 0:
-        print('get_code_channel coretype compile error: ', out.decode())
-        msg = "compile %s error :%s\n" % (params.src_file, out.decode())
-        raise Exception(f"get_code_channel coretype error, msg is {msg}")
-    objdump_cmd = ['objdump', '-s', '-j', '.text', '{}'.format(dst_file)]
-
-    proc = subprocess.Popen(
-        objdump_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-    (out, _) = proc.communicate()
-    if proc.returncode != 0:
-        print('get_code_channel objdump error: ', out.decode())
-        msg = "get_code_channel objdump %s error :%s\n" % (params.src_file, out.decode())
-        raise Exception(f"get_code_channel objdump error, msg is {msg}")
-    os.remove(dst_file)
-    lines = out.decode('utf-8').split('\n')
-    for line in lines:
-        insts = line.strip().split()
-        if len(insts) < 5:
-            continue
-        for inst in insts[1:]:
-            if len(inst) != 8:
-                continue
-            if inst[6] == 'f':
-                return CODE_MIX
-    return CODE_VEC
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/const_var.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/const_var.py
deleted file mode 100755
index 8b32c3b915d0aaaf1f366cf95cf1a337e8959c89..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/const_var.py
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-"""
-Function:
-The replay function entry
-Copyright Information:
-Huawei Technologies Co., Ltd. All Rights Reserved © 2020
-"""
-
-import os
-import stat
-
-
-REPLAY_BATCH = 'batch'
-REPLAY_ITERATE = 'iterate'
-CFG_IMPL_DIR = 'impl_dir'
-CFG_OUT_DIR = 'out_dir'
-AUTO_GEN_DIR = 'auto_gen_dir'
-WFLAGS = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
-WMODES = stat.S_IWUSR | stat.S_IRUSR
-SOC_MAP_EXT = {'ascend310p': 'Ascend310P3', 'ascend310b': 'Ascend310B1',
-               'ascend910': 'Ascend910A', 'ascend910b': 'Ascend910B1'}
-BIN_CMD = 'opc $1 --main_func={fun} --input_param={param} --soc_version={soc} \
---output=$2 --impl_mode={impl} --simplified_key_mode=0 --op_mode=dynamic\n'
-CHK_CMD = '''
-if ! test -f $2/{res_file} ; then
-  echo "$2/{res_file} not generated!"
-  exit 1
-fi
-'''
-ATTR_DEF_VAL = {'str': '', 'int': 0, 'float': 0.0, 'bool': False, 'list_bool': [],
-                'list_int': [], 'list_float': [], 'list_list_int': [[]]}
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/gen_impl_and_mrege_json.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/gen_impl_and_mrege_json.sh
deleted file mode 100755
index 55e12e5edff6d1d39207db0c439a15fcb8656951..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/gen_impl_and_mrege_json.sh
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/bash
-
-project_path=$1
-build_path=$2
-vendor_name=customize
-if [[ ! -d "$project_path" ]]; then
-    echo "[ERROR] No project path is provided"
-    exit 1
-fi
-
-if [[ ! -d "$build_path" ]]; then
-    echo "[ERROR] No build path is provided"
-    exit 1
-fi
-
-# copy ai_core operator implementations
-# tbe_impl_files_num=$(ls $project_path/tbe/impl/* 2> /dev/null | wc -l)
-# if [[ "$tbe_impl_files_num" -gt 0 ]];then
-#     cp -r ${project_path}/tbe/impl/* ${build_path}/makepkg/packages/vendors/$vendor_name/op_impl/ai_core/tbe/customize_impl
-#     cp -r ${project_path}/tbe/impl/* ${build_path}/makepkg/packages/vendors/$vendor_name/op_impl/vector_core/tbe/customize_impl
-# fi
-
-# copy aicpu kernel so operators
-if [[ -d "${project_path}/cpukernel/aicpu_kernel_lib" ]]; then
-    cp -f ${project_path}/cpukernel/aicpu_kernel_lib/* ${build_path}/makepkg/packages/vendors/$vendor_name/op_impl/cpu/aicpu_kernel/impl
-    rm -rf ${project_path}/cpukernel/aicpu_kernel_lib
-fi
-
-# merge aicpu.ini and aicore.ini to generate npu_supported_ops.json
-# mkdir -p ${build_path}/framework/op_info_cfg
-# mkdir -p ${build_path}/framework/op_info_cfg/aicpu_kernel
-# mkdir -p ${build_path}/framework/op_info_cfg/ai_core
-
-# if [[ -d "${project_path}/tbe/op_info_cfg/ai_core" ]]; then
-#     bash ${project_path}/cmake/util/gen_ops_filter.sh ${project_path}/tbe/op_info_cfg/ai_core ${build_path}/framework/op_info_cfg/ai_core
-# fi
-
-# if [[ -d "${project_path}/cpukernel/op_info_cfg/aicpu_kernel" ]]; then
-#     bash ${project_path}/cmake/util/gen_ops_filter.sh ${project_path}/cpukernel/op_info_cfg/aicpu_kernel ${build_path}/framework/op_info_cfg/aicpu_kernel
-# fi
-
-# aicpu_filter_file=${build_path}/framework/op_info_cfg/aicpu_kernel/npu_supported_ops.json
-# aicore_filter_file=${build_path}/framework/op_info_cfg/ai_core/npu_supported_ops.json
-# if [[ -f "${aicpu_filter_file}" ]] && [[ ! -f "${aicore_filter_file}" ]]; then
-#     cp $aicpu_filter_file ${build_path}/makepkg/packages/vendors/$vendor_name/framework/tensorflow
-# fi
-# if [[ -f "${aicore_filter_file}" ]] && [[ ! -f "${aicpu_filter_file}" ]]; then
-#     cp $aicore_filter_file ${build_path}/makepkg/packages/vendors/$vendor_name/framework/tensorflow
-# fi
-
-# if [[ -f "${aicore_filter_file}" ]] && [[ -f "${aicpu_filter_file}" ]]; then
-#     chmod u+w ${aicpu_filter_file}
-#     python3 ${project_path}/cmake/util/insert_op_info.py ${aicore_filter_file} ${aicpu_filter_file}
-#     chmod u-w ${aicpu_filter_file}
-#     cp $aicpu_filter_file ${build_path}/makepkg/packages/vendors/$vendor_name/framework/tensorflow
-# fi
-
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/gen_ops_filter.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/gen_ops_filter.sh
deleted file mode 100755
index d4c27d17feb8617dfee0f6fd3262c36583033339..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/gen_ops_filter.sh
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/bash
-# Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
-# Description: Generate npu_supported_ops.json
-# ==============================================================================
-
-if [[ -z "$1" ]]; then
-    echo -e "[ERROR] No source dir provided"
-    exit 1
-fi
-
-if [[ -z "$2" ]]; then
-    echo -e "[ERROR] No destination dir provided"
-    exit 1
-fi
-
-src=$1
-dest_file=$2/npu_supported_ops.json
-
-if [ -f "$dest_file" ];then
-    chmod u+w $dest_file
-fi
-
-echo $*
-
-add_ops() {
-    name=$1
-    isHeavy=$2
-    file=$3
-    grep -w "\"$name\"" ${file} >/dev/null
-    if [ $? == 0 ];then
-        return
-    fi
-    echo "    \"${name}\": {" >> ${file}
-    echo "        \"isGray\": false," >> ${file}
-    echo "        \"isHeavy\": ${isHeavy}" >> ${file}
-    echo "    }," >> ${file}
-}
-
-echo "{" > ${dest_file}
-ini_files=$(find ${src} -name "*.ini")
-for file in ${ini_files} ; do
-    name=$(grep '^\[' ${file} | sed 's/\[//g' | sed 's/]//g' | sed 's/\r//g')
-    grep 'heavyOp.flag' ${file} >/dev/null
-    if [ $? == 0 ];then
-        isHeavy=$(grep 'heavyOp.flag' ${file} | awk -F= '{print $2}')
-    else
-        isHeavy="false"
-    fi
-    for op in ${name} ; do
-        add_ops ${op} ${isHeavy} ${dest_file}
-    done
-done
-echo "}" >> ${dest_file}
-file_count=$(cat ${dest_file} | wc -l)
-line=$(($file_count-1))
-sed -i "${line}{s/,//g}" ${dest_file}
-
-chmod 640 "${dest_file}"
-echo -e "[INFO] Succeeded in generating ${dest_file}"
-
-exit 0
-
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/gen_version_info.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/gen_version_info.sh
deleted file mode 100755
index a06cfc78d29482807d086b880375533cd0a3679e..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/gen_version_info.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-ascend_install_dir=$1
-gen_file_dir=$2
-
-# create version.info
-compiler_version=$(grep "Version" -w ${ascend_install_dir}/compiler/version.info | awk -F = '{print $2}')
-echo "custom_opp_compiler_version=${compiler_version}" > ${gen_file_dir}/version.info
\ No newline at end of file
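For reference, the npu_supported_ops.json assembled by gen_ops_filter.sh has one entry per [OpName] section found in the .ini files; the op name below is a placeholder:

```python
# Placeholder op name; the layout matches the echo statements in add_ops above.
import json

npu_supported_ops = {
    "AddCustom": {"isGray": False, "isHeavy": False},
}
print(json.dumps(npu_supported_ops, indent=4))
```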
-""" -import json -import os -import sys -import stat -import const_var - - -if __name__ == '__main__': - if len(sys.argv) != 3: - print(sys.argv) - print('argv error, inert_op_info.py your_op_file lib_op_file') - sys.exit(2) - - with open(sys.argv[1], 'r') as load_f: - insert_operator = json.load(load_f) - - all_operators = {} - if os.path.exists(sys.argv[2]): - if os.path.getsize(sys.argv[2]) != 0: - with open(sys.argv[2], 'r') as load_f: - all_operators = json.load(load_f) - - for k in insert_operator.keys(): - if k in all_operators.keys(): - print('replace op:[', k, '] success') - else: - print('insert op:[', k, '] success') - all_operators[k] = insert_operator[k] - - with os.fdopen(os.open(sys.argv[2], const_var.WFLAGS, const_var.WMODES), 'w') as json_file: - json_file.write(json.dumps(all_operators, indent=4)) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/insert_simplified_keys.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/insert_simplified_keys.py deleted file mode 100755 index ace727b903b1de37e6feece649cd20f242d94798..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/insert_simplified_keys.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Created on Feb 28 20:56:45 2020 -Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. -""" - -import sys -import os -import re -import glob -import json -import argparse -import const_var - - -DATA_TPYE_DICT = { - 'float32': 0, - 'float16': 1, - 'int8': 2, - 'int16': 6, - 'uint16': 7, - 'uint8': 4, - 'int32': 3, - 'int64': 9, - 'uint32': 8, - 'uint64': 10, - 'bool': 12, - 'double': 11, - 'string': 13, - 'dual': 14, - 'dual': 15, - 'complex64': 16, - 'complex128': 17, - 'qint8': 18, - 'qint16': 19, - 'qint32': 20, - 'quint8': 21, - 'quint16': 22, - 'resource': 23, - 'string': 24, - 'dual': 25, - 'variant': 26, - 'bf16': 27, - 'bfloat16': 27, - 'undefined': 28, - 'int4': 29, - 'uint1': 30, - 'int2': 31 -} - -FORMAT_DICT = { - 'NCHW': 0, - 'NHWC': 1, - 'ND': 2, - 'NC1HWC0': 3, - 'FRACTAL_Z': 4, - 'NC1C0HWPAD': 5, - 'NHWC1C0': 6, - 'FSR_NCHW': 7, - 'FRACTAL_DECONV': 8, - 'C1HWNC0': 9, - 'FRACTAL_DECONV_TRANSPOSE': 10, - 'FRACTAL_DECONV_SP_STRIDE_TRANS': 11, - 'NC1HWC0_C04': 12, - 'FRACTAL_Z_C04': 13, - 'CHWN': 14, - 'FRACTAL_DECONV_SP_STRIDE8_TRANS': 15, - 'HWCN': 16, - 'NC1KHKWHWC0': 17, - 'BN_WEIGHT': 18, - 'FILTER_HWCK': 19, - 'HASHTABLE_LOOKUP_LOOKUPS': 20, - 'HASHTABLE_LOOKUP_KEYS': 21, - 'HASHTABLE_LOOKUP_VALUE': 22, - 'HASHTABLE_LOOKUP_OUTPUT': 23, - 'HASHTABLE_LOOKUP_HITS': 24, - 'C1HWNCoC0': 25, - 'MD': 26, - 'NDHWC': 27, - 'FRACTAL_ZZ': 28, - 'FRACTAL_NZ': 29, - 'NCDHW': 30, - 'DHWCN': 31, - 'NDC1HWC0': 32, - 'FRACTAL_Z_3D': 33, - 'CN': 34, - 'NC': 35, - 'DHWNC': 36, - 'FRACTAL_Z_3D_TRANSPOSE': 37, - 'FRACTAL_ZN_LSTM': 38, - 'FRACTAL_Z_G': 39, - 'RESERVED': 40, - 'ALL': 41, - 'NULL': 42, - 'ND_RNN_BIAS': 43, - 'FRACTAL_ZN_RNN': 44, - 'NYUV': 45, - 'NYUV_A': 46 -} - - -def load_json(json_file: str): - with open(json_file, encoding='utf-8') as file: - json_content = json.load(file) - return json_content - - -def get_specified_suffix_file(root_dir, suffix): - specified_suffix = os.path.join(root_dir, '**/*.{}'.format(suffix)) - all_suffix_files = glob.glob(specified_suffix, recursive=True) - return all_suffix_files - - -def get_deterministic_value(support_info): - deterministic_key = 'deterministic' - if deterministic_key not in 
support_info: - return 0 - deterministic_value = support_info.get(deterministic_key) - if deterministic_value == 'true': - return 1 - else: - return 0 - - -def get_precision_value(support_info): - precision_key = 'implMode' - precision_value = support_info.get(precision_key) - if precision_value == 'high_performance': - _value = 1 - elif precision_value == 'high_precision': - _value = 2 - else: - _value = 0 - return _value - - -def get_overflow_value(support_info): - return 0 - - -def get_parameters(info): - if info: - if 'dtype' in info: - data_type = info['dtype'] - data_type_value = DATA_TPYE_DICT.get(data_type) - else: - data_type_value = 0 - if 'format' in info: - _format = info['format'] - _format_value = FORMAT_DICT.get(_format) - else: - _format_value = 0 - else: - data_type_value = 0 - _format_value = 0 - return str(data_type_value), str(_format_value) - - -def get_dynamic_parameters(info): - # 动态输入时只需获取第一个参数 - return get_parameters(info[0]) - - -def get_all_parameters(support_info, _type): - result_list = list() - info_lists = support_info.get(_type) - if info_lists: - for _info in info_lists: - # 输入为列表时是动态输入 - if isinstance(_info, (list, tuple)): - data_type_value, _format_value = get_dynamic_parameters(_info) - else: - data_type_value, _format_value = get_parameters(_info) - result_list.append("{},{}".format(data_type_value, _format_value)) - return result_list - - -def get_all_input_parameters(support_info): - result = get_all_parameters(support_info, 'inputs') - return '/'.join(result) - - -def insert_content_into_file(input_file, content): - with open(input_file, 'r+') as file: - lines = file.readlines() - for index, line in enumerate(lines): - match_result = re.search(r'"staticKey":', line) - if match_result: - count = len(line) - len(line.lstrip()) - new_content = "{}{}".format(' ' * count, content) - # 插入到前一行,防止插入最后时还需要考虑是否添加逗号 - lines.insert(index, new_content) - break - file.seek(0) - file.write(''.join(lines)) - - -def insert_simplified_keys(json_file): - contents = load_json(json_file) - # 不存在'binFileName'或者'supportInfo'字段时,非需要替换的解析json文件 - if ('binFileName' not in contents) or ('supportInfo' not in contents): - return - support_info = contents.get('supportInfo') - bin_file_name = contents.get('binFileName') - bin_suffix = contents.get('binFileSuffix') - # 'simplifiedKey'字段已经存在时,直接返回,不重复生成 - if 'simplifiedKey' in support_info: - return - op_type = bin_file_name.split('_')[0] - deterministic = str(get_deterministic_value(support_info)) - precision = str(get_precision_value(support_info)) - overflow = str(get_overflow_value(support_info)) - input_parameters = get_all_input_parameters(support_info) - key = '{}/d={},p={},o={}/{}/'.format( - op_type, - deterministic, - precision, - overflow, - input_parameters) - result = '"simplifiedKey": "' + key + '",\n' - insert_content_into_file(json_file, result) - - -def insert_all_simplified_keys(root_dir): - suffix = 'json' - all_json_files = get_specified_suffix_file(root_dir, suffix) - for _json in all_json_files: - insert_simplified_keys(_json) - - -def args_prase(): - parser = argparse.ArgumentParser() - parser.add_argument('-p', - '--path', - nargs='?', - required=True, - help='Parse the path of the json file.') - return parser.parse_args() - - -def main(): - args = args_prase() - insert_all_simplified_keys(args.path) - - -if __name__ == '__main__': - main() diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/kernel_entry.py 
b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/kernel_entry.py deleted file mode 100755 index 2b77c970d4e6c1f0aaca07572cd8c7221ac00e22..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/kernel_entry.py +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Created on Feb 28 20:56:45 2020 -Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. -""" - - -def gen_fun_def(title, kernel, argn, arg_type, arg_name): - entry = [] - entry.append(title) - entry.append(kernel) - entry.append('(') - args = [] - for i in range(0, argn): - args.append(arg_type + ' ' + arg_name + str(i)) - entry.append(', '.join(args)) - entry.append(')') - return ' '.join(entry) - - -def gen_batch_kernel_body(fname, argn, arg_name): - body = [] - body.append('{') - fun = [] - fun.append(fname) - fun.append('(') - args = [] - for i in range(0, argn): - args.append(arg_name + str(i)) - fun.append(', '.join(args)) - fun.append(');') - body.append(' '.join(fun)) - body.append('}') - return '\n'.join(body) - - -def gen_mc_kernel_body(kn, argn, arg_name, blknum): - body = [] - body.append('{') - body.append(' switch(block_idx) {') - for blk in range(0, blknum): - fun = [] - fun.append('{}_blk{:02d}'.format(kn, blk)) - fun.append('(') - args = [] - for i in range(0, argn): - args.append(arg_name + str(i)) - fun.append(', '.join(args)) - fun.append(')') - body.append(' case {}: {}; break;'.format(blk, ' '.join(fun))) - body.append(' default: break;') - body.append(' }') - body.append('}') - return '\n'.join(body) - - -def gen_proc_body(argn, arg_name): - body = [] - body.append('{') - args = [] - for i in range(0, argn): - args.append(arg_name + str(i)) - body.append('uint64_t __x = (uint64_t)' + ' + (uint64_t)'.join(args) + ';') - body.append('__asm__ ("NOP");') - body.append('__asm__ ("NOP");') - body.append('__asm__ ("NOP");') - body.append('}') - return '\n'.join(body) - - -def batch_code_gen(kn, argn, argt): - codes = [] - kernel_name = kn - proc_name = kernel_name + '_percore' - arg_num = int(argn) - data_type = argt - arg_type = '__gm__ ' + data_type + '* __restrict__' - arg_name = 'arg' - kernel_title = 'extern \"C\" __global__ __aicore__ void' - proc_title = 'extern \"C\" __attribute__((noinline)) __aicore__ void' - codes.append('#ifndef __aicore__') - codes.append('#define __aicore__ [aicore]') - codes.append('#endif') - codes.append(gen_fun_def(proc_title, proc_name, arg_num, arg_type, arg_name) + ';') - codes.append(gen_fun_def(kernel_title, kernel_name, arg_num, arg_type, arg_name)) - codes.append(gen_batch_kernel_body(proc_name, arg_num, arg_name)) - codes.append(gen_fun_def(proc_title, proc_name, arg_num, arg_type, arg_name)) - codes.append(gen_proc_body(arg_num, arg_name)) - return '\n'.join(codes) + '\n' - - -def mc_code_gen(kn, argn, argt, blknum): - codes = [] - kernel_name = kn - core_num = int(blknum) - arg_num = int(argn) - data_type = argt - arg_type = '__gm__ ' + data_type + '* __restrict__' - arg_name = 'arg' - kernel_title = 'extern \"C\" __global__ __aicore__ void' - proc_title = 'extern \"C\" __attribute__((noinline)) __aicore__ void' - codes.append('#ifndef __aicore__') - codes.append('#define __aicore__ [aicore]') - codes.append('#endif') - for i in range(0, core_num): - proc_name = '{}_blk{:02d}'.format(kernel_name, i) - codes.append(gen_fun_def(proc_title, proc_name, arg_num, arg_type, arg_name) + ';') - 
codes.append(gen_fun_def(kernel_title, kernel_name, arg_num, arg_type, arg_name)) - codes.append(gen_mc_kernel_body(kernel_name, arg_num, arg_name, core_num)) - for i in range(0, core_num): - proc_name = '{}_blk{:02d}'.format(kernel_name, i) - codes.append(gen_fun_def(proc_title, proc_name, arg_num, arg_type, arg_name)) - codes.append(gen_proc_body(arg_num, arg_name)) - return '\n'.join(codes) + '\n' diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/kernel_impl.temp b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/kernel_impl.temp deleted file mode 100755 index 5079a1043a25cd6b73449e708ceae40807cb03a1..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/kernel_impl.temp +++ /dev/null @@ -1,10 +0,0 @@ -#include -#include -#include -#include -#include -#include "replay_def.h" -#include "code_gen.h" -#include "replay_fun.h" -#define __ASCENDC_REPLAY_CODE__ -#include "__CCE_FILE__" diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/COPYING b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/COPYING deleted file mode 100755 index d159169d1050894d3ea3b98e1c965c4058208fe1..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/COPYING +++ /dev/null @@ -1,339 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Lesser General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. 
- - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. 
- - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. 
- -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
-    Gnomovision version 69, Copyright (C) year name of author
-    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, the commands you use may
-be called something other than `show w' and `show c'; they could even be
-mouse-clicks or menu items--whatever suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary.  Here is a sample; alter the names:
-
-  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
-  `Gnomovision' (which makes passes at compilers) written by James Hacker.
-
-  <signature of Ty Coon>, 1 April 1989
-  Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into
-proprietary programs.  If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library.  If this is what you want to do, use the GNU Lesser General
-Public License instead of this License.
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/README.md b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/README.md
deleted file mode 100755
index b41f0168201e8596e6cb8dc8754d606581d18dcf..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/README.md
+++ /dev/null
@@ -1,246 +0,0 @@
-[![License: GPL v2](https://img.shields.io/badge/License-GPL%20v2-blue.svg)](https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html)
-![Build Status](https://github.com/megastep/makeself/workflows/CI/badge.svg)
-
-# makeself - Make self-extractable archives on Unix
-
-[makeself.sh][1] is a small shell script that generates a self-extractable
-compressed tar archive from a directory. The resulting file appears as a shell script
-(many of those have a **.run** suffix), and can be launched as is. The archive
-will then uncompress itself to a temporary directory and an optional arbitrary
-command will be executed (for example an installation script). This is pretty
-similar to archives generated with WinZip Self-Extractor in the Windows world.
-Makeself archives also include checksums for integrity self-validation (CRC
-and/or MD5/SHA256 checksums).
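-
-As a quick, hypothetical illustration (the directory, archive, and script
-names below are invented for this sketch), the whole life cycle of an archive
-looks like this:
-
-```
-# Package the ./mysoft directory into a self-extracting archive that
-# runs ./setup.sh from the extracted files.
-./makeself.sh ./mysoft mysoft.run "My Software" ./setup.sh
-
-# The recipient installs simply by running the resulting file.
-sh ./mysoft.run
-```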
-
-The makeself.sh script itself is used only to create the archives from a
-directory of files. The resultant archive is actually a compressed (using
-gzip, bzip2, or compress) TAR archive, with a small shell script stub at the
-beginning. This small stub performs all the steps of extracting the files,
-running the embedded command, and removing the temporary files when done.
-All the user has to do to install the software contained in such an
-archive is to "run" the archive, i.e. **sh nice-software.run**. I recommend
-using the ".run" (which was introduced by some Makeself archives released by
-Loki Software) or ".sh" suffix for such archives, so as not to confuse users:
-they will then know they are actually shell scripts (with quite a lot of
-binary data attached to them, though!).
-
-I am trying to keep the code of this script as portable as possible, i.e. it
-does not rely on any bash-specific features and only calls commands that are
-installed on any functioning UNIX-compatible system. This script as well as
-the archives it generates should run on any Unix flavor, with any compatible
-Bourne shell, provided of course that the compression programs are available.
-
-As of version 2.1, Makeself has been rewritten and tested on the following
-platforms :
-
-  * Linux (all distributions)
-  * Sun Solaris (8 and above)
-  * HP-UX (tested on 11.0 and 11i on HPPA RISC)
-  * SCO OpenUnix and OpenServer
-  * IBM AIX 5.1L
-  * macOS (Darwin)
-  * SGI IRIX 6.5
-  * FreeBSD
-  * UnicOS / Cray
-  * Cygwin (Windows)
-
-If you successfully run Makeself and/or archives created with it on another
-system, then please [let me know][2]!
-
-Examples of publicly available archives made using makeself are :
-
-  * Game patches and installers for [Id Software][3] games like Quake 3 for Linux or Return To Castle Wolfenstein ;
-  * All game patches released by [Loki Software][4] for the Linux version of popular games ;
-  * The [nVidia drivers][5] for Linux
-  * The installer for the Linux version of [Google Earth][6]
-  * The [VirtualBox][7] installers for Linux
-  * The [Makeself][1] distribution itself ;-)
-  * and countless others...
-
-**Important note for Apache users:** By default, most Web servers will think that Makeself archives are regular text files and thus they may show up as text in a Web browser. The correct way to prevent this is to add a MIME type for this file format, like so (in httpd.conf) :
-
-`AddType application/x-makeself .run`
-
-**Important note for certain GNU/Linux distributions:** Archives created with Makeself prior to v2.1.2 were using an old syntax for the _head_ and _tail_ Unix commands that is being progressively obsoleted in their GNU forms. Therefore you may have problems uncompressing some of these archives. A workaround for this is to set the environment variable $_POSIX2_VERSION to enable the old syntax, i.e. :
-
-`export _POSIX2_VERSION=199209`
-
-## Usage
-
-The syntax of makeself is the following:
-
-```
-makeself.sh [args] archive_dir file_name label startup_script [script_args]
-```
-
-  * _args_ are optional options for Makeself. The available ones are :
-
-    * **`--version`** : Prints the version number on stdout, then exits immediately
-    * **`--gzip`** : Use gzip for compression (the default on platforms on which gzip is commonly available, like Linux)
-    * **`--bzip2`** : Use bzip2 instead of gzip for better compression. The bzip2 command must be available in the command path. It is recommended that the archive suffix be set to something like '.bz2.run', so that potential users know that they'll need bzip2 to extract it.
-    * **`--pbzip2`** : Use pbzip2 instead of gzip for better and faster compression on machines having multiple CPUs. The pbzip2 command must be available in the command path. It is recommended that the archive suffix be set to something like '.bz2.run', so that potential users know that they'll need bzip2 to extract it.
-    * **`--xz`** : Use xz instead of gzip for better compression. The xz command must be available in the command path. It is recommended that the archive suffix be set to something like '.xz.run' for the archive, so that potential users know that they'll need xz to extract it.
-    * **`--lzo`** : Use lzop instead of gzip for better compression. The lzop command must be available in the command path. It is recommended that the archive suffix be set to something like `.lzo.run` for the archive, so that potential users know that they'll need lzop to extract it.
-    * **`--lz4`** : Use lz4 instead of gzip for better compression. The lz4 command must be available in the command path. It is recommended that the archive suffix be set to something like '.lz4.run' for the archive, so that potential users know that they'll need lz4 to extract it.
-    * **`--zstd`** : Use zstd instead of gzip for better compression. The zstd command must be available in the command path. It is recommended that the archive suffix be set to something like '.zstd.run' for the archive, so that potential users know that they'll need zstd to extract it.
-    * **`--pigz`** : Use pigz for compression.
-    * **`--base64`** : Encode the archive to ASCII in Base64 format instead of compressing (base64 command required).
-    * **`--gpg-encrypt`** : Encrypt the archive using `gpg -ac -z $COMPRESS_LEVEL`. This will prompt for a password to encrypt with. Assumes that potential users have `gpg` installed.
-    * **`--ssl-encrypt`** : Encrypt the archive using `openssl aes-256-cbc -a -salt`. This will prompt for a password to encrypt with. Assumes that the potential users have the OpenSSL tools installed.
-    * **`--compress`** : Use the UNIX `compress` command to compress the data. This should be the default on all platforms that don't have gzip available.
-    * **`--nocomp`** : Do not use any compression for the archive, which will then be an uncompressed TAR.
-    * **`--complevel`** : Specify the compression level for gzip, bzip2, pbzip2, zstd, xz, lzo or lz4. (defaults to 9)
-    * **`--threads`** : Specify the number of threads to be used by compressors that support parallelization. Omit to use the compressor's default. Most useful (and required) for opting into xz's threading, usually with `--threads=0` for all available cores. pbzip2 and pigz are parallel by default, and setting this value allows limiting the number of threads they use.
-    * **`--notemp`** : The generated archive will not extract the files to a temporary directory, but in a new directory created in the current directory. This is better for distributing software packages that may extract and compile by themselves (i.e. launch the compilation through the embedded script).
-    * **`--current`** : Files will be extracted to the current directory, instead of in a subdirectory. This option implies `--notemp` above.
-    * **`--follow`** : Follow the symbolic links inside the archive directory, i.e. store the files that are being pointed to instead of the links themselves.
-    * **`--append`** _(new in 2.1.x)_: Append data to an existing archive, instead of creating a new one. In this mode, the settings from the original archive are reused (compression type, label, embedded script), and thus don't need to be specified again on the command line.
-    * **`--header`** : Makeself uses a separate file to store the header stub, called `makeself-header.sh`. By default, it is assumed that it is stored in the same location as makeself.sh. This option can be used to specify its actual location if it is stored someplace else.
-    * **`--cleanup`** : Specify a script that is run when execution is interrupted or finishes successfully. The script is executed with the same environment and initial `script_args` as `startup_script`.
-    * **`--copy`** : Upon extraction, the archive will first extract itself to a temporary directory. The main application of this is to allow self-contained installers stored in a Makeself archive on a CD, when the installer program will later need to unmount the CD and allow a new one to be inserted. This prevents "Filesystem busy" errors for installers that span multiple CDs.
-    * **`--nox11`** : Disable the automatic spawning of a new terminal in X11.
-    * **`--nowait`** : When executed from a new X11 terminal, disable the user prompt at the end of the script execution.
-    * **`--nomd5`** and **`--nocrc`** : Disable the creation of an MD5 / CRC checksum for the archive. This speeds up the extraction process if integrity checking is not necessary.
-    * **`--sha256`** : Adds a SHA256 checksum for the archive. This is in addition to the MD5 / CRC checksums unless `--nomd5` is also used.
-    * **`--lsm` _file_** : Provide an LSM file to makeself; it will be embedded in the generated archive. LSM files describe a software package in a way that is easily parseable. The LSM entry can then be retrieved later using the `--lsm` argument to the archive. An example of an LSM file is provided with Makeself.
-    * **`--tar-format opt`** : Specify the tar archive format (default is ustar); you may use any value accepted by your tar command (such as posix, v7, etc).
-    * **`--tar-extra opt`** : Append more options to the tar command line.
-
-    For instance, in order to exclude the `.git` directory from the packaged archive directory using the GNU `tar`, one can use `makeself.sh --tar-extra "--exclude=.git" ...`
-
-    * **`--keep-umask`** : Keep the umask set to the shell default, rather than overriding it when executing the self-extracting archive.
-    * **`--packaging-date date`** : Use the provided string as the packaging date instead of the current date.
-    * **`--license`** : Append a license file.
-    * **`--nooverwrite`** : Do not extract the archive if the specified target directory already exists.
-    * **`--help-header file`** : Add a header to the archive's `--help` output.
-  * `archive_dir` is the name of the directory that contains the files to be archived
-  * `file_name` is the name of the archive to be created
-  * `label` is an arbitrary text string describing the package. It will be displayed while extracting the files.
-  * `startup_script` is the command to be executed _from within_ the directory of extracted files. Thus, if you wish to execute a program contained in this directory, you must prefix your command with `./`. For example, `./program` will be fine. The `script_args` are additional arguments for this command (see the sketch below).
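-
-As a hypothetical sketch of how `script_args` are forwarded (every name below
-is invented for illustration):
-
-```
-# "My App" is the label and ./install.sh is the startup_script;
-# --prefix=/opt/myapp is a script_arg that is stored in the archive and
-# passed to ./install.sh whenever the generated myapp.run is executed.
-makeself.sh --gzip ./dist myapp.run "My App" ./install.sh --prefix=/opt/myapp
-```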
-
-Here is a fuller example, assuming the user has a package image stored in **/home/joe/mysoft**, and he wants to generate a self-extracting package named
-**mysoft.sh**, which will launch the "setup" script initially stored in /home/joe/mysoft :
-
-`makeself.sh /home/joe/mysoft mysoft.sh "Joe's Nice Software Package" ./setup`
-
-Here is also how I created the [makeself.run][9] archive which contains the Makeself distribution :
-
-`makeself.sh --notemp makeself makeself.run "Makeself by Stephane Peter" echo "Makeself has extracted itself"`
-
-Archives generated with Makeself can be passed the following arguments:
-
-  * **`--keep`** : Prevent the files from being extracted to a temporary directory that will be removed after the embedded script's execution. The files will instead be extracted to the current working directory and will stay there until you remove them.
-  * **`--verbose`** : Will prompt the user before executing the embedded command
-  * **`--target dir`** : Allows extracting the archive to an arbitrary place.
-  * **`--nox11`** : Do not spawn an X11 terminal.
-  * **`--confirm`** : Prompt the user for confirmation before running the embedded command.
-  * **`--info`** : Print out general information about the archive (does not extract).
-  * **`--lsm`** : Print out the LSM entry, if it is present.
-  * **`--list`** : List the files in the archive.
-  * **`--check`** : Check the archive for integrity using the embedded checksums. Does not extract the archive.
-  * **`--nochown`** : By default, a `chown -R` command is run on the target directory after extraction, so that all files belong to the current user. This is mostly needed if you are running as root, as tar will then try to recreate the initial user ownerships. You may disable this behavior with this flag.
-  * **`--tar`** : Run the tar command on the contents of the archive, using the following arguments as parameters for the command.
-  * **`--noexec`** : Do not run the embedded script after extraction.
-  * **`--noexec-cleanup`** : Do not run the embedded cleanup script.
-  * **`--nodiskspace`** : Do not check for available disk space before attempting to extract.
-  * **`--cleanup-args`** : Specify arguments to be passed to the cleanup script. Wrap the value in quotes to specify multiple arguments.
-
-Any subsequent arguments to the archive will be passed as additional arguments to the embedded command. You must explicitly use the `--` special command-line construct before any such options to make sure that Makeself will not try to interpret them.
-
-## Startup Script
-
-The startup script must be a regular Shell script.
-
-Within the startup script, you can use the `$USER_PWD` variable to get the path of the folder from which the self-extracting script is executed. This is especially useful to access files that are located in the same folder as the script, as shown in the example below.
-
-`my-self-extracting-script.sh --fooBarFileParameter foo.bar`
-
-## Building and Testing
-
-Clone the git repo and execute `git submodule update --init --recursive` to obtain all submodules.
-
-* To make a release: `make`
-* To run all tests: `make test`
-
-## Maven Usage
-
-Makeself is now supported by the Maven plugin [makeself-maven-plugin](https://github.com/hazendaz/makeself-maven-plugin). Please refer to that project for usage, and report any bugs concerning the Maven plugin there.
-
-## License
-
-Makeself itself is covered by the [GNU General Public License][8] (GPL) version 2 and above.
Archives generated by Makeself don't have to be placed under this license (although I encourage it ;-)), since the archive itself is merely data for Makeself.
-
-## Contributing
-
-I will gladly consider merging your pull requests on the [GitHub][10] repository. However, please keep the following in mind:
-
-  * One of the main purposes of Makeself is portability. Do not submit patches that will break supported platforms. The more platform-agnostic, the better.
-  * Please explain clearly what the purpose of the patch is, and how you achieved it.
-
-## Download
-
-Get the latest official distribution [here][9] (version 2.4.5).
-
-The latest development version can be grabbed from [GitHub][10]. Feel free to submit any patches there through the fork and pull request process.
-
-## Version history
-
-  * **v1.0:** Initial public release
-  * **v1.1:** The archive can be passed parameters that will be passed on to the embedded script, thanks to John C. Quillan
-  * **v1.2:** Cosmetic updates, support for bzip2 compression and non-temporary archives. Many ideas thanks to Francois Petitjean.
-  * **v1.3:** More patches from Bjarni R. Einarsson and Francois Petitjean: Support for no compression (`--nocomp`), script is no longer mandatory, automatic launch in an xterm, optional verbose output, and -target archive option to indicate where to extract the files.
-  * **v1.4:** Many patches from Francois Petitjean: improved UNIX compatibility, automatic integrity checking, support of LSM files to get info on the package at run time.
-  * **v1.5.x:** A lot of bugfixes, and many other patches, including automatic verification through the usage of checksums. Version 1.5.5 was the stable release for a long time, even though the Web page didn't get updated ;-). Makeself was also officially made a part of the [Loki Setup installer][11], and its source is being maintained as part of this package.
-  * **v2.0:** Complete internal rewrite of Makeself. The command-line parsing was vastly improved, and the overall maintenance of the package was greatly improved by separating the stub from makeself.sh. Makeself was also ported to and tested on a variety of Unix platforms.
-  * **v2.0.1:** First public release of the new 2.0 branch. Prior versions are officially obsoleted. This release introduced the `--copy` argument, added in response to a need for the [UT2K3][12] Linux installer.
-  * **v2.1.0:** Big change: Makeself can now support multiple embedded tarballs, each stored separately with their own checksums. An existing archive can be updated with the `--append` flag. Checksums are also better managed, and the `--nochown` option for archives appeared.
-  * **v2.1.1:** Fixes related to the Unix compression (compress command). Some Linux distributions made the insane choice to make it unavailable, even though gzip is capable of uncompressing these files, plus some more bugfixes in the extraction and checksum code.
-  * **v2.1.2:** Some bug fixes. Use head -n to avoid problems with POSIX conformance.
-  * **v2.1.3:** Bug fixes with the command line when spawning terminals. Added `--tar`, `--noexec` for archives. Added `--nomd5` and `--nocrc` to avoid creating checksums in archives. The embedded script is now run through "eval". The `--info` output now includes the command used to create the archive. A man page was contributed by Bartosz Fenski.
-  * **v2.1.4:** Fixed `--info` output. Generate random directory name when extracting files to . to avoid problems.
Better handling of errors with wrong permissions for the directory containing the files. Avoid some race conditions. Unset the $CDPATH variable to avoid problems if it is set. Better handling of dot files in the archive directory.
-  * **v2.1.5:** Made the md5sum detection consistent with the header code. Check for the presence of the archive directory. Added `--encrypt` for symmetric encryption through gpg (Eric Windisch). Added support for the digest command on Solaris 10 for MD5 checksums. Check for available disk space before extracting to the target directory (Andreas Schweitzer). Allow extraction to run asynchronously (patch by Peter Hatch). Use file descriptors internally to avoid error messages (patch by Kay Tiong Khoo).
-  * **v2.1.6:** Replaced the one-dot-per-file progress with a realtime progress percentage and a spinning cursor. Added `--noprogress` to prevent showing the progress during the decompression. Added `--target` dir to allow extracting directly to a target directory. (Guy Baconniere)
-  * **v2.2.0:** First major new release in years! Includes many bugfixes and user contributions. Please look at the [project page on Github][10] for all the details.
-  * **v2.3.0:** Support for archive encryption via GPG or OpenSSL. Added LZO and LZ4 compression support. Options to set the packaging date and stop the umask from being overridden. Optionally ignore the check for available disk space when extracting. New option to check for root permissions before extracting.
-  * **v2.3.1:** Various compatibility updates. Added unit tests for Travis CI in the GitHub repo. New `--tar-extra`, `--untar-extra`, `--gpg-extra`, `--gpg-asymmetric-encrypt-sign` options.
-  * **v2.4.0:** Added optional support for SHA256 archive integrity checksums.
-  * **v2.4.2:** New `--cleanup` and `--cleanup-args` arguments for cleanup scripts. Added threading support for supported compressors. Now supports zstd compression.
-  * **v2.4.3:** Make explicit POSIX tar archives for increased compatibility.
-  * **v2.4.4:** Fixed various compatibility issues (no longer use POSIX tar archives); added GitHub Actions checks on Solaris and FreeBSD.
-  * **v2.4.5:** Added the `--tar-format` option to set the tar archive format (default is ustar).
-
-## Links
-
-  * Check out the ["Loki Setup"][11] installer, used to install many Linux games and other applications, and of which I am the co-author. Since the demise of Loki, I am now the official maintainer of the project, and it is now being hosted here on GitHub.
-  * Bjarni R. Einarsson also wrote the **setup.sh** installer script, inspired by Makeself. [Check it out!][14]
-
-## Contact
-
-This script was written by [Stéphane Peter][15] (megastep at megastep.org). Any enhancements and suggestions are welcome.
-
-Contributions were included from John C. Quillan, Bjarni R. Einarsson,
-Francois Petitjean, Ryan C. Gordon, and many contributors on GitHub. If you think I forgot
-your name, don't hesitate to contact me.
-
-This project is now hosted on GitHub. Feel free to submit patches and bug reports on the [project page][10].
- -* * * - -[Stephane Peter][2] - - [1]: http://makeself.io/ - [2]: mailto:megastep@megastep.org - [3]: http://www.idsoftware.com/ - [4]: http://www.lokigames.com/products/myth2/updates.php3 - [5]: http://www.nvidia.com/ - [6]: http://earth.google.com/ - [7]: http://www.virtualbox.org/ - [8]: http://www.gnu.org/copyleft/gpl.html - [9]: https://github.com/megastep/makeself/releases/download/release-2.4.5/makeself-2.4.5.run - [10]: https://github.com/megastep/makeself - [11]: https://github.com/megastep/loki_setup/ - [12]: http://www.unrealtournament2003.com/ - [13]: http://www.icculus.org/ - [14]: http://bre.klaki.net/programs/setup.sh/ - [15]: https://stephanepeter.com/ diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/VERSION b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/VERSION deleted file mode 100755 index 59aa62c1fa4c234af19118ff8d8572c1d50437fd..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/VERSION +++ /dev/null @@ -1 +0,0 @@ -2.4.5 diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/make-release.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/make-release.sh deleted file mode 100755 index b5692d49071716e68c821688b9ded040bd3a11c4..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/make-release.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh -# -# Create a distributable archive of the current version of Makeself - -VER=`cat VERSION` -mkdir -p /tmp/makeself-$VER release -cp -pPR makeself* test README.md COPYING VERSION .gitmodules /tmp/makeself-$VER/ -./makeself.sh --notemp /tmp/makeself-$VER release/makeself-$VER.run "Makeself v$VER" echo "Makeself has extracted itself" - diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/makeself-header.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/makeself-header.sh deleted file mode 100755 index 9409031483e2bc377d344d64b34b13877a0afd7b..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/makeself-header.sh +++ /dev/null @@ -1,660 +0,0 @@ -cat << EOF > "$archname" -#!/bin/bash -# This script was generated using Makeself $MS_VERSION -# The license covering this archive and its contents, if any, is wholly independent of the Makeself license (GPL) -# 2022.3.19-Modified the MS_Help function and some options -# Huawei Technologies Co., Ltd. - -ORIG_UMASK=\`umask\` - -CRCsum="$CRCsum" -MD5="$MD5sum" -SHA="$SHAsum" -SIGNATURE="$Signature" -TMPROOT=\${TMPDIR:="\$HOME"} -if ! test -d "\$TMPROOT"; then - TMPROOT="\$PWD" -fi -export TMPDIR="\$TMPROOT" -USER_PWD="\$PWD" -if ! 
test -d "\$USER_PWD"; then - exit 1 -fi -export USER_PWD -ARCHIVE_DIR=\`dirname "\$0"\` -export ARCHIVE_DIR - -name_of_file="\$0 " -pwd_of_file="\$PWD" -label="$LABEL" -script="$SCRIPT" -scriptargs="$SCRIPTARGS" -cleanup_script="${CLEANUP_SCRIPT}" -licensetxt="$LICENSE" -helpheader='$HELPHEADER' -targetdir="$archdirname" -filesizes="$filesizes" -totalsize="$totalsize" -keep="$KEEP" -nooverwrite="$NOOVERWRITE" -quiet="n" -accept="n" -nodiskspace="n" -export_conf="$EXPORT_CONF" -decrypt_cmd="$DECRYPT_CMD" -skip="$SKIP" - -print_cmd_arg="" -if type printf > /dev/null; then - print_cmd="printf" -elif test -x /usr/ucb/echo; then - print_cmd="/usr/ucb/echo" -else - print_cmd="echo" -fi - -if test -d /usr/xpg4/bin; then - PATH=/usr/xpg4/bin:\$PATH - export PATH -fi - -if test -d /usr/sfw/bin; then - PATH=\$PATH:/usr/sfw/bin - export PATH -fi - -unset CDPATH - -MS_Printf() -{ - \$print_cmd \$print_cmd_arg "\$1" -} - -MS_PrintLicense() -{ - PAGER=\${PAGER:=more} - if test x"\$licensetxt" != x; then - PAGER_PATH=\`exec <&- 2>&-; which \$PAGER || command -v \$PAGER || type \$PAGER\` - if test -x "\$PAGER_PATH"; then - echo "\$licensetxt" | \$PAGER - else - echo "\$licensetxt" - fi - if test x"\$accept" != xy; then - while true - do - MS_Printf "Please type y to accept, n otherwise: " - read yn - if test x"\$yn" = xn; then - keep=n - eval \$finish; exit 1 - break; - elif test x"\$yn" = xy; then - break; - fi - done - fi - fi -} - -MS_diskspace() -{ - ( - df -kP "\$1" | tail -1 | awk '{ if (\$4 ~ /%/) {print \$3} else {print \$4} }' - ) -} - -MS_dd() -{ - blocks=\`expr \$3 / 1024\` - bytes=\`expr \$3 % 1024\` - # Test for ibs, obs and conv feature - if dd if=/dev/zero of=/dev/null count=1 ibs=512 obs=512 conv=sync 2> /dev/null; then - dd if="\$1" ibs=\$2 skip=1 obs=1024 conv=sync 2> /dev/null | \\ - { test \$blocks -gt 0 && dd ibs=1024 obs=1024 count=\$blocks ; \\ - test \$bytes -gt 0 && dd ibs=1 obs=1024 count=\$bytes ; } 2> /dev/null - else - dd if="\$1" bs=\$2 skip=1 2> /dev/null - fi -} - -MS_dd_Progress() -{ - if test x"\$noprogress" = xy; then - MS_dd "\$@" - return \$? - fi - file="\$1" - offset=\$2 - length=\$3 - pos=0 - bsize=4194304 - while test \$bsize -gt \$length; do - bsize=\`expr \$bsize / 4\` - done - blocks=\`expr \$length / \$bsize\` - bytes=\`expr \$length % \$bsize\` - ( - dd ibs=\$offset skip=1 2>/dev/null - pos=\`expr \$pos \+ \$bsize\` - MS_Printf " 0%% " 1>&2 - if test \$blocks -gt 0; then - while test \$pos -le \$length; do - dd bs=\$bsize count=1 2>/dev/null - pcent=\`expr \$length / 100\` - pcent=\`expr \$pos / \$pcent\` - if test \$pcent -lt 100; then - MS_Printf "\b\b\b\b\b\b\b" 1>&2 - if test \$pcent -lt 10; then - MS_Printf " \$pcent%% " 1>&2 - else - MS_Printf " \$pcent%% " 1>&2 - fi - fi - pos=\`expr \$pos \+ \$bsize\` - done - fi - if test \$bytes -gt 0; then - dd bs=\$bytes count=1 2>/dev/null - fi - MS_Printf "\b\b\b\b\b\b\b" 1>&2 - MS_Printf " 100%% " 1>&2 - ) < "\$file" -} - -MS_Help() -{ - cat << EOH >&2 -Usage: \$0 [options] -Options: - --help | -h Print this message - --info Print embedded info : title, default target directory, embedded script ... - --list Print the list of files in the archive - --check Checks integrity and version dependency of the archive - --quiet Quiet install mode, skip human-computer interactions - --nox11 Do not spawn an xterm - --noexec Do not run embedded script - --extract= Extract directly to a target directory (absolute or relative) - Usually used with --noexec to just extract files without running - --tar arg1 [arg2 ...] 
Access the contents of the archive through the tar command -\${helpheader} -EOH -} - -MS_Verify_Sig() -{ - GPG_PATH=\`exec <&- 2>&-; which gpg || command -v gpg || type gpg\` - MKTEMP_PATH=\`exec <&- 2>&-; which mktemp || command -v mktemp || type mktemp\` - test -x "\$GPG_PATH" || GPG_PATH=\`exec <&- 2>&-; which gpg || command -v gpg || type gpg\` - test -x "\$MKTEMP_PATH" || MKTEMP_PATH=\`exec <&- 2>&-; which mktemp || command -v mktemp || type mktemp\` - offset=\`head -n "\$skip" "\$1" | wc -c | tr -d " "\` - temp_sig=\`mktemp -t XXXXX\` - echo \$SIGNATURE | base64 --decode > "\$temp_sig" - gpg_output=\`MS_dd "\$1" \$offset \$totalsize | LC_ALL=C "\$GPG_PATH" --verify "\$temp_sig" - 2>&1\` - gpg_res=\$? - rm -f "\$temp_sig" - if test \$gpg_res -eq 0 && test \`echo \$gpg_output | grep -c Good\` -eq 1; then - if test \`echo \$gpg_output | grep -c \$sig_key\` -eq 1; then - test x"\$quiet" = xn && echo "GPG signature is good" >&2 - else - echo "GPG Signature key does not match" >&2 - exit 2 - fi - else - test x"\$quiet" = xn && echo "GPG signature failed to verify" >&2 - exit 2 - fi -} - -MS_Check() -{ - OLD_PATH="\$PATH" - PATH=\${GUESS_MD5_PATH:-"\$OLD_PATH:/bin:/usr/bin:/sbin:/usr/local/ssl/bin:/usr/local/bin:/opt/openssl/bin"} - MD5_ARG="" - MD5_PATH=\`exec <&- 2>&-; which md5sum || command -v md5sum || type md5sum\` - test -x "\$MD5_PATH" || MD5_PATH=\`exec <&- 2>&-; which md5 || command -v md5 || type md5\` - test -x "\$MD5_PATH" || MD5_PATH=\`exec <&- 2>&-; which digest || command -v digest || type digest\` - PATH="\$OLD_PATH" - - SHA_PATH=\`exec <&- 2>&-; which shasum || command -v shasum || type shasum\` - test -x "\$SHA_PATH" || SHA_PATH=\`exec <&- 2>&-; which sha256sum || command -v sha256sum || type sha256sum\` - - if test x"\$quiet" = xn; then - MS_Printf "Verifying archive integrity..." - fi - offset=\`head -n "\$skip" "\$1" | wc -c | tr -d " "\` - fsize=\`cat "\$1" | wc -c | tr -d " "\` - if test \$totalsize -ne \`expr \$fsize - \$offset\`; then - echo " Unexpected archive size." >&2 - exit 2 - fi - verb=\$2 - i=1 - for s in \$filesizes - do - crc=\`echo \$CRCsum | cut -d" " -f\$i\` - if test -x "\$SHA_PATH"; then - if test x"\`basename \$SHA_PATH\`" = xshasum; then - SHA_ARG="-a 256" - fi - sha=\`echo \$SHA | cut -d" " -f\$i\` - if test x"\$sha" = x0000000000000000000000000000000000000000000000000000000000000000; then - test x"\$verb" = xy && echo " \$1 does not contain an embedded SHA256 checksum." >&2 - else - shasum=\`MS_dd_Progress "\$1" \$offset \$s | eval "\$SHA_PATH \$SHA_ARG" | cut -b-64\`; - if test x"\$shasum" != x"\$sha"; then - echo "Error in SHA256 checksums: \$shasum is different from \$sha" >&2 - exit 2 - elif test x"\$quiet" = xn; then - MS_Printf " SHA256 checksums are OK." >&2 - fi - crc="0000000000"; - fi - fi - if test -x "\$MD5_PATH"; then - if test x"\`basename \$MD5_PATH\`" = xdigest; then - MD5_ARG="-a md5" - fi - md5=\`echo \$MD5 | cut -d" " -f\$i\` - if test x"\$md5" = x00000000000000000000000000000000; then - test x"\$verb" = xy && echo " \$1 does not contain an embedded MD5 checksum." >&2 - else - md5sum=\`MS_dd_Progress "\$1" \$offset \$s | eval "\$MD5_PATH \$MD5_ARG" | cut -b-32\`; - if test x"\$md5sum" != x"\$md5"; then - echo "Error in MD5 checksums: \$md5sum is different from \$md5" >&2 - exit 2 - elif test x"\$quiet" = xn; then - MS_Printf " MD5 checksums are OK." >&2 - fi - crc="0000000000"; verb=n - fi - fi - if test x"\$crc" = x0000000000; then - test x"\$verb" = xy && echo " \$1 does not contain a CRC checksum." 
>&2 - else - sum1=\`MS_dd_Progress "\$1" \$offset \$s | CMD_ENV=xpg4 cksum | awk '{print \$1}'\` - if test x"\$sum1" != x"\$crc"; then - echo "Error in checksums: \$sum1 is different from \$crc" >&2 - exit 2 - elif test x"\$quiet" = xn; then - MS_Printf " CRC checksums are OK." >&2 - fi - fi - i=\`expr \$i + 1\` - offset=\`expr \$offset + \$s\` - done - if test x"\$quiet" = xn; then - echo " All good." - fi -} - -MS_Decompress() -{ - if test x"\$decrypt_cmd" != x""; then - { eval "\$decrypt_cmd" || echo " ... Decryption failed." >&2; } | eval "$GUNZIP_CMD" - else - eval "$GUNZIP_CMD" - fi - - if test \$? -ne 0; then - echo " ... Decompression failed." >&2 - fi -} - -UnTAR() -{ - if test x"\$quiet" = xn; then - tar \$1vf - $UNTAR_EXTRA 2>&1 || { echo " ... Extraction failed." >&2; kill -15 \$$; } - else - tar \$1f - $UNTAR_EXTRA 2>&1 || { echo Extraction failed. >&2; kill -15 \$$; } - fi -} - -MS_exec_cleanup() { - if test x"\$cleanup" = xy && test x"\$cleanup_script" != x""; then - cleanup=n - cd "\$tmpdir" - eval "\"\$cleanup_script\" \$scriptargs \$cleanupargs" - fi -} - -MS_cleanup() -{ - echo 'Signal caught, cleaning up' >&2 - MS_exec_cleanup - cd "\$TMPROOT" - rm -rf "\$tmpdir" - eval \$finish; exit 15 -} - -Script_Args_Check() -{ - script_supported_args=\$(echo \${helpheader} | grep -o -E "\-\-[^ ]+" | awk -F"=" {'print \$1'}) - arg_to_test=\$(echo \$1|awk -F"=" {'print \$1'}) - - for arg in \${script_supported_args}; - do - if test x"\$arg_to_test" = x"\$arg" ;then - return - fi - done - - MS_Help - exit 1 -} - -finish=true -xterm_loop= -noprogress=$NOPROGRESS -nox11=$NOX11 -copy=$COPY -ownership=$OWNERSHIP -verbose=n -cleanup=y -cleanupargs= -sig_key= - -initargs="\$@" - -while [ -n "\$*" ] -do - case "\$1" in - -h | --help) - MS_Help - exit 0 - ;; - -q | --quiet) - quiet=y - noprogress=y - shift - ;; - --info) - echo Identification: "\$label" - echo Target directory: "\$targetdir" - echo Uncompressed size: $USIZE KB - echo Compression: $COMPRESS - if test x"$ENCRYPT" != x""; then - echo Encryption: $ENCRYPT - fi - echo Date of packaging: $DATE - echo Built with Makeself version $MS_VERSION - echo Build command was: "$MS_COMMAND" - if test x"\$script" != x; then - echo Script run after extraction: - echo " " \$script \$scriptargs - fi - if test x"$copy" = xcopy; then - echo "Archive will copy itself to a temporary location" - fi - if test x"$NEED_ROOT" = xy; then - echo "Root permissions required for extraction" - fi - if test x"$KEEP" = xy; then - echo "directory \$targetdir is permanent" - else - echo "\$targetdir will be removed after extraction" - fi - exit 0 - ;; - --list) - echo Target directory: \$targetdir - offset=\`head -n "\$skip" "\$0" | wc -c | tr -d " "\` - for s in \$filesizes - do - MS_dd "\$0" \$offset \$s | MS_Decompress | UnTAR t - offset=\`expr \$offset + \$s\` - done - exit 0 - ;; - --tar) - offset=\`head -n "\$skip" "\$0" | wc -c | tr -d " "\` - arg1="\$2" - shift 2 || { MS_Help; exit 1; } - for s in \$filesizes - do - MS_dd "\$0" \$offset \$s | MS_Decompress | tar "\$arg1" - "\$@" - offset=\`expr \$offset + \$s\` - done - exit 0 - ;; - --check) - MS_Check "\$0" y - scriptargs="\$scriptargs \$1" - shift - ;; - --noexec) - script="" - cleanup_script="" - shift - ;; - --extract=*) - keep=y - targetdir=\`echo \$1 | cut -d"=" -f2 \` - if ! 
shift; then MS_Help; exit 1; fi - ;; - --nox11) - nox11=y - shift - ;; - --xwin) - if test "$NOWAIT" = n; then - finish="echo Press Return to close this window...; read junk" - fi - xterm_loop=1 - shift - ;; - --phase2) - copy=phase2 - shift - ;; - --repack | --repack-path=*) - Script_Args_Check \$1 - scriptargs="\$scriptargs '\$1'" - shift - if [[ ! "\$1" =~ ^-.* ]]; then - scriptargs="\$scriptargs '\$1'" - shift - fi - ;; - *) - Script_Args_Check \$1 - scriptargs="\$scriptargs '\$1'" - shift - ;; - esac -done - -quiet_para="" -if test x"\$quiet" = xy; then - quiet_para="--quiet " -fi -scriptargs="--\$name_of_file""--\"\$pwd_of_file\""" \$quiet_para""\$scriptargs" - -if test x"\$quiet" = xy -a x"\$verbose" = xy; then - echo Cannot be verbose and quiet at the same time. >&2 - exit 1 -fi - -if test x"$NEED_ROOT" = xy -a \`id -u\` -ne 0; then - echo "Administrative privileges required for this archive (use su or sudo)" >&2 - exit 1 -fi - -if test x"\$copy" \!= xphase2; then - MS_PrintLicense -fi - -case "\$copy" in -copy) - tmpdir="\$TMPROOT"/makeself.\$RANDOM.\`date +"%y%m%d%H%M%S"\`.\$\$ - mkdir "\$tmpdir" || { - echo "Could not create temporary directory \$tmpdir" >&2 - exit 1 - } - SCRIPT_COPY="\$tmpdir/makeself" - echo "Copying to a temporary location..." >&2 - cp "\$0" "\$SCRIPT_COPY" - chmod +x "\$SCRIPT_COPY" - cd "\$TMPROOT" - exec "\$SCRIPT_COPY" --phase2 -- \$initargs - ;; -phase2) - finish="\$finish ; rm -rf \`dirname \$0\`" - ;; -esac - -if test x"\$nox11" = xn; then - if tty -s; then # Do we have a terminal? - : - else - if test x"\$DISPLAY" != x -a x"\$xterm_loop" = x; then # No, but do we have X? - if xset q > /dev/null 2>&1; then # Check for valid DISPLAY variable - GUESS_XTERMS="xterm gnome-terminal rxvt dtterm eterm Eterm xfce4-terminal lxterminal kvt konsole aterm terminology" - for a in \$GUESS_XTERMS; do - if type \$a >/dev/null 2>&1; then - XTERM=\$a - break - fi - done - chmod a+x \$0 || echo Please add execution rights on \$0 - if test \`echo "\$0" | cut -c1\` = "/"; then # Spawn a terminal! - exec \$XTERM -e "\$0 --xwin \$initargs" - else - exec \$XTERM -e "./\$0 --xwin \$initargs" - fi - fi - fi - fi -fi - -if test x"\$targetdir" = x.; then - tmpdir="." -else - if test x"\$keep" = xy; then - if test x"\$nooverwrite" = xy && test -d "\$targetdir"; then - echo "Target directory \$targetdir already exists, aborting." >&2 - exit 1 - fi - if test x"\$quiet" = xn; then - echo "Creating directory \$targetdir" >&2 - fi - tmpdir="\$targetdir" - dashp="-p" - else - tmpdir="\$TMPROOT/selfgz\$\$\$RANDOM" - dashp="" - fi - mkdir \$dashp "\$tmpdir" || { - echo 'Cannot create target directory' \$tmpdir >&2 - echo 'You should try option --extract=' >&2 - eval \$finish - exit 1 - } -fi - -location="\`pwd\`" -if test x"\$SETUP_NOCHECK" != x1; then - MS_Check "\$0" -fi -offset=\`head -n "\$skip" "\$0" | wc -c | tr -d " "\` - -if test x"\$verbose" = xy; then - MS_Printf "About to extract $USIZE KB in \$tmpdir ... Proceed ? [Y/n] " - read yn - if test x"\$yn" = xn; then - eval \$finish; exit 1 - fi -fi - -if test x"\$quiet" = xn; then - # Decrypting with openssl will ask for password, - # the prompt needs to start on new line - if test x"$ENCRYPT" = x"openssl"; then - echo "Decrypting and uncompressing \$label..." 
- else - MS_Printf "Uncompressing \$label" - fi -fi -res=3 -if test x"\$keep" = xn; then - trap MS_cleanup 1 2 3 15 -fi - -if test x"\$nodiskspace" = xn; then - leftspace=\`MS_diskspace "\$tmpdir"\` - if test -n "\$leftspace"; then - if test "\$leftspace" -lt $USIZE; then - echo - echo "Not enough space left in "\`dirname \$tmpdir\`" (\$leftspace KB) to decompress \$0 ($USIZE KB)" >&2 - if test x"\$keep" = xn; then - echo "Consider setting TMPDIR to a directory with more free space." - fi - eval \$finish; exit 1 - fi - fi -fi - -for s in \$filesizes -do - if MS_dd_Progress "\$0" \$offset \$s | MS_Decompress | ( cd "\$tmpdir"; umask \$ORIG_UMASK ; UnTAR xp ) 1>/dev/null; then - if test x"\$ownership" = xy; then - (cd "\$tmpdir"; chown -R \`id -u\` .; chgrp -R \`id -g\` .) - fi - else - echo >&2 - echo "Unable to decompress \$0" >&2 - eval \$finish; exit 1 - fi - offset=\`expr \$offset + \$s\` -done -if test x"\$quiet" = xn; then - echo -fi - -cd "\$tmpdir" -res=0 -if test x"\$script" != x; then - if test x"\$export_conf" = x"y"; then - MS_BUNDLE="\$0" - MS_LABEL="\$label" - MS_SCRIPT="\$script" - MS_SCRIPTARGS="\$scriptargs" - MS_ARCHDIRNAME="\$archdirname" - MS_KEEP="\$KEEP" - MS_NOOVERWRITE="\$NOOVERWRITE" - MS_COMPRESS="\$COMPRESS" - MS_CLEANUP="\$cleanup" - export MS_BUNDLE MS_LABEL MS_SCRIPT MS_SCRIPTARGS - export MS_ARCHDIRNAME MS_KEEP MS_NOOVERWRITE MS_COMPRESS - fi - - if test x"\$verbose" = x"y"; then - yn="x" - while test x"\$yn" != x -a x"\$yn" != xy -a x"\$yn" != xY -a x"\$yn" != xn -a x"\$yn" != xN - do - MS_Printf "OK to execute: \$script \$scriptargs \$* ? [Y/n] " - read yn - if test x"\$yn" = x -o x"\$yn" = xy -o x"\$yn" = xY; then - eval "\"\$script\" \$scriptargs \"\\\$@\""; res=\$?; - elif test x"\$yn" = xn -o x"\$yn" = xN; then - echo "Unable to decompress \$script ,because of aborting! ";res=\$? - else - echo "Input value is unacceptable,please try again." - fi - done - else - eval "\"\$script\" \$scriptargs \"\\\$@\""; res=\$? - fi - if test "\$res" -ne 0; then - test x"\$verbose" = xy && echo "The program '\$script' returned an error code (\$res)" >&2 - fi -fi - -MS_exec_cleanup - -if test x"\$keep" = xn; then - cd "\$TMPROOT" - rm -rf "\$tmpdir" -fi -eval \$finish; exit \$res -EOF diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/makeself.1 b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/makeself.1 deleted file mode 100755 index 81bf6e4ff4cfeb226c0a0992d8e6d2b94dd4f172..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/makeself.1 +++ /dev/null @@ -1,110 +0,0 @@ -.TH "MAKESELF" "1" "2.4.5" -.SH "NAME" -makeself \- An utility to generate self-extractable archives. -.SH "SYNTAX" -.B makeself [\fIoptions\fP] archive_dir file_name label -.B [\fIstartup_script\fP] [\fIargs\fP] -.SH "DESCRIPTION" -This program is a free (GPL) utility designed to create self-extractable -archives from a directory. -.SH "OPTIONS" -The following options are supported. -.TP 15 -.B -v, --version -Prints out the makeself version number and exits. -.TP -.B -h, --help -Print out help information. -.TP -.B --tar-quietly -Suppress verbose output from the tar command -.TP -.B --quiet -Do not print any messages other than errors -.TP -.B --gzip -Compress using gzip (default if detected). -.TP -.B --bzip2 -Compress using bzip2. -.TP -.B --pbzip2 -Compress using pbzip2. -.TP -.B --xz -Compress using xz. 
-.TP
-.B --lzo
-Compress using lzop.
-.TP
-.B --lz4
-Compress using lz4.
-.TP
-.B --compress
-Compress using the UNIX 'compress' command.
-.TP
-.B --nocomp
-Do not compress the data.
-.TP
-.B --complevel lvl
-Specify the compression level for gzip, bzip2, pbzip2, xz, lzo or lz4.
-.TP
-.B --notemp
-The archive will create archive_dir in the current directory and
-uncompress in ./archive_dir.
-.TP
-.B --copy
-Upon extraction, the archive will first copy itself to a temporary directory.
-.TP
-.B --append
-Append more files to an existing makeself archive. The label and startup scripts will then be ignored.
-.TP
-.B --current
-Files will be extracted to the current directory. Both --current and --target dir imply --notemp.
-.TP
-.B --target dir
-Extract directly to a target directory. Directory path can be either absolute or relative.
-.TP
-.B --header file
-Specify location of the header script.
-.TP
-.B --cleanup file
-Specify a cleanup script that executes on interrupt and when finished successfully.
-.TP
-.B --follow
-Follow the symlinks in the archive.
-.TP
-.B --noprogress
-Do not show the progress during the decompression.
-.TP
-.B --nox11
-Disable automatic spawn of an xterm if running in X11.
-.TP
-.B --nowait
-Do not wait for user input after executing embedded program from an xterm.
-.TP
-.B --nomd5
-Do not create an MD5 checksum for the archive.
-.TP
-.B --nocrc
-Do not create a CRC32 checksum for the archive.
-.TP
-.B --lsm file
-LSM file describing the package.
-.TP
-.B --packaging-date date
-Use provided string as the packaging date instead of the current date.
-.SH "EXAMPLES"
-Here is an example, assuming the user has a package image stored in /home/joe/mysoft,
-and he wants to generate a self-extracting package named mysoft.sh, which will launch
-the "setup" script initially stored in /home/joe/mysoft:
-.TP
-makeself.sh /home/joe/mysoft mysoft.sh "Joe's Nice Software Package" ./setup
-.TP
-Here is also how I created the makeself.run archive which contains the Makeself distribution:
-.TP
-makeself.sh --notemp makeself makeself.run "Makeself by Stephane Peter" echo "Makeself has extracted itself"
-.SH "AUTHORS"
-Makeself has been written by Stéphane Peter <megastep@megastep.org>.
-.BR
-This man page was originally written by Bartosz Fenski for the
-Debian GNU/Linux distribution (but it may be used by others).
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/makeself.lsm b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/makeself.lsm
deleted file mode 100755
index 3c4cea8c18982e288f0d51eba9b4d97f0f708f32..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/makeself.lsm
+++ /dev/null
@@ -1,16 +0,0 @@
-Begin3
-Title: makeself.sh
-Version: 2.4.5
-Description: makeself.sh is a shell script that generates a self-extractable
- tar.gz archive from a directory. The resulting file appears as a shell
- script, and can be launched as is. The archive will then uncompress
- itself to a temporary directory and an arbitrary command will be
- executed (for example an installation script). This is pretty similar
- to archives generated with WinZip Self-Extractor in the Windows world.
-Keywords: Installation archive tar winzip
-Author: Stephane Peter (megastep@megastep.org)
-Maintained-by: Stephane Peter (megastep@megastep.org)
-Original-site: https://makeself.io/
-Platform: Unix
-Copying-policy: GPL
-End
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/makeself.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/makeself.sh
deleted file mode 100755
index c8ea565971c5ac03c775a665596a593287881708..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/makeself.sh
+++ /dev/null
@@ -1,822 +0,0 @@
-#!/bin/sh
-#
-# Makeself version 2.4.x
-#  by Stephane Peter <megastep@megastep.org>
-#
-# Utility to create self-extracting tar.gz archives.
-# The resulting archive is a file holding the tar.gz archive with
-# a small Shell script stub that uncompresses the archive to a temporary
-# directory and then executes a given script from within that directory.
-#
-# Makeself home page: https://makeself.io/
-#
-# Version 2.0 is a rewrite of version 1.0 to make the code easier to read and maintain.
-#
-# Version history :
-# - 1.0 : Initial public release
-# - 1.1 : The archive can be passed parameters that will be passed on to
-#         the embedded script, thanks to John C. Quillan
-# - 1.2 : Package distribution, bzip2 compression, more command line options,
-#         support for non-temporary archives. Ideas thanks to Francois Petitjean
-# - 1.3 : More patches from Bjarni R. Einarsson and Francois Petitjean:
-#         Support for no compression (--nocomp), script is no longer mandatory,
-#         automatic launch in an xterm, optional verbose output, and -target
-#         archive option to indicate where to extract the files.
-# - 1.4 : Improved UNIX compatibility (Francois Petitjean)
-#         Automatic integrity checking, support of LSM files (Francois Petitjean)
-# - 1.5 : Many bugfixes. Optionally disable xterm spawning.
-# - 1.5.1 : More bugfixes, added archive options -list and -check.
-# - 1.5.2 : Cosmetic changes to inform the user of what's going on with big
-#           archives (Quake III demo)
-# - 1.5.3 : Check for validity of the DISPLAY variable before launching an xterm.
-#           More verbosity in xterms and check for embedded command's return value.
-#           Bugfix for Debian 2.0 systems that have a different "print" command.
-# - 1.5.4 : Many bugfixes. Print out a message if the extraction failed.
-# - 1.5.5 : More bugfixes. Added support for SETUP_NOCHECK environment variable to
-#           bypass checksum verification of archives.
-# - 1.6.0 : Compute MD5 checksums with the md5sum command (patch from Ryan Gordon)
-# - 2.0 : Brand new rewrite, cleaner architecture, separated header and UNIX ports.
-# - 2.0.1 : Added --copy
-# - 2.1.0 : Allow multiple tarballs to be stored in one archive, and incremental updates.
-#           Added --nochown for archives
-#           Stopped doing redundant checksums when not necessary
-# - 2.1.1 : Work around insane behavior from certain Linux distros with no 'uncompress' command
-#           Cleaned up the code to handle error codes from compress. Simplified the extraction code.
-# - 2.1.2 : Some bug fixes. Use head -n to avoid problems.
-# - 2.1.3 : Bug fixes with command line when spawning terminals.
-#           Added --tar for archives, allowing to give arbitrary arguments to tar on the contents of the archive.
-#           Added --noexec to prevent execution of embedded scripts.
-#           Added --nomd5 and --nocrc to avoid creating checksums in archives.
-#            Added command used to create the archive in --info output.
-#            Run the embedded script through eval.
-#  - 2.1.4 : Fixed --info output.
-#            Generate random directory name when extracting files to . to avoid problems. (Jason Trent)
-#            Better handling of errors with wrong permissions for the directory containing the files. (Jason Trent)
-#            Avoid some race conditions (Ludwig Nussel)
-#            Unset the $CDPATH variable to avoid problems if it is set. (Debian)
-#            Better handling of dot files in the archive directory.
-#  - 2.1.5 : Made the md5sum detection consistent with the header code.
-#            Check for the presence of the archive directory
-#            Added --encrypt for symmetric encryption through gpg (Eric Windisch)
-#            Added support for the digest command on Solaris 10 for MD5 checksums
-#            Check for available disk space before extracting to the target directory (Andreas Schweitzer)
-#            Allow extraction to run asynchronously (patch by Peter Hatch)
-#            Use file descriptors internally to avoid error messages (patch by Kay Tiong Khoo)
-#  - 2.1.6 : Replaced one dot per file progress with a realtime progress percentage and a spinning cursor (Guy Baconniere)
-#            Added --noprogress to prevent showing the progress during the decompression (Guy Baconniere)
-#            Added --target dir to allow extracting directly to a target directory (Guy Baconniere)
-#  - 2.2.0 : Many bugfixes, updates and contributions from users. Check out the project page on Github for the details.
-#  - 2.3.0 : Option to specify packaging date to enable byte-for-byte reproducibility. (Marc Pawlowsky)
-#  - 2.4.0 : Optional support for SHA256 checksums in archives.
-#  - 2.4.2 : Add support for threads for several compressors. (M. Limber)
-#            Added zstd support.
-#  - 2.4.3 : Make explicit POSIX tar archives for increased compatibility.
-#  - 2.4.5 : Added --tar-format to override ustar tar archive format
-#
-# (C) 1998-2021 by Stephane Peter
-#
-# This software is released under the terms of the GNU GPL version 2 and above
-# Please read the license at http://www.gnu.org/copyleft/gpl.html
-# Self-extracting archives created with this script are explicitly NOT released under the terms of the GPL
-#
-
-MS_VERSION=2.4.5
-MS_COMMAND="$0"
-unset CDPATH
-
-for f in ${1+"$@"}; do
-    MS_COMMAND="$MS_COMMAND \\\\
-    \\\"$f\\\""
-done
-
-# For Solaris systems
-if test -d /usr/xpg4/bin; then
-    PATH=/usr/xpg4/bin:$PATH
-    export PATH
-fi
-
-# Procedures
-
-MS_Usage()
-{
-    echo "Usage: $0 [args] archive_dir file_name label startup_script [script_args]"
-    echo "args can be one or more of the following :"
-    echo "    --version | -v     : Print out Makeself version number and exit"
-    echo "    --help | -h        : Print out this help message"
-    echo "    --tar-quietly      : Suppress verbose output from the tar command"
-    echo "    --quiet | -q       : Do not print any messages other than errors."
-    echo "    --gzip             : Compress using gzip (default if detected)"
-    echo "    --pigz             : Compress with pigz"
-    echo "    --zstd             : Compress with zstd"
-    echo "    --bzip2            : Compress using bzip2 instead of gzip"
-    echo "    --pbzip2           : Compress using pbzip2 instead of gzip"
-    echo "    --xz               : Compress using xz instead of gzip"
-    echo "    --lzo              : Compress using lzop instead of gzip"
-    echo "    --lz4              : Compress using lz4 instead of gzip"
-    echo "    --compress         : Compress using the UNIX 'compress' command"
-    echo "    --complevel lvl    : Compression level for gzip pigz zstd xz lzo lz4 bzip2 and pbzip2 (default 9)"
-    echo "    --threads thds     : Number of threads to be used by compressors that support parallelization."
-    echo "                         Omit to use compressor's default. Most useful (and required) for opting"
-    echo "                         into xz's threading, usually with '--threads=0' for all available cores."
-    echo "                         pbzip2 and pigz are parallel by default, and setting this value allows"
-    echo "                         limiting the number of threads they use."
-    echo "    --base64           : Instead of compressing, encode the data using base64"
-    echo "    --gpg-encrypt      : Instead of compressing, encrypt the data using GPG"
-    echo "    --gpg-asymmetric-encrypt-sign"
-    echo "                       : Instead of compressing, asymmetrically encrypt and sign the data using GPG"
-    echo "    --gpg-extra opt    : Append more options to the gpg command line"
-    echo "    --ssl-encrypt      : Instead of compressing, encrypt the data using OpenSSL"
-    echo "    --ssl-passwd pass  : Use the given password to encrypt the data using OpenSSL"
-    echo "    --ssl-pass-src src : Use the given src as the source of password to encrypt the data"
-    echo "                         using OpenSSL. See \"PASS PHRASE ARGUMENTS\" in man openssl."
-    echo "                         If this option is not supplied, the user will be asked to enter"
-    echo "                         encryption password on the current terminal."
-    echo "    --ssl-no-md        : Do not use the \"-md\" option, which is not supported by older OpenSSL."
-    echo "    --nochown          : Do not give the target folder to the current user (default)"
-    echo "    --chown            : Give the target folder to the current user recursively"
-    echo "    --nocomp           : Do not compress the data"
-    echo "    --notemp           : The archive will create archive_dir in the"
-    echo "                         current directory and uncompress in ./archive_dir"
-    echo "    --needroot         : Check that the root user is extracting the archive before proceeding"
-    echo "    --copy             : Upon extraction, the archive will first copy itself to"
-    echo "                         a temporary directory"
-    echo "    --append           : Append more files to an existing Makeself archive"
-    echo "                         The label and startup scripts will then be ignored"
-    echo "    --target dir       : Extract directly to a target directory"
-    echo "                         directory path can be either absolute or relative"
-    echo "    --nooverwrite      : Do not extract the archive if the specified target directory exists"
-    echo "    --current          : Files will be extracted to the current directory"
-    echo "                         Both --current and --target imply --notemp"
-    echo "    --tar-format opt   : Specify a tar archive format (default is ustar)"
-    echo "    --tar-extra opt    : Append more options to the tar command line"
-    echo "    --untar-extra opt  : Append more options to tar during the extraction of the tar archive"
-    echo "    --nomd5            : Don't calculate an MD5 for archive"
-    echo "    --nocrc            : Don't calculate a CRC for archive"
-    echo "    --sha256           : Compute a SHA256 checksum for the archive"
-    echo "    --header file      : Specify location of the header script"
-    echo "    --cleanup file     : Specify a cleanup script that executes on interrupt and when finished successfully."
-    echo "    --follow           : Follow the symlinks in the archive"
-    echo "    --noprogress       : Do not show the progress during the decompression"
-    echo "    --nox11            : Disable automatic spawn of an xterm"
-    echo "    --nowait           : Do not wait for user input after executing embedded"
-    echo "                         program from an xterm"
-    echo "    --sign passphrase  : Signature private key to sign the package with"
-    echo "    --lsm file         : LSM file describing the package"
-    echo "    --license file     : Append a license file"
-    echo "    --help-header file : Add a header to the archive's --help output"
-    echo "    --packaging-date date"
-    echo "                       : Use provided string as the packaging date"
-    echo "                         instead of the current date."
-    echo
-    echo "    --keep-umask       : Keep the umask set to shell default, rather than overriding when executing self-extracting archive."
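Two of the less obvious flags just described in action, as hypothetical invocations (payload paths and labels are invented; the option listing continues below):

```bash
# xz only parallelizes when asked; per the help text above, --threads 0
# lets it use all available cores.
./makeself.sh --xz --threads 0 ./payload app.run "App installer" ./setup

# OpenSSL symmetric encryption, with the password taken from an environment
# variable via an openssl pass-source spec (see "PASS PHRASE ARGUMENTS").
ARCHIVE_PW=secret ./makeself.sh --ssl-encrypt --ssl-pass-src env:ARCHIVE_PW \
    ./payload app-enc.run "App installer" ./setup
```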
- echo " --export-conf : Export configuration variables to startup_script" - echo - echo "Do not forget to give a fully qualified startup script name" - echo "(i.e. with a ./ prefix if inside the archive)." - exit 1 -} - -# Default settings -if type gzip >/dev/null 2>&1; then - COMPRESS=gzip -elif type compress >/dev/null 2>&1; then - COMPRESS=compress -else - echo "ERROR: missing commands: gzip, compress" >&2 - MS_Usage -fi -ENCRYPT=n -PASSWD="" -PASSWD_SRC="" -OPENSSL_NO_MD=n -COMPRESS_LEVEL=9 -DEFAULT_THREADS=123456 # Sentinel value -THREADS=$DEFAULT_THREADS -KEEP=n -CURRENT=n -NOX11=n -NOWAIT=n -APPEND=n -TAR_QUIETLY=n -KEEP_UMASK=n -QUIET=n -NOPROGRESS=n -COPY=none -NEED_ROOT=n -TAR_ARGS=rvf -TAR_FORMAT=ustar -TAR_EXTRA="" -GPG_EXTRA="" -DU_ARGS=-ks -HEADER=`dirname "$0"`/makeself-header.sh -SIGNATURE="" -TARGETDIR="" -NOOVERWRITE=n -DATE=`LC_ALL=C date` -EXPORT_CONF=n -SHA256=n -OWNERSHIP=n -SIGN=n -GPG_PASSPHRASE="" - -# LSM file stuff -LSM_CMD="echo No LSM. >> \"\$archname\"" - -while true -do - case "$1" in - --version | -v) - echo Makeself version $MS_VERSION - exit 0 - ;; - --pbzip2) - COMPRESS=pbzip2 - shift - ;; - --bzip2) - COMPRESS=bzip2 - shift - ;; - --gzip) - COMPRESS=gzip - shift - ;; - --pigz) - COMPRESS=pigz - shift - ;; - --zstd) - COMPRESS=zstd - shift - ;; - --xz) - COMPRESS=xz - shift - ;; - --lzo) - COMPRESS=lzo - shift - ;; - --lz4) - COMPRESS=lz4 - shift - ;; - --compress) - COMPRESS=compress - shift - ;; - --base64) - COMPRESS=base64 - shift - ;; - --gpg-encrypt) - COMPRESS=gpg - shift - ;; - --gpg-asymmetric-encrypt-sign) - COMPRESS=gpg-asymmetric - shift - ;; - --gpg-extra) - GPG_EXTRA="$2" - shift 2 || { MS_Usage; exit 1; } - ;; - --ssl-encrypt) - ENCRYPT=openssl - shift - ;; - --ssl-passwd) - PASSWD=$2 - shift 2 || { MS_Usage; exit 1; } - ;; - --ssl-pass-src) - PASSWD_SRC=$2 - shift 2 || { MS_Usage; exit 1; } - ;; - --ssl-no-md) - OPENSSL_NO_MD=y - shift - ;; - --nocomp) - COMPRESS=none - shift - ;; - --complevel) - COMPRESS_LEVEL="$2" - shift 2 || { MS_Usage; exit 1; } - ;; - --threads) - THREADS="$2" - shift 2 || { MS_Usage; exit 1; } - ;; - --nochown) - OWNERSHIP=n - shift - ;; - --chown) - OWNERSHIP=y - shift - ;; - --notemp) - KEEP=y - shift - ;; - --copy) - COPY=copy - shift - ;; - --current) - CURRENT=y - KEEP=y - shift - ;; - --tar-format) - TAR_FORMAT="$2" - shift 2 || { MS_Usage; exit 1; } - ;; - --tar-extra) - TAR_EXTRA="$2" - shift 2 || { MS_Usage; exit 1; } - ;; - --untar-extra) - UNTAR_EXTRA="$2" - shift 2 || { MS_Usage; exit 1; } - ;; - --target) - TARGETDIR="$2" - KEEP=y - shift 2 || { MS_Usage; exit 1; } - ;; - --sign) - SIGN=y - GPG_PASSPHRASE="$2" - shift 2 || { MS_Usage; exit 1; } - ;; - --nooverwrite) - NOOVERWRITE=y - shift - ;; - --needroot) - NEED_ROOT=y - shift - ;; - --header) - HEADER="$2" - shift 2 || { MS_Usage; exit 1; } - ;; - --cleanup) - CLEANUP_SCRIPT="$2" - shift 2 || { MS_Usage; exit 1; } - ;; - --license) - # We need to escape all characters having a special meaning in double quotes - LICENSE=$(sed 's/\\/\\\\/g; s/"/\\\"/g; s/`/\\\`/g; s/\$/\\\$/g' "$2") - shift 2 || { MS_Usage; exit 1; } - ;; - --follow) - TAR_ARGS=rvhf - DU_ARGS=-ksL - shift - ;; - --noprogress) - NOPROGRESS=y - shift - ;; - --nox11) - NOX11=y - shift - ;; - --nowait) - NOWAIT=y - shift - ;; - --nomd5) - NOMD5=y - shift - ;; - --sha256) - SHA256=y - shift - ;; - --nocrc) - NOCRC=y - shift - ;; - --append) - APPEND=y - shift - ;; - --lsm) - LSM_CMD="cat \"$2\" >> \"\$archname\"" - shift 2 || { MS_Usage; exit 1; } - ;; - --packaging-date) - DATE="$2" - 
shift 2 || { MS_Usage; exit 1; } - ;; - --help-header) - HELPHEADER=`sed -e "s/'/'\\\\\''/g" $2` - shift 2 || { MS_Usage; exit 1; } - [ -n "$HELPHEADER" ] && HELPHEADER="$HELPHEADER -" - ;; - --tar-quietly) - TAR_QUIETLY=y - shift - ;; - --keep-umask) - KEEP_UMASK=y - shift - ;; - --export-conf) - EXPORT_CONF=y - shift - ;; - -q | --quiet) - QUIET=y - shift - ;; - -h | --help) - MS_Usage - ;; - -*) - echo Unrecognized flag : "$1" - MS_Usage - ;; - *) - break - ;; - esac -done - -if test $# -lt 1; then - MS_Usage -else - if test -d "$1"; then - archdir="$1" - else - echo "Directory $1 does not exist." >&2 - exit 1 - fi -fi -archname="$2" - -if test "$QUIET" = "y" || test "$TAR_QUIETLY" = "y"; then - if test "$TAR_ARGS" = "rvf"; then - TAR_ARGS="rf" - elif test "$TAR_ARGS" = "rvhf"; then - TAR_ARGS="rhf" - fi -fi - -if test "$APPEND" = y; then - if test $# -lt 2; then - MS_Usage - fi - - # Gather the info from the original archive - OLDENV=`sh "$archname" --dumpconf` - if test $? -ne 0; then - echo "Unable to update archive: $archname" >&2 - exit 1 - else - eval "$OLDENV" - OLDSKIP=`expr $SKIP + 1` - fi -else - if test "$KEEP" = n -a $# = 3; then - echo "ERROR: Making a temporary archive with no embedded command does not make sense!" >&2 - echo >&2 - MS_Usage - fi - # We don't want to create an absolute directory unless a target directory is defined - if test "$CURRENT" = y; then - archdirname="." - elif test x"$TARGETDIR" != x; then - archdirname="$TARGETDIR" - else - archdirname=`basename "$1"` - fi - - if test $# -lt 3; then - MS_Usage - fi - - LABEL="$3" - SCRIPT="$4" - test "x$SCRIPT" = x || shift 1 - shift 3 - SCRIPTARGS="$*" -fi - -if test "$KEEP" = n -a "$CURRENT" = y; then - echo "ERROR: It is A VERY DANGEROUS IDEA to try to combine --notemp and --current." 
>&2 - exit 1 -fi - -case $COMPRESS in -gzip) - GZIP_CMD="gzip -c$COMPRESS_LEVEL" - GUNZIP_CMD="gzip -cd" - ;; -pigz) - GZIP_CMD="pigz -$COMPRESS_LEVEL" - if test $THREADS -ne $DEFAULT_THREADS; then # Leave as the default if threads not indicated - GZIP_CMD="$GZIP_CMD --processes $THREADS" - fi - GUNZIP_CMD="gzip -cd" - ;; -zstd) - GZIP_CMD="zstd -$COMPRESS_LEVEL" - if test $THREADS -ne $DEFAULT_THREADS; then # Leave as the default if threads not indicated - GZIP_CMD="$GZIP_CMD --threads=$THREADS" - fi - GUNZIP_CMD="zstd -cd" - ;; -pbzip2) - GZIP_CMD="pbzip2 -c$COMPRESS_LEVEL" - if test $THREADS -ne $DEFAULT_THREADS; then # Leave as the default if threads not indicated - GZIP_CMD="$GZIP_CMD -p$THREADS" - fi - GUNZIP_CMD="bzip2 -d" - ;; -bzip2) - GZIP_CMD="bzip2 -$COMPRESS_LEVEL" - GUNZIP_CMD="bzip2 -d" - ;; -xz) - GZIP_CMD="xz -c$COMPRESS_LEVEL" - # Must opt-in by specifying a value since not all versions of xz support threads - if test $THREADS -ne $DEFAULT_THREADS; then - GZIP_CMD="$GZIP_CMD --threads=$THREADS" - fi - GUNZIP_CMD="xz -d" - ;; -lzo) - GZIP_CMD="lzop -c$COMPRESS_LEVEL" - GUNZIP_CMD="lzop -d" - ;; -lz4) - GZIP_CMD="lz4 -c$COMPRESS_LEVEL" - GUNZIP_CMD="lz4 -d" - ;; -base64) - GZIP_CMD="base64" - GUNZIP_CMD="base64 --decode -i -" - ;; -gpg) - GZIP_CMD="gpg $GPG_EXTRA -ac -z$COMPRESS_LEVEL" - GUNZIP_CMD="gpg -d" - ENCRYPT="gpg" - ;; -gpg-asymmetric) - GZIP_CMD="gpg $GPG_EXTRA -z$COMPRESS_LEVEL -es" - GUNZIP_CMD="gpg --yes -d" - ENCRYPT="gpg" - ;; -compress) - GZIP_CMD="compress -fc" - GUNZIP_CMD="(type compress >/dev/null 2>&1 && compress -fcd || gzip -cd)" - ;; -none) - GZIP_CMD="cat" - GUNZIP_CMD="cat" - ;; -esac - -if test x"$ENCRYPT" = x"openssl"; then - if test x"$APPEND" = x"y"; then - echo "Appending to existing archive is not compatible with OpenSSL encryption." >&2 - fi - - ENCRYPT_CMD="openssl enc -aes-256-cbc -salt" - DECRYPT_CMD="openssl enc -aes-256-cbc -d" - - if test x"$OPENSSL_NO_MD" != x"y"; then - ENCRYPT_CMD="$ENCRYPT_CMD -md sha256" - DECRYPT_CMD="$DECRYPT_CMD -md sha256" - fi - - if test -n "$PASSWD_SRC"; then - ENCRYPT_CMD="$ENCRYPT_CMD -pass $PASSWD_SRC" - elif test -n "$PASSWD"; then - ENCRYPT_CMD="$ENCRYPT_CMD -pass pass:$PASSWD" - fi -fi - -tmpfile="${TMPDIR:-/tmp}/mkself$$" - -if test -f "$HEADER"; then - oldarchname="$archname" - archname="$tmpfile" - # Generate a fake header to count its lines - SKIP=0 - . "$HEADER" - SKIP=`cat "$tmpfile" |wc -l` - # Get rid of any spaces - SKIP=`expr $SKIP` - rm -f "$tmpfile" - if test "$QUIET" = "n"; then - echo "Header is $SKIP lines long" >&2 - fi - archname="$oldarchname" -else - echo "Unable to open header file: $HEADER" >&2 - exit 1 -fi - -if test "$QUIET" = "n"; then - echo -fi - -if test "$APPEND" = n; then - if test -f "$archname"; then - echo "WARNING: Overwriting existing file: $archname" >&2 - fi -fi - -USIZE=`du $DU_ARGS "$archdir" | awk '{print $1}'` - -if test "." = "$archdirname"; then - if test "$KEEP" = n; then - archdirname="makeself-$$-`date +%Y%m%d%H%M%S`" - fi -fi - -test -d "$archdir" || { echo "Error: $archdir does not exist."; rm -f "$tmpfile"; exit 1; } -if test "$QUIET" = "n"; then - echo "About to compress $USIZE KB of data..." - echo "Adding files to archive named \"$archname\"..." 
-fi - -# See if we have GNU tar -TAR=`exec <&- 2>&-; which gtar || command -v gtar || type gtar` -test -x "$TAR" || TAR=tar - -tmparch="${TMPDIR:-/tmp}/mkself$$.tar" -( - if test "$APPEND" = "y"; then - tail -n "+$OLDSKIP" "$archname" | eval "$GUNZIP_CMD" > "$tmparch" - fi - cd "$archdir" - # "Determining if a directory is empty" - # https://www.etalabs.net/sh_tricks.html - find . \ - \( \ - ! -type d \ - -o \ - \( -links 2 -exec sh -c ' - is_empty () ( - cd "$1" - set -- .[!.]* ; test -f "$1" && return 1 - set -- ..?* ; test -f "$1" && return 1 - set -- * ; test -f "$1" && return 1 - return 0 - ) - is_empty "$0"' {} \; \ - \) \ - \) -print \ - | LC_ALL=C sort \ - | sed 's/./\\&/g' \ - | xargs $TAR $TAR_EXTRA --format $TAR_FORMAT -$TAR_ARGS "$tmparch" -) || { - echo "ERROR: failed to create temporary archive: $tmparch" - rm -f "$tmparch" "$tmpfile" - exit 1 -} - -USIZE=`du $DU_ARGS "$tmparch" | awk '{print $1}'` - -eval "$GZIP_CMD" <"$tmparch" >"$tmpfile" || { - echo "ERROR: failed to create temporary file: $tmpfile" - rm -f "$tmparch" "$tmpfile" - exit 1 -} -rm -f "$tmparch" - -if test x"$ENCRYPT" = x"openssl"; then - echo "About to encrypt archive \"$archname\"..." - { eval "$ENCRYPT_CMD -in $tmpfile -out ${tmpfile}.enc" && mv -f ${tmpfile}.enc $tmpfile; } || \ - { echo Aborting: could not encrypt temporary file: "$tmpfile".; rm -f "$tmpfile"; exit 1; } -fi - -fsize=`cat "$tmpfile" | wc -c | tr -d " "` - -# Compute the checksums - -shasum=0000000000000000000000000000000000000000000000000000000000000000 -md5sum=00000000000000000000000000000000 -crcsum=0000000000 - -if test "$NOCRC" = y; then - if test "$QUIET" = "n"; then - echo "skipping crc at user request" - fi -else - crcsum=`CMD_ENV=xpg4 cksum < "$tmpfile" | sed -e 's/ /Z/' -e 's/ /Z/' | cut -dZ -f1` - if test "$QUIET" = "n"; then - echo "CRC: $crcsum" - fi -fi - -if test "$SHA256" = y; then - SHA_PATH=`exec <&- 2>&-; which shasum || command -v shasum || type shasum` - if test -x "$SHA_PATH"; then - shasum=`eval "$SHA_PATH -a 256" < "$tmpfile" | cut -b-64` - else - SHA_PATH=`exec <&- 2>&-; which sha256sum || command -v sha256sum || type sha256sum` - shasum=`eval "$SHA_PATH" < "$tmpfile" | cut -b-64` - fi - if test "$QUIET" = "n"; then - if test -x "$SHA_PATH"; then - echo "SHA256: $shasum" - else - echo "SHA256: none, SHA command not found" - fi - fi -fi -if test "$NOMD5" = y; then - if test "$QUIET" = "n"; then - echo "Skipping md5sum at user request" - fi -else - # Try to locate a MD5 binary - OLD_PATH=$PATH - PATH=${GUESS_MD5_PATH:-"$OLD_PATH:/bin:/usr/bin:/sbin:/usr/local/ssl/bin:/usr/local/bin:/opt/openssl/bin"} - MD5_ARG="" - MD5_PATH=`exec <&- 2>&-; which md5sum || command -v md5sum || type md5sum` - test -x "$MD5_PATH" || MD5_PATH=`exec <&- 2>&-; which md5 || command -v md5 || type md5` - test -x "$MD5_PATH" || MD5_PATH=`exec <&- 2>&-; which digest || command -v digest || type digest` - PATH=$OLD_PATH - if test -x "$MD5_PATH"; then - if test `basename ${MD5_PATH}`x = digestx; then - MD5_ARG="-a md5" - fi - md5sum=`eval "$MD5_PATH $MD5_ARG" < "$tmpfile" | cut -b-32` - if test "$QUIET" = "n"; then - echo "MD5: $md5sum" - fi - else - if test "$QUIET" = "n"; then - echo "MD5: none, MD5 command not found" - fi - fi -fi -if test "$SIGN" = y; then - GPG_PATH=`exec <&- 2>&-; which gpg || command -v gpg || type gpg` - if test -x "$GPG_PATH"; then - SIGNATURE=`$GPG_PATH --pinentry-mode=loopback --batch --yes --passphrase "$GPG_PASSPHRASE" --output - --detach-sig $tmpfile | base64 | tr -d \\\\n` - if test "$QUIET" = "n"; then - echo 
"Signature: $SIGNATURE" - fi - else - echo "Missing gpg command" >&2 - fi -fi - -totalsize=0 -for size in $fsize; -do - totalsize=`expr $totalsize + $size` -done - -if test "$APPEND" = y; then - mv "$archname" "$archname".bak || exit - - # Prepare entry for new archive - filesizes="$fsize" - CRCsum="$crcsum" - MD5sum="$md5sum" - SHAsum="$shasum" - Signature="$SIGNATURE" - # Generate the header - . "$HEADER" - # Append the new data - cat "$tmpfile" >> "$archname" - - chmod +x "$archname" - rm -f "$archname".bak - if test "$QUIET" = "n"; then - echo "Self-extractable archive \"$archname\" successfully updated." - fi -else - filesizes="$fsize" - CRCsum="$crcsum" - MD5sum="$md5sum" - SHAsum="$shasum" - Signature="$SIGNATURE" - - # Generate the header - . "$HEADER" - - # Append the compressed tar data after the stub - if test "$QUIET" = "n"; then - echo - fi - cat "$tmpfile" >> "$archname" - chmod +x "$archname" - if test "$QUIET" = "n"; then - echo Self-extractable archive \"$archname\" successfully created. - fi -fi -rm -f "$tmpfile" diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/run-tests.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/run-tests.sh deleted file mode 100755 index 31ee1651156c64caddfdadd683d4dc2d0be3ddc4..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/makeself/run-tests.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -# Run every available test - Bash needed -cd test -for test in *test; -do - echo "Running test $test ..." - bash $test || { echo "*** ERROR: Test '$test' failed!"; exit 1; } -done diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/merge_aicpu_info_json.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/merge_aicpu_info_json.sh deleted file mode 100755 index a977bd51d2e98a1511db4296070a8dda6b90a262..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/merge_aicpu_info_json.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -project_path=$1 -build_path=$2 -vendor_name=customize -echo $@ -if [[ ! -d "$project_path" ]]; then - echo "[ERROR] No projcet path is provided" - exit 1 -fi - -if [[ ! -d "$build_path" ]]; then - echo "[ERROR] No build path is provided" - exit 1 -fi - -if [[ ! 
-d "$ASCEND_OPP_PATH" ]]; then - echo "[ERROR] No opp install path is provided" - exit 1 -fi -custom_exist_info_json=$ASCEND_OPP_PATH/vendors/$vendor_name/op_impl/cpu/config/cust_aicpu_kernel.json -custom_new_info_json=$build_path/makepkg/packages/vendors/$vendor_name/op_impl/cpu/config/cust_aicpu_kernel.json -temp_info_json=$build_path/makepkg/packages/vendors/$vendor_name/op_impl/cpu/config/temp_cust_aicpu_kernel.json - -if [[ -f "$custom_exist_info_json" ]] && [[ -f "$custom_new_info_json" ]]; then - cp -f $custom_exist_info_json $temp_info_json - chmod +w $temp_info_json - python3 ${project_path}/cmake/util/insert_op_info.py ${custom_new_info_json} ${temp_info_json} - cp -f $temp_info_json $custom_new_info_json - rm -f $temp_info_json -fi diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/opdesc_parser.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/opdesc_parser.py deleted file mode 100755 index c38e79edc89a80b528581afafdbc119db68ea590..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/opdesc_parser.py +++ /dev/null @@ -1,260 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Created on Feb 28 20:56:45 2020 -Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. -""" - -import sys -import os - - -OP_ALL = '__ALLOP__' -SOC_ALL = '__ALLSOC__' -SOC_TO_SHORT_SOC_MAP = { - "ascend910a": "ascend910", - "ascend910proa": "ascend910", - "ascend910b": "ascend910", - "ascend910prob": "ascend910", - "ascend910premiuma": "ascend910", - "ascend910b1": "ascend910b", - "ascend910b2": "ascend910b", - "ascend910b3": "ascend910b", - "ascend910b4": "ascend910b", - "ascend310p1": "ascend310p", - "ascend310p3": "ascend310p", - "ascend310p3vir01": "ascend310p", - "ascend310p3vir02": "ascend310p", - "ascend310p3vir04": "ascend310p", - "ascend310p3vir08": "ascend310p", - "ascend310b1": "ascend310b", - "bs9sx1aa": "bs9sx1a" -} - - -class OpDesc: - def __init__(self: any, op_type: str): - self.op_type = op_type - self.attr_list = [] - self.attr_val = {} - self.input_name = [] - self.input_type = [] - self.input_dtype = [] - self.input_fmt = [] - self.output_name = [] - self.output_type = [] - self.output_dtype = [] - self.output_fmt = [] - self.op_fmt_sel = False - self.op_chk_support = False - self.op_intf = '' - self.kern_name = '' - self.op_file = '' - self.op_replay_flag = False - self.op_replay_batch = False - self.input_idx = -1 - self.output_idx = -1 - self.max_block_dim = 32 - self.max_shape_size = 268435456 - self.dynamic_shape = False - self.op_range_limit = '' - self.custom_compile_options = {} - self.custom_all_compile_options = {} - - @staticmethod - def _parse_digit(conf: str) -> int: - return int(conf.split('=')[1]) - - @staticmethod - def _parse_flag(conf: str) -> bool: - if 'true' == conf.split('=')[1]: - return True - return False - - @staticmethod - def _parse_str(conf: str) -> str: - return conf.split('=')[1] - - @staticmethod - def _parse_list(conf: str) -> list: - return conf.split('=')[1].split(',') - - def parse_input(self: any, conf: str): - if conf.startswith('input{}.name'.format(int(self.input_idx) + 1)): - self.input_idx += 1 - self.input_name.append(self._parse_str(conf)) - elif conf.startswith('input{}.paramType'.format(int(self.input_idx))): - self.input_type.append(self._parse_str(conf)) - elif conf.startswith('input{}.dtype'.format(int(self.input_idx))): - 
self.input_dtype.append(self._parse_str(conf)) - elif conf.startswith('input{}.format'.format(int(self.input_idx))): - self.input_fmt.append(self._parse_str(conf)) - else: - return - - def parse_output(self: any, conf: str): - if conf.startswith('output{}.name'.format(int(self.output_idx) + 1)): - self.output_idx += 1 - self.output_name.append(self._parse_str(conf)) - elif conf.startswith('output{}.paramType'.format(int(self.output_idx))): - self.output_type.append(self._parse_str(conf)) - elif conf.startswith('output{}.dtype'.format(int(self.output_idx))): - self.output_dtype.append(self._parse_str(conf)) - elif conf.startswith('output{}.format'.format(int(self.output_idx))): - self.output_fmt.append(self._parse_str(conf)) - else: - return - - def parse_op_format(self: any, conf: str): - self.op_fmt_sel = self._parse_flag(conf) - - def parse_check_support(self: any, conf: str): - self.op_chk_support = self._parse_flag(conf) - - def parse_range_limit(self: any, conf: str): - self.op_range_limit = self._parse_str(conf) - - def parse_kern_name(self: any, conf: str): - self.kern_name = self._parse_str(conf) - - def parse_op_intf(self: any, conf: str): - self.op_intf = self._parse_str(conf) - - def parse_op_file(self: any, conf: str): - self.op_file = self._parse_str(conf) - - def parse_dynamic_shape(self: any, conf: str): - self.dynamic_shape = self._parse_flag(conf) - - def parse_attr_list(self: any, conf: str): - self.attr_list = self._parse_list(conf) - - def parse_attr_val(self: any, conf: str): - for attr in self.attr_list: - if self.attr_val.get(attr) is None: - self.attr_val[attr] = {} - if conf.startswith('attr_{}.type'.format(attr)): - self.attr_val.get(attr)['type'] = self._parse_str(conf) - elif conf.startswith('attr_{}.paramType'.format(attr)): - self.attr_val.get(attr)['paramType'] = self._parse_str(conf) - elif conf.startswith('attr_{}.defaultValue'.format(attr)): - self.attr_val.get(attr)['defaultValue'] = self._parse_str(conf) - - def parse_replay_val(self: any, batch_list: list, iterator_list: list): - if self.op_type in batch_list: - self.op_replay_flag = True - self.op_replay_batch = True - elif self.op_type in iterator_list: - self.op_replay_flag = True - self.op_replay_batch = False - - -def _is_op_type_in_opdesc(op_descs: list, op_type: str): - for op in op_descs: - if op_type == op.op_type: - return True - return False - - -def _set_all_options_to_opdescs(op_descs, soc_ver_compile_options): - for op in op_descs: - op.custom_all_compile_options = soc_ver_compile_options - - -def _set_options_to_opdesc(op_descs, op_type, soc_ver_compile_options): - for op in op_descs: - if op.op_type != op_type: - continue - op.custom_compile_options = soc_ver_compile_options - - -def _trans_soc_ver_to_short(soc_ver: str): - low_soc_ver = soc_ver.lower() - if low_soc_ver not in SOC_TO_SHORT_SOC_MAP: - print(f'WARNING: caution: {soc_ver} will trans into ascend910, if not your intention,' - f'use ascend910b1~4 instead') - return SOC_TO_SHORT_SOC_MAP[low_soc_ver] - - -def _get_op_custom_options(op_descs: list, auto_gen_dir: str): - if auto_gen_dir is None: - return {} - file = os.path.join(auto_gen_dir, "custom_compile_options.ini") - if not os.path.exists(file): - print(f'WARNING: cannot find {auto_gen_dir}/custom_compile_options.ini') - return {} - with open (file, 'r') as fd: - lines = fd.readlines() - for line in lines: - param_list = str.split(line.rstrip('\n'), ',') - if len(param_list) != 3: - raise Exception(f'ERROR: custom compile option {param_list} len is not 3') - op_type = 
param_list[0] - if op_type.upper() == 'ALL': - op_type = OP_ALL - if op_type != OP_ALL and _is_op_type_in_opdesc(op_descs, op_type) == False: - print(f'WARNING: op: {op_type} are not exists in this project') - continue - soc_ver_compile_options = {} - soc_ver = param_list[1] - options_str = param_list[2] - options = str.split(options_str, ';') - if soc_ver == '': - soc_ver_compile_options[SOC_ALL] = options - else: - soc_ver_list = str.split(soc_ver, ';') - for ver in soc_ver_list: - short_ver = _trans_soc_ver_to_short(ver) - soc_ver_compile_options[short_ver] = options - if op_type == OP_ALL: - _set_all_options_to_opdescs(op_descs, soc_ver_compile_options) - else: - _set_options_to_opdesc(op_descs, op_type, soc_ver_compile_options) - - -def get_op_desc(file: str, batch_list: list, iterator_list: list, builder: any, - op_type: list, auto_gen_dir: str = None) -> list: - op_descs = [] - op_match = False - with open (file, 'r') as fd: - lines = fd.readlines() - for line in lines: - line = line.strip() - if line.startswith('['): - name = line[1:-1] - if op_type is None or name in op_type: - op_match = True - op_desc = builder(name) - op_desc.parse_replay_val(batch_list, iterator_list) - op_descs.append(op_desc) - else: - op_match = False - if op_type is not None and len(op_descs) == len(op_type): - return op_descs - continue - if not op_match: - continue - if line.startswith('input'): - op_desc.parse_input(line) - elif line.startswith('output'): - op_desc.parse_output(line) - elif line.startswith('dynamicFormat.flag'): - op_desc.parse_op_format(line) - elif line.startswith('needCheckSupport.flag'): - op_desc.parse_check_support(line) - elif line.startswith('rangeLimit.value'): - op_desc.parse_range_limit(line) - elif line.startswith('opInterface.value'): - op_desc.parse_op_intf(line) - elif line.startswith('kernel.name'): - op_desc.parse_kern_name(line) - elif line.startswith('opFile.value'): - op_desc.parse_op_file(line) - elif line.startswith('dynamicShapeSupport.flag'): - op_desc.parse_dynamic_shape(line) - elif line.startswith('attr.list'): - op_desc.parse_attr_list(line) - elif line.startswith('attr_'): - op_desc.parse_attr_val(line) - _get_op_custom_options(op_descs, auto_gen_dir) - return op_descs diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/parse_ini_to_json.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/parse_ini_to_json.py deleted file mode 100755 index 721465fee306bfca8895e2991ba06d1547c9eb2b..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/parse_ini_to_json.py +++ /dev/null @@ -1,338 +0,0 @@ -# Copyright 2020-2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
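Both the op-description parser above and the ini-to-json converter whose license header precedes this note consume the generated `aic-*-ops-info.ini` files. A minimal, made-up fragment in that shape, run through the converter; the key names are the ones the parsing code looks for, while the op and its values are invented for this sketch (assumes it is run from the `cmake/util` directory where the script lives):

```bash
# Write a minimal op description; keys mirror those parsed above
# (input0.name, input0.paramType, output0.dtype, ...).
cat > /tmp/aic-demo-ops-info.ini <<'EOF'
[AddCustom]
input0.name=x
input0.paramType=required
input0.dtype=float16
input0.format=ND
output0.name=z
output0.paramType=required
output0.dtype=float16
output0.format=ND
opFile.value=add_custom
opInterface.value=add_custom
EOF
# parse_ini_to_json.py validates the entries and emits the matching .json.
python3 parse_ini_to_json.py /tmp/aic-demo-ops-info.ini /tmp/aic-demo-ops-info.json
```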
- -""" -parser ini to json -""" - -import json -import os -import stat -import sys - - -ATTR_TYPE_LIST = ["int", "float", "bool", "str", "listInt", "listFloat", "listBool", "listStr", "listListInt", - "type", "listType", "tensor", "listTensor"] -ATTR_PARAMTYPE_LIST = ["optional", "required"] -BOOL_FLAG_KEY = ["dynamicFormat", "dynamicShapeSupport", "dynamicRankSupport", "precision_reduce", "heavyOp", - "needCheckSupport"] -BOOL_LIST = ["true", "false"] -DTYPE_LIST = ["float16", "float", "float32", "int8", "int16", "int32", "uint8", "uint16", "uint32", "bool", - "int64", "uint64", "qint8", "qint16", "qint32", "quint8", "quint16", "double", "complex64", - "complex128", "string", "resource", "dual", "dual_sub_int8", "dual_sub_uint8", "string_ref", - "int4", "bfloat16", "uint1"] -FORMAT_LIST = ["NCHW", "NHWC", "ND", "NC1HWC0", "FRACTAL_Z", "NC1C0HWPAD", "NHWC1C0", "FSR_NCHW", "FRACTAL_DECONV", - "C1HWNC0", "FRACTAL_DECONV_TRANSPOSE", "FRACTAL_DECONV_SP_STRIDE_TRANS", "NC1HWC0_C04", - "FRACTAL_Z_C04", "CHWN", "FRACTAL_DECONV_SP_STRIDE8_TRANS", "HWCN", "NC1KHKWHWC0", "BN_WEIGHT", - "FILTER_HWCK", "HASHTABLE_LOOKUP_LOOKUPS", "HASHTABLE_LOOKUP_KEYS", "HASHTABLE_LOOKUP_VALUE", - "HASHTABLE_LOOKUP_OUTPUT", "HASHTABLE_LOOKUP_HITS", "C1HWNCoC0", "MD", "NDHWC", "FRACTAL_ZZ", - "FRACTAL_NZ", "NCDHW", "DHWCN", "NDC1HWC0", "FRACTAL_Z_3D", "CN", "NC", "DHWNC", - "FRACTAL_Z_3D_TRANSPOSE", "FRACTAL_ZN_LSTM", "FRACTAL_ZN_RNN", "FRACTAL_Z_G", "NULL"] - - -def parse_ini_files(ini_files): - """ - parse ini files to json - Parameters: - ---------------- - ini_files:input file list - return:ops_info - ---------------- - """ - tbe_ops_info = {} - for ini_file in ini_files: - check_file_size(ini_file) - parse_ini_to_obj(ini_file, tbe_ops_info) - return tbe_ops_info - - -def check_file_size(input_file): - try: - file_size = os.path.getsize(input_file) - except OSError as os_error: - print('[ERROR] Failed to open "%s". %s' % (input_file, str(os_error))) - raise OSError from os_error - if file_size > 10*1024*1024: - print('[WARN] The size of %s exceeds 10MB, it may take more time to run, please wait.' 
% input_file) - - -def parse_ini_to_obj(ini_file, tbe_ops_info): - """ - parse ini file to json obj - Parameters: - ---------------- - ini_file:ini file path - tbe_ops_info:ops_info - ---------------- - """ - with open(ini_file) as ini_file: - lines = ini_file.readlines() - op_dict = {} - op_name = "" - find_op_type = False - for line in lines: - line = line.rstrip() - if line == "": - continue - if line.startswith("["): - if line.endswith("]"): - op_name = line[1:-1] - op_dict = {} - tbe_ops_info[op_name] = op_dict - find_op_type = True - elif "=" in line: - key1 = line[:line.index("=")] - key2 = line[line.index("=")+1:] - key1_0, key1_1 = key1.split(".") - if key1_0 not in op_dict: - op_dict[key1_0] = {} - if key1_1 in op_dict.get(key1_0): - raise RuntimeError("Op:" + op_name + " " + key1_0 + " " + - key1_1 + " is repeated!") - dic_key = op_dict.get(key1_0) - dic_key[key1_1] = key2 - else: - continue - if not find_op_type: - raise RuntimeError("Not find OpType in .ini file.") - - -def check_output_exist(op_dict, is_valid): - """ - Function Description: - Check output is exist - Parameter: op_dict - Parameter: is_valid - """ - if "output0" in op_dict: - output0_dict = op_dict.get("output0") - if output0_dict.get("name", None) is None: - is_valid = False - print("output0.name is required in .ini file!") - else: - is_valid = False - print("output0 is required in .ini file!") - return is_valid - - -def check_attr_dict(attr_dict, is_valid, attr): - """ - Function Description: - Check attr_dict - Parameter: attr_dict - Parameter: is_valid - Parameter: attr - """ - attr_type = attr_dict.get("type") - value = attr_dict.get("value") - param_type = attr_dict.get("paramType") - if attr_type is None or value is None: - is_valid = False - print("If attr.list is exist, {0}.type and {0}.value is required".format(attr)) - if param_type and param_type not in ATTR_PARAMTYPE_LIST: - is_valid = False - print("{0}.paramType only support {1}.".format(attr, ATTR_PARAMTYPE_LIST)) - if attr_type and attr_type not in ATTR_TYPE_LIST: - is_valid = False - print("{0}.type only support {1}.".format(attr, ATTR_TYPE_LIST)) - return is_valid - - -def check_attr(op_dict, is_valid): - """ - Function Description: - Check attr - Parameter: op_dict - Parameter: is_valid - """ - if "attr" in op_dict: - attr_dict = op_dict.get("attr") - attr_list_str = attr_dict.get("list", None) - if attr_list_str is None: - is_valid = False - print("attr.list is required in .ini file!") - else: - attr_list = attr_list_str.split(",") - for attr_name in attr_list: - attr = "attr_" + attr_name.strip() - attr_dict = op_dict.get(attr) - if attr_dict: - is_valid = check_attr_dict(attr_dict, is_valid, attr) - else: - is_valid = False - print("%s is required in .ini file, when attr.list is %s!" 
% (attr, attr_list_str)) - return is_valid - - -def check_bool_flag(op_dict, is_valid): - """ - Function Description: - check_bool_flag - Parameter: op_dict - Parameter: is_valid - """ - for key in BOOL_FLAG_KEY: - if key in op_dict: - op_bool_key = op_dict.get(key) - if op_bool_key.get("flag").strip() not in BOOL_LIST: - is_valid = False - print("{0}.flag only support {1}.".format(key, BOOL_LIST)) - return is_valid - - -def check_type_format(op_info, is_valid, op_info_key): - """ - Function Description: - Check type and format - Parameter: op_info - Parameter: is_valid - Parameter: op_info_key - """ - op_info_dtype_str = op_info.get("dtype") - op_info_dtype_num = 0 - op_info_format_num = 0 - if op_info_dtype_str: - op_info_dtype = op_info_dtype_str.split(",") - op_info_dtype_num = len(op_info_dtype) - for dtype in op_info_dtype: - if dtype.strip() not in DTYPE_LIST: - is_valid = False - print("{0}.dtype not support {1}.".format(op_info_key, dtype)) - op_info_format_str = op_info.get("format") - if op_info_format_str: - op_info_format = op_info_format_str.split(",") - op_info_format_num = len(op_info_format) - for op_format in op_info_format: - if op_format.strip() not in FORMAT_LIST: - is_valid = False - print("{0}.format not support {1}.".format(op_info_key, op_format)) - if op_info_dtype_num > 0 and op_info_format_num > 0: - if op_info_dtype_num != op_info_format_num: - is_valid = False - print("The number of {0}.dtype not match the number of {0}.format.".format(op_info_key)) - return is_valid - - -def check_op_info(tbe_ops): - """ - Function Description: - Check info. - Parameter: tbe_ops - Return Value: is_valid - """ - print("\n\n==============check valid for ops info start==============") - required_op_input_info_keys = ["paramType", "name"] - required_op_output_info_keys = ["paramType", "name"] - param_type_valid_value = ["dynamic", "optional", "required"] - is_valid = True - for op_key in tbe_ops: - op_dict = tbe_ops[op_key] - for op_info_key in op_dict: - if op_info_key.startswith("input"): - op_input_info = op_dict[op_info_key] - missing_keys = [] - for required_op_input_info_key in required_op_input_info_keys: - if required_op_input_info_key not in op_input_info: - missing_keys.append(required_op_input_info_key) - if len(missing_keys) > 0: - print("op: " + op_key + " " + op_info_key + " missing: " + - ",".join(missing_keys)) - is_valid = False - else: - if not op_input_info["paramType"] in param_type_valid_value: - print("op: " + op_key + " " + op_info_key + \ - " paramType not valid, valid key:[dynamic, " - "optional, required]") - is_valid = False - is_valid = check_type_format(op_input_info, is_valid, op_info_key) - if op_info_key.startswith("output"): - op_input_info = op_dict[op_info_key] - missing_keys = [] - for required_op_input_info_key in required_op_output_info_keys: - if required_op_input_info_key not in op_input_info: - missing_keys.append(required_op_input_info_key) - if len(missing_keys) > 0: - print("op: " + op_key + " " + op_info_key + " missing: " + - ",".join(missing_keys)) - is_valid = False - else: - if not op_input_info["paramType"] in param_type_valid_value: - print("op: " + op_key + " " + op_info_key + - " paramType not valid, valid key:[dynamic, " - "optional, required]") - is_valid = False - is_valid = check_type_format(op_input_info, is_valid, op_info_key) - is_valid = check_attr(op_dict, is_valid) - is_valid = check_bool_flag(op_dict, is_valid) - print("==============check valid for ops info end================\n\n") - return is_valid - - -def 
write_json_file(tbe_ops_info, json_file_path): - """ - Save info to json file - Parameters: - ---------------- - tbe_ops_info: ops_info - json_file_path: json file path - ---------------- - """ - json_file_real_path = os.path.realpath(json_file_path) - wr_flag = os.O_WRONLY | os.O_CREAT - wr_mode = stat.S_IWUSR | stat.S_IRUSR - with os.fdopen(os.open(json_file_real_path, wr_flag, wr_mode), 'w') as file_path: - # Only the owner and group have rights - os.chmod(json_file_real_path, stat.S_IWGRP + stat.S_IWUSR + stat.S_IRGRP - + stat.S_IRUSR) - json.dump(tbe_ops_info, file_path, sort_keys=True, indent=4, - separators=(',', ':')) - print("Compile op info cfg successfully.") - - -def parse_ini_to_json(ini_file_paths, outfile_path): - """ - parse ini files to json file - Parameters: - ---------------- - ini_file_paths: list of ini file path - outfile_path: output file path - ---------------- - """ - tbe_ops_info = parse_ini_files(ini_file_paths) - if not check_op_info(tbe_ops_info): - print("Compile op info cfg failed.") - return False - write_json_file(tbe_ops_info, outfile_path) - return True - - -if __name__ == '__main__': - args = sys.argv - - OUTPUT_FILE_PATH = "tbe_ops_info.json" - ini_file_path_list = [] - - for arg in args: - if arg.endswith("ini"): - ini_file_path_list.append(arg) - OUTPUT_FILE_PATH = arg.replace(".ini", ".json") - if arg.endswith("json"): - OUTPUT_FILE_PATH = arg - - if len(ini_file_path_list) == 0: - ini_file_path_list.append("tbe_ops_info.ini") - - if not parse_ini_to_json(ini_file_path_list, OUTPUT_FILE_PATH): - sys.exit(1) - sys.exit(0) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/preset_parse.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/preset_parse.py deleted file mode 100755 index 8f1124b1db30f552915958bc14066b8783f6ef74..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/preset_parse.py +++ /dev/null @@ -1,23 +0,0 @@ -import json -import sys -import os - - -def get_config_opts(file): - src_dir = os.path.abspath(os.path.dirname(file)) - opts = '' - with open(file, 'r') as fd: - config = json.load(fd) - for conf in config: - if conf == 'configurePresets': - for node in config[conf]: - macros = node.get('cacheVariables') - if macros is not None: - for key in macros: - opts += '-D{}={} '.format(key, macros[key]['value']) - opts = opts.replace('${sourceDir}', src_dir) - print(opts) - - -if __name__ == "__main__": - get_config_opts(sys.argv[1]) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/replay_codegen.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/replay_codegen.py deleted file mode 100755 index 1baa364ef8b3a7dee9ea1f9ce53e7c06641a2d02..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/replay_codegen.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -""" -Created on Feb 28 20:56:45 2020 -Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. 
-""" - -import os -import stat -import collections -import kernel_entry as keb -from tiling_data_def_build import gen_tiling -import code_channel_infer -import const_var - -PYF_PATH = os.path.dirname(__file__) - -ReplayCodeGenParams = collections.namedtuple('ReplayCodeGenParams',\ -['op_type', 'impl', 'tiling_file', 'kernel', 'entry', 'argn', 'op_replay_batch', 'max_block_dim', 'max_shape_size']) - - -class ReplayCodeGen: - def __init__(self, replayCodeGenParams): - self.op_type = replayCodeGenParams.op_type - self.impl = replayCodeGenParams.impl - self.tiling_file = replayCodeGenParams.tiling_file - self.tiling_data_file = '' - self.kernel = replayCodeGenParams.kernel - self.entry = replayCodeGenParams.entry - self.argn = replayCodeGenParams.argn - self.batch = False - self.outdir = '' - self.data_type = 'uint8_t' - self.blknum = 32 - self.op_replay_batch = replayCodeGenParams.op_replay_batch - self.max_block_dim = replayCodeGenParams.max_block_dim - self.max_shape_size = replayCodeGenParams.max_shape_size - - def set_batch(self, is_batch): - self.batch = is_batch - - def set_outdir(self, outdir): - self.outdir = outdir - - def gen_replay(self, ops_product: str): - kerentry = os.path.join(self.outdir, self.kernel + '_entry.cce') - kerimpl = os.path.join(self.outdir, self.kernel + '_impl.cpp') - replayimpl = os.path.join(self.outdir, self.kernel + '_replay.cpp') - if self.batch: - reptmp = os.path.join(PYF_PATH, 'batch_replay_impl.temp') - else: - reptmp = os.path.join(PYF_PATH, 'replay_impl.temp') - kertmp = os.path.join(PYF_PATH, 'kernel_impl.temp') - self._gen_kentry(kerentry) - self._gen_kimpl_code(kerimpl, kertmp) - self._gen_tiling_data_header() - self._gen_replay_code(replayimpl, reptmp, ops_product) - - def _gen_tiling_data_header(self): - self.tiling_data_file = os.path.join(self.outdir, self.kernel + '_tiling_data.h') - gen_tiling(self.tiling_file, self.tiling_data_file) - - def _gen_kimpl_code(self, src, tmpfile): - with open(tmpfile, 'r') as fd: - temp = fd.read() - temp = temp.replace('__CCE_FILE__', self.impl) - with os.fdopen(os.open(src, const_var.WFLAGS, const_var.WMODES), 'w') as ofd: - ofd.write(temp) - - def _gen_replay_code(self, src, tmpfile, ops_product: str): - with open(tmpfile, 'r') as fd: - temp = fd.read() - temp = temp.replace('__ARG_NUM__', str(self.argn)) - argdef = [] - kargs = [] - for i in range(0, self.argn): - argdef.append('{} *'.format(self.data_type)) - kargs.append('({} *)GetArg({})'.format(self.data_type, i)) - temp = temp.replace('__ARGS_DEF__', ', '.join(argdef)) - temp = temp.replace('__KERNEL_ARGS__', ', '.join(kargs)) - temp = temp.replace('__KERNEL_FUN__', self.entry) - core_type_infer = 'core_type' - code_channel = code_channel_infer.infer_code_channel(code_channel_infer.InfoCodeChanelParams(self.impl,\ - self.tiling_data_file, self.kernel, self.outdir, ops_product, None)) - if code_channel == code_channel_infer.CODE_VEC: - core_type_infer = '0' - elif code_channel == code_channel_infer.CODE_CUBE: - core_type_infer = '1' - temp = temp.replace('__CORE_TYPE__', core_type_infer) - # regist function - temp = temp.replace('__OPS_PRODUCT__', ops_product) - temp = temp.replace('__OPTYPE__', self.op_type) - with os.fdopen(os.open(src, const_var.WFLAGS, const_var.WMODES), 'w') as ofd: - ofd.write(temp) - - def _gen_kentry(self, src): - kf = '' - pre_alloc_str = 'A' * 256 - if self.batch: - kf += keb.batch_code_gen("K{:02d}_{}{}".format(0, self.entry, pre_alloc_str), self.argn, self.data_type) - else: - kf += keb.mc_code_gen("K{:02d}_{}{}".format(0, 
self.entry, pre_alloc_str),\ - self.argn, self.data_type, self.blknum) - with os.fdopen(os.open(src, const_var.WFLAGS, const_var.WMODES), 'w') as ofd: - ofd.write(kf) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/replay_impl.temp b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/replay_impl.temp deleted file mode 100755 index 1d30dd8658e0a72200b3d5222e11d29170e21f57..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/replay_impl.temp +++ /dev/null @@ -1,120 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "replay_def.h" -#include "code_gen.h" -#include "replay_fun.h" -#include "register/op_check.h" -#define __ASCENDC_REPLAY_CODE__ -using namespace std; -using namespace optiling; -using namespace AscendCReplay; - -extern "C" void __KERNEL_FUN__ (__ARGS_DEF__, const char *); -extern "C" int elf_append(char *elf, uint32_t elfSize, char *jit, int kernum, int blknum[], char *atext[], - int alen[], int atlen, const char* kernelname[]); - -#define KERNEL_N 1 -#define ARG_N (__ARG_NUM__) -#define MAX_L (1024 * 1024 * 100) -#define MAX_E (1024 * 1024) - -int __KERNEL_FUN___replay___OPS_PRODUCT__(ReplayFuncParam& param, const int core_type) -{ - // gen type 1 : direct call codes 0: load .o file - if (param.gentype < 0 || param.gentype > 1) { - printf("Error: call replay gen type is %d, should only be 1 or 0\n", param.gentype); - return 0; - } else if (param.gentype == 1 && param.objptr == nullptr) { - printf("Error: call replay with direct call mode, but code obj addr is null\n"); - return 0; - } else if (param.gentype == 0 && param.output_kernel_file == nullptr) { - printf("Error: call replay with object file mode, but object file path is null\n"); - return 0; - } - // core_type 0:MIX 1:CUBE 2:VEC - if (core_type < 0 || core_type > 2) { - printf("Error: call replay core type is %d !\n", core_type); - return 0; - } - g_coreType = __CORE_TYPE__; - g_taskRation = param.task_ration; - g_tilingKey = param.tiling_key; - - unsigned char *buf, *jit; - char *kernel[KERNEL_N * 32]; - int len[KERNEL_N * 32]; - int blknum[KERNEL_N]; - int max; - block_num = param.block_dim; - g_ubBase = block_num; - uint8_t *code = (uint8_t *)malloc(MAX_L); - uint8_t *pos = code; - struct timespec tp1, tp2; - - clock_gettime(CLOCK_MONOTONIC, &tp1); - if (block_num > 32) { - printf("Error: block_num > 32\n"); - return 0; - } - //__OP_FOPEN__ - for (int i = 0; i < KERNEL_N; i++) { - for (int j = 0; j < ARG_N; j++) - AddArg(j, ARG_STEP * (j + 1)); - for (block_idx = 0; block_idx < block_num; block_idx++) { - //__OP_SET_KERNEL__ - int code_idx = i * block_num + block_idx; -#ifdef FP_CEILING - SetCtrlFloatEnable(); -#else - SetCtrlFloatDisable(); -#endif - CodeInit(pos, false); - __KERNEL_FUN__(__KERNEL_ARGS__, param.tiling_data); - CodeEnd(); - kernel[code_idx] = (char *)pos; - len[code_idx] = CodeLen(); - pos += len[code_idx]; - printf("kernel %d core %ld code generated len %d\n", i, block_idx, len[code_idx]); - } - blknum[i] = block_num; - } - //__OP_FCLOSE__ - clock_gettime(CLOCK_MONOTONIC, &tp2); - buf = (unsigned char *)malloc(MAX_E); - int fd = open(param.entry_file, O_RDONLY); - if (fd < 0) { - printf("[error]: cannot find entry.o : %s\n", param.entry_file); - return 0; - } - uint32_t bufSize = read(fd, buf, MAX_E); - if (bufSize <= 0) { - printf("[error]: entry.o : %s is too small ! 
\n", param.entry_file); - } - close(fd); - jit = (unsigned char *)malloc(MAX_L); - printf("total code generated %ld\n", pos - code); - int sz = elf_append((char *)buf, bufSize, (char *)jit, KERNEL_N, blknum, kernel, len, pos - code, ¶m.kernel_name); - if (tp1.tv_sec != tp2.tv_sec) { - printf("%ld NS\n", tp2.tv_nsec + 1000000000 - tp1.tv_nsec); - } else { - printf("%ld NS\n", tp2.tv_nsec - tp1.tv_nsec); - } - printf("new elf size %d\n", sz); - if (param.gentype == 0) { - fd = open(param.output_kernel_file, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); - (void)write(fd, jit, sz); - close(fd); - free(jit); - } else if (param.gentype == 1) { - *param.objptr = (char*)jit; - } - free(buf); - free(code); - return sz; -} - -REG_REPLAY_FUNC(__OPTYPE__, __OPS_PRODUCT__, __KERNEL_FUN___replay___OPS_PRODUCT__); diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/tiling_data_def_build.py b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/tiling_data_def_build.py deleted file mode 100755 index a96304261c9369c9b2e24c5f2485615c0835f277..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/cmake/util/tiling_data_def_build.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -""" -Function: -The replay funtion entry -Copyright Information: -Huawei Technologies Co., Ltd. All Rights Reserved © 2020 -""" - -import sys -import os -import stat -import re -import const_var - - -def gen_tiling(tiling_header_file: str, tiling_file_out: str): - if not os.path.exists(tiling_header_file): - print("warning: no userdef tiling header file: ", tiling_header_file) - return - print("generate tiling def header file: ", tiling_file_out) - tmp_name = os.path.splitext(os.path.basename(tiling_header_file))[0].upper() - tiling_source = '#ifndef __{}_H__\n'.format(tmp_name) - tiling_source += '#define __{}_H__\n\n'.format(tmp_name) - tiling_source += '#include \n' - tiling_source += '#include \n\n' - tiling_source += '#include "kernel_tiling/kernel_tiling.h"\n\n' - end_source = "" - pattern = re.compile(r'[(](.*)[)]', re.S) - with open(tiling_header_file, 'r') as fd: - lines = fd.readlines() - for line in lines: - line = line.strip() - if (line.startswith('BEGIN_TILING_DATA_DEF')): - tiling_source += '#pragma pack(1)\n' - tiling_source += 'struct ' - struct_def = re.findall(pattern, line)[0] - tiling_source += struct_def + ' {\n' - elif (line.startswith('TILING_DATA_FIELD_DEF_ARR')): - field_params = re.findall(pattern, line)[0] - fds = field_params.split(',') - tiling_source += ' {} {}[{}] = {{}};\n'.format(fds[0].strip(), fds[2].strip(), fds[1].strip()) - elif (line.startswith('TILING_DATA_FIELD_DEF_STRUCT')): - field_params = re.findall(pattern, line)[0] - fds = field_params.split(',') - tiling_source += ' {} {};\n'.format(fds[0].strip(), fds[1].strip()) - elif (line.startswith('TILING_DATA_FIELD_DEF')): - field_params = re.findall(pattern, line)[0] - fds = field_params.split(',') - tiling_source += ' {} {} = 0;\n'.format(fds[0].strip(), fds[1].strip()) - elif (line.startswith('END_TILING_DATA_DEF')): - tiling_source += '};\n' - tiling_source += '#pragma pack()\n\n' - tiling_source += '#ifdef __NPU_TILING__\n' - tiling_source += \ - 'inline [aicore] void Init{stru}(const __gm__ uint8_t* tiling, {stru}* const_data)\n'\ - .format(stru=struct_def) - tiling_source += '{\n' - tiling_source += ' const __gm__ uint32_t *src = (const __gm__ uint32_t *)tiling;\n' - tiling_source += ' 
uint32_t *dst = (uint32_t *)const_data;\n' - tiling_source += ' for (auto i = 0; i < sizeof({}) / 4; i++) *(dst + i) = *(src + i);\n'\ - .format(struct_def) - tiling_source += '}\n' - tiling_source += '#else\n' - tiling_source += 'inline void Init{stru}(uint8_t* tiling, {stru}* const_data)\n'.format(stru=struct_def) - tiling_source += '{\n' - tiling_source += ' memcpy(const_data, tiling, sizeof({}));\n'.format(struct_def) - tiling_source += '}\n' - tiling_source += '#endif\n\n' - end_source = ''' -#define GET_TILING_DATA(tiling_data, tiling_arg) \\ -{stru} tiling_data; \\ -Init{stru}(tiling_arg, &tiling_data)\n -'''.format(stru=struct_def) - tiling_source += end_source - tiling_source += '#endif' - with os.fdopen(os.open(tiling_file_out, const_var.WFLAGS, const_var.WMODES), 'w') as ofd: - ofd.write(tiling_source) - - -if __name__ == '__main__': - if len(sys.argv) <= 2: - raise RuntimeError('arguments must greater than 2') - gen_tiling(sys.argv[1], sys.argv[2]) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/framework/CMakeLists.txt b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/framework/CMakeLists.txt deleted file mode 100755 index b6be9b492610f4d45b25bb7725648df9aac39a12..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/framework/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mindspore") - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/caffe_plugin") - add_subdirectory(caffe_plugin) - endif() - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tf_plugin") - add_subdirectory(tf_plugin) - endif() - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/onnx_plugin") - add_subdirectory(onnx_plugin) - endif() -endif() diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/framework/tf_plugin/CMakeLists.txt b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/framework/tf_plugin/CMakeLists.txt deleted file mode 100755 index a6aba5c207d3b85ad16fdea69dd813dd6cc371b1..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/framework/tf_plugin/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ - -aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} plugin_srcs) -add_library(cust_tf_parsers SHARED ${plugin_srcs}) -target_compile_definitions(cust_tf_parsers PRIVATE google=ascend_private) -if(ENABLE_CROSS_COMPILE) - target_link_directories(cust_tf_parsers PRIVATE - ${CMAKE_COMPILE_COMPILER_LIBRARY} - ${CMAKE_COMPILE_RUNTIME_LIBRARY} - ) -endif() -target_link_libraries(cust_tf_parsers PRIVATE intf_pub graph) -install(TARGETS cust_tf_parsers - LIBRARY DESTINATION packages/vendors/${vendor_name}/framework/tensorflow -) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/framework/tf_plugin/tensorflow_add_custom_plugin.cc b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/framework/tf_plugin/tensorflow_add_custom_plugin.cc deleted file mode 100755 index 2cd837ce58d9aba9d65d15d57c5fe74adac868c4..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/framework/tf_plugin/tensorflow_add_custom_plugin.cc +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright (C) 2020-2021. Huawei Technologies Co., Ltd. All -rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0. 
- * You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ - -#include "register/register.h" - -namespace domi { -// register op info to GE -REGISTER_CUSTOM_OP("AddCustom") - .FrameworkType(TENSORFLOW) // type: CAFFE, TENSORFLOW - .OriginOpType("Add") // name in tf module - .ParseParamsByOperatorFn(AutoMappingByOpFn); -} // namespace domi diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_host/CMakeLists.txt b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_host/CMakeLists.txt deleted file mode 100755 index 40dd51cfac524b0a9607b7d8b2813edd2210c509..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_host/CMakeLists.txt +++ /dev/null @@ -1,82 +0,0 @@ - -aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} ops_srcs) - -opbuild(OPS_SRC ${ops_srcs} - OUT_DIR ${ASCEND_AUTOGEN_PATH} -) - -add_library(cust_op_proto SHARED ${ops_srcs} ${ASCEND_AUTOGEN_PATH}/op_proto.cc) -target_compile_definitions(cust_op_proto PRIVATE OP_PROTO_LIB) -target_compile_options(cust_op_proto PRIVATE - -fvisibility=hidden -) -if(ENABLE_CROSS_COMPILE) - target_link_directories(cust_op_proto PRIVATE - ${CMAKE_COMPILE_COMPILER_LIBRARY} - ${CMAKE_COMPILE_RUNTIME_LIBRARY} - ) -endif() -target_link_libraries(cust_op_proto PRIVATE - intf_pub - exe_graph - register - tiling_api - -Wl,--whole-archive - rt2_registry - -Wl,--no-whole-archive -) -set_target_properties(cust_op_proto PROPERTIES OUTPUT_NAME - cust_opsproto_rt2.0 -) -add_library(cust_optiling SHARED ${ops_srcs}) -target_compile_definitions(cust_optiling PRIVATE OP_TILING_LIB) -target_compile_options(cust_optiling PRIVATE - -fvisibility=hidden -) -if(ENABLE_CROSS_COMPILE) - target_link_directories(cust_optiling PRIVATE - ${CMAKE_COMPILE_COMPILER_LIBRARY} - ${CMAKE_COMPILE_RUNTIME_LIBRARY} - ) -endif() -target_link_libraries(cust_optiling PRIVATE - intf_pub - exe_graph - register - tiling_api - -Wl,--whole-archive - rt2_registry - -Wl,--no-whole-archive -) -set_target_properties(cust_optiling PROPERTIES OUTPUT_NAME - cust_opmaster_rt2.0 -) - -file(GLOB aclnn_src ${ASCEND_AUTOGEN_PATH}/aclnn_*.cpp) -file(GLOB aclnn_inc ${ASCEND_AUTOGEN_PATH}/aclnn_*.h) -add_library(cust_opapi SHARED ${aclnn_src}) -if(ENABLE_CROSS_COMPILE) - target_link_directories(cust_opapi PRIVATE - ${CMAKE_COMPILE_COMPILER_LIBRARY} - ${CMAKE_COMPILE_RUNTIME_LIBRARY} - ) -endif() -target_link_libraries(cust_opapi PRIVATE intf_pub ascendcl nnopbase) - -add_custom_target(optiling_compat ALL - COMMAND ln -sf lib/linux/${CMAKE_SYSTEM_PROCESSOR}/$ - ${CMAKE_CURRENT_BINARY_DIR}/liboptiling.so -) - -install(TARGETS cust_op_proto - LIBRARY DESTINATION packages/vendors/${vendor_name}/op_proto/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) -install(FILES ${ASCEND_AUTOGEN_PATH}/op_proto.h - DESTINATION packages/vendors/${vendor_name}/op_proto/inc) -install(TARGETS cust_optiling - LIBRARY DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling/lib/linux/${CMAKE_SYSTEM_PROCESSOR}) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/liboptiling.so - DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/op_tiling) -install(TARGETS cust_opapi - LIBRARY DESTINATION 
packages/vendors/${vendor_name}/op_api/lib) -install(FILES ${aclnn_inc} - DESTINATION packages/vendors/${vendor_name}/op_api/include) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_kernel/CMakeLists.txt b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_kernel/CMakeLists.txt deleted file mode 100755 index 0d31a444cd71b6e455dc206b9b89159dea9f4ce2..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_kernel/CMakeLists.txt +++ /dev/null @@ -1,61 +0,0 @@ -# set custom compile options -if ("${CMAKE_BUILD_TYPE}x" STREQUAL "Debugx") - add_ops_compile_options(ALL OPTIONS -g -O0) -endif() - -foreach(compute_unit ${ASCEND_COMPUTE_UNIT}) - - # generate aic-${compute_unit}-ops-info.json - add_ops_info_target(TARGET ops_info_gen_${compute_unit} - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tbe/op_info_cfg/ai_core/${compute_unit}/aic-${compute_unit}-ops-info.json - OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini - INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/config/${compute_unit} - ) - - # generate ascendc impl py once - if (NOT TARGET ascendc_impl_gen) - add_ops_impl_target(TARGET ascendc_impl_gen - OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini - IMPL_DIR ${CMAKE_CURRENT_SOURCE_DIR} - OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe - INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl - ) - endif() - - # dynamic shape binary compile - if (${ENABLE_BINARY_PACKAGE}) - add_bin_compile_target(TARGET ascendc_bin_${compute_unit} - OPS_INFO ${ASCEND_AUTOGEN_PATH}/aic-${compute_unit}-ops-info.ini - IMPL_DIR ${CMAKE_CURRENT_SOURCE_DIR} - ADP_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe/dynamic - OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/binary/${compute_unit} - INSTALL_DIR packages/vendors/${vendor_name}/op_impl/ai_core/tbe/kernel - COMPUTE_UNIT ${compute_unit} - ) - add_dependencies(ascendc_bin_${compute_unit} ascendc_impl_gen) - endif() - -endforeach() - -# generate npu_supported_ops.json -add_npu_support_target(TARGET npu_supported_ops - OPS_INFO_DIR ${ASCEND_AUTOGEN_PATH} - OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/tbe/op_info_cfg/ai_core - INSTALL_DIR packages/vendors/${vendor_name}/framework/${ASCEND_FRAMEWORK_TYPE} -) - -if(ENABLE_TEST AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/testcases) - add_subdirectory(testcases) -endif() - -# install kernel file -if (${ENABLE_SOURCE_PACKAGE}) - file(GLOB KERNEL_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/*.h - ${CMAKE_CURRENT_SOURCE_DIR}/*.py - ) - install(FILES ${KERNEL_FILES} - DESTINATION packages/vendors/${vendor_name}/op_impl/ai_core/tbe/${vendor_name}_impl/dynamic - ) -endif() diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_kernel/add_custom.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_kernel/add_custom.cpp deleted file mode 100755 index 8e5fa31f1b4f87c79d06fd86c4f511b9fc7d4aba..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_kernel/add_custom.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/** - * @file add_custom.cpp - * - * Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
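The add_custom.cpp deleted below is the kernel side of the template-parameter sample: a standard double-buffered CopyIn/Compute/CopyOut pipeline whose entry point is declared as a template selected by the tiling key. A minimal sketch of that dispatch pattern follows, with `TILING_KEY` as a hypothetical template parameter; the sample's real parameter list comes from tiling_key_add_custom.h.

```cpp
// Minimal sketch of a tiling-key-templated kernel entry (TILING_KEY is a
// hypothetical parameter). Typically one instantiation is built per key,
// and the host-side tiling function selects which one gets launched.
template <uint64_t TILING_KEY>
__global__ __aicore__ void add_custom_sketch(GM_ADDR x, GM_ADDR y, GM_ADDR z,
                                             GM_ADDR workspace, GM_ADDR tiling)
{
    GET_TILING_DATA(tilingData, tiling);
    KernelAdd op;
    op.Init(x, y, z, tilingData.totalLength);
    op.Process();
}
```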
- */ -#include "kernel_operator.h" -#include "tiling_key_add_custom.h" -constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue - -class KernelAdd { -public: - __aicore__ inline KernelAdd() {} - __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t totalLength) - { - this->totalLength = totalLength; - xGm.SetGlobalBuffer((__gm__ DTYPE_X *)x, this->totalLength); - yGm.SetGlobalBuffer((__gm__ DTYPE_Y *)y, this->totalLength); - zGm.SetGlobalBuffer((__gm__ DTYPE_Z *)z, this->totalLength); - pipe.InitBuffer(inQueueX, BUFFER_NUM, this->totalLength * sizeof(DTYPE_X)); - pipe.InitBuffer(inQueueY, BUFFER_NUM, this->totalLength * sizeof(DTYPE_Y)); - pipe.InitBuffer(outQueueZ, BUFFER_NUM, this->totalLength * sizeof(DTYPE_Z)); - } - __aicore__ inline void Process() - { - CopyIn(); - Compute(); - CopyOut(); - } - -private: - __aicore__ inline void CopyIn() - { - AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); - AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); - AscendC::DataCopy(xLocal, xGm, this->totalLength); - AscendC::DataCopy(yLocal, yGm, this->totalLength); - inQueueX.EnQue(xLocal); - inQueueY.EnQue(yLocal); - } - __aicore__ inline void Compute() - { - AscendC::LocalTensor xLocal = inQueueX.DeQue(); - AscendC::LocalTensor yLocal = inQueueY.DeQue(); - AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); - AscendC::Add(zLocal, xLocal, yLocal, this->totalLength); - outQueueZ.EnQue(zLocal); - inQueueX.FreeTensor(xLocal); - inQueueY.FreeTensor(yLocal); - } - __aicore__ inline void CopyOut() - { - AscendC::LocalTensor zLocal = outQueueZ.DeQue(); - AscendC::DataCopy(zGm, zLocal, this->totalLength); - outQueueZ.FreeTensor(zLocal); - } - -private: - AscendC::TPipe pipe; - AscendC::TQue inQueueX, inQueueY; - AscendC::TQue outQueueZ; - AscendC::GlobalTensor xGm; - AscendC::GlobalTensor yGm; - AscendC::GlobalTensor zGm; - uint32_t totalLength; -}; - -template - __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling) -{ - GET_TILING_DATA(tiling_data, tiling); - KernelAdd op; - op.Init(x, y, z, tiling_data.totalLength); - op.Process(); -} \ No newline at end of file diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/scripts/install.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/scripts/install.sh deleted file mode 100755 index f950aa93086257de0a2a0744247a99bc7d2dda95..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/scripts/install.sh +++ /dev/null @@ -1,318 +0,0 @@ -#!/bin/bash -vendor_name=customize -targetdir=/usr/local/Ascend/opp -target_custom=0 - -sourcedir=$PWD/packages -vendordir=vendors/$vendor_name - -QUIET="y" - -while true -do - case $1 in - --quiet) - QUIET="y" - shift - ;; - --install-path=*) - INSTALL_PATH=$(echo $1 | cut -d"=" -f2-) - INSTALL_PATH=${INSTALL_PATH%*/} - shift - ;; - --*) - shift - ;; - *) - break - ;; - esac -done - -log() { - cur_date=`date +"%Y-%m-%d %H:%M:%S"` - echo "[runtime] [$cur_date] "$1 -} - -if [ -n "${INSTALL_PATH}" ]; then - if [[ ! "${INSTALL_PATH}" = /* ]]; then - log "[ERROR] use absolute path for --install-path argument" - exit 1 - fi - if [ ! -d ${INSTALL_PATH} ]; then - mkdir ${INSTALL_PATH} >> /dev/null 2>&1 - if [ $? -ne 0 ]; then - log "[ERROR] create ${INSTALL_PATH} failed" - exit 1 - fi - fi - targetdir=${INSTALL_PATH} -elif [ -n "${ASCEND_CUSTOM_OPP_PATH}" ]; then - if [ ! 
-d ${ASCEND_CUSTOM_OPP_PATH} ]; then - mkdir -p ${ASCEND_CUSTOM_OPP_PATH} >> /dev/null 2>&1 - if [ $? -ne 0 ]; then - log "[ERROR] create ${ASCEND_CUSTOM_OPP_PATH} failed" - fi - fi - targetdir=${ASCEND_CUSTOM_OPP_PATH} -else - if [ "x${ASCEND_OPP_PATH}" == "x" ]; then - log "[ERROR] env ASCEND_OPP_PATH does not exist" - exit 1 - fi - targetdir="${ASCEND_OPP_PATH}" -fi - -if [ ! -d $targetdir ];then - log "[ERROR] $targetdir does not exist" - exit 1 -fi - -upgrade() -{ - if [ ! -d ${sourcedir}/$vendordir/$1 ]; then - log "[INFO] no need to upgrade ops $1 files" - return 0 - fi - - if [ ! -d ${targetdir}/$vendordir/$1 ];then - log "[INFO] create ${targetdir}/$vendordir/$1." - mkdir -p ${targetdir}/$vendordir/$1 - if [ $? -ne 0 ];then - log "[ERROR] create ${targetdir}/$vendordir/$1 failed" - return 1 - fi - else - has_same_file=-1 - for file_a in ${sourcedir}/$vendordir/$1/*; do - file_b=${file_a##*/}; - if [ "$(ls ${targetdir}/$vendordir/$1)" = "" ]; then - log "[INFO] ${targetdir}/$vendordir/$1 is empty !!" - return 1 - fi - grep -q $file_b <<<`ls ${targetdir}/$vendordir/$1`; - if [[ $? -eq 0 ]]; then - echo -n "${file_b} " - has_same_file=0 - fi - done - if [ 0 -eq $has_same_file ]; then - if test $QUIET = "n"; then - echo "[INFO]: an old version exists in ${targetdir}/$vendordir/$1; \ - to overlay-install, enter [o]; \ - to replace the directory, enter [r]; \ - to skip installation, enter [n]." - - while true - do - read orn - if [ "$orn" = n ]; then - return 0 - elif [ "$orn" = o ]; then - break; - elif [ "$orn" = r ]; then - [ -n "${targetdir}/$vendordir/$1/" ] && rm -rf "${targetdir}/$vendordir/$1"/* - break; - else - echo "[ERROR] input error, please input again!" - fi - done - fi - fi - log "[INFO] replace or merge old ops $1 files ......" - fi - - log "copy new ops $1 files ......" - if [ -d ${targetdir}/$vendordir/$1/ ]; then - chmod -R +w "$targetdir/$vendordir/$1/" >/dev/null 2>&1 - fi - cp -rf ${sourcedir}/$vendordir/$1/* $targetdir/$vendordir/$1/ - if [ $? -ne 0 ];then - log "[ERROR] copy new $1 files failed" - return 1 - fi - - return 0 -} -upgrade_proto() -{ - if [ ! -f ${sourcedir}/$vendordir/custom.proto ]; then - log "[INFO] no need to upgrade custom.proto files" - return 0 - fi - if [ ! -d ${targetdir}/$vendordir/framework/caffe ];then - log "[INFO] create ${targetdir}/$vendordir/framework/caffe." - mkdir -p ${targetdir}/$vendordir/framework/caffe - if [ $? -ne 0 ];then - log "[ERROR] create ${targetdir}/$vendordir/framework/caffe failed" - return 1 - fi - else - if [ -f ${targetdir}/$vendordir/framework/caffe/custom.proto ]; then - # An old version exists; ask whether to overwrite it - if test $QUIET = "n"; then - echo "[INFO] ${targetdir}/$vendordir/framework/caffe has an old version"\ - "custom.proto file. Do you want to replace it? [y/n] " - - while true - do - read yn - if [ "$yn" = n ]; then - return 0 - elif [ "$yn" = y ]; then - break; - else - echo "[ERROR] input error, please input again!" - fi - done - fi - fi - log "[INFO] replace old custom.proto files ......" - fi - chmod -R +w "$targetdir/$vendordir/framework/caffe/" >/dev/null 2>&1 - cp -rf ${sourcedir}/$vendordir/custom.proto ${targetdir}/$vendordir/framework/caffe/ - if [ $? -ne 0 ];then - log "[ERROR] copy new custom.proto failed" - return 1 - fi - log "[INFO] copy custom.proto success" - - return 0 -} - -upgrade_file() -{ - if [ ! -e ${sourcedir}/$vendordir/$1 ]; then - log "[INFO] no need to upgrade ops $1 file" - return 0 - fi - - log "copy new $1 files ......" 
- cp -f ${sourcedir}/$vendordir/$1 $targetdir/$vendordir/$1 - if [ $? -ne 0 ];then - log "[ERROR] copy new $1 file failed" - return 1 - fi - - return 0 -} - -delete_optiling_file() -{ - if [ ! -d ${targetdir}/vendors ];then - log "[INFO] $1 not exist, no need to uninstall" - return 0 - fi - sys_info=$(uname -m) - if [ ! -d ${sourcedir}/$vendordir/$1/ai_core/tbe/op_tiling/lib/linux/${sys_info} ];then - rm -rf ${sourcedir}/$vendordir/$1/ai_core/tbe/op_tiling/liboptiling.so - fi - return 0 -} - -log "[INFO] copy uninstall sh success" - -if [ ! -d ${targetdir}/vendors ];then - log "[INFO] create ${targetdir}/vendors." - mkdir -p ${targetdir}/vendors - if [ $? -ne 0 ];then - log "[ERROR] create ${targetdir}/vendors failed" - return 1 - fi -fi -chmod u+w ${targetdir}/vendors - -echo "[ops_custom]upgrade framework" -upgrade framework -if [ $? -ne 0 ];then - exit 1 -fi - -echo "[ops_custom]upgrade op proto" -upgrade op_proto -if [ $? -ne 0 ];then - exit 1 -fi - -echo "[ops_custom]upgrade version.info" -upgrade_file version.info -if [ $? -ne 0 ];then - exit 1 -fi - -echo "[ops_custom]upgrade op impl" -delete_optiling_file op_impl -upgrade op_impl -if [ $? -ne 0 ];then - exit 1 -fi - -echo "[ops_custom]upgrade op api" -upgrade op_api -if [ $? -ne 0 ];then - exit 1 -fi - -upgrade_proto -if [ $? -ne 0 ];then - exit 1 -fi - -# set the set_env.bash -if [ -n "${INSTALL_PATH}" ] && [ -d ${INSTALL_PATH} ]; then - _ASCEND_CUSTOM_OPP_PATH=${targetdir}/${vendordir} - bin_path="${_ASCEND_CUSTOM_OPP_PATH}/bin" - set_env_variable="#!/bin/bash\nexport ASCEND_CUSTOM_OPP_PATH=${_ASCEND_CUSTOM_OPP_PATH}:\${ASCEND_CUSTOM_OPP_PATH}" - if [ ! -d ${bin_path} ]; then - mkdir -p ${bin_path} >> /dev/null 2>&1 - if [ $? -ne 0 ]; then - log "[ERROR] create ${bin_path} failed" - exit 1 - fi - fi - echo -e ${set_env_variable} > ${bin_path}/set_env.bash - if [ $? -ne 0 ]; then - log "[ERROR] write ASCEND_CUSTOM_OPP_PATH to set_env.bash failed" - exit 1 - else - log "[INFO] using requirements: when custom module install finished or before you run the custom module, \ - execute the command [ source ${bin_path}/set_env.bash ] to set the environment path" - fi -else - config_file=${targetdir}/vendors/config.ini - if [ ! -f ${config_file} ]; then - touch ${config_file} - chmod 640 ${config_file} - echo "load_priority=$vendor_name" > ${config_file} - if [ $? 
-ne 0 ];then - echo "echo load_priority failed" - exit 1 - fi - else - found_vendors="$(grep -w "load_priority" "$config_file" | cut --only-delimited -d"=" -f2-)" - found_vendor=$(echo $found_vendors | sed "s/\<$vendor_name\>//g" | tr ',' ' ') - vendor=$(echo $found_vendor | tr -s ' ' ',') - if [ "$vendor" != "" ]; then - sed -i "/load_priority=$found_vendors/s@load_priority=$found_vendors@load_priority=$vendor_name,$vendor@g" "$config_file" - fi - fi -fi - -chmod u-w ${targetdir}/vendors - -if [ -d ${targetdir}/$vendordir/op_impl/cpu/aicpu_kernel/impl/ ]; then - chmod -R 440 ${targetdir}/$vendordir/op_impl/cpu/aicpu_kernel/impl/* >/dev/null 2>&1 -fi -if [ -f ${targetdir}/ascend_install.info ]; then - chmod -R 440 ${targetdir}/ascend_install.info -fi -if [ -f ${targetdir}/scene.info ]; then - chmod -R 440 ${targetdir}/scene.info -fi -if [ -f ${targetdir}/version.info ]; then - chmod -R 440 ${targetdir}/version.info -fi - -echo "SUCCESS" -exit 0 - diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/scripts/upgrade.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/scripts/upgrade.sh deleted file mode 100755 index dc4a0cfc88947084848a917c0989de7210fa1d63..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/scripts/upgrade.sh +++ /dev/null @@ -1,151 +0,0 @@ -#!/bin/bash -vendor_name=customize -targetdir=/usr/local/Ascend/opp -target_custom=0 - -sourcedir=$PWD/packages -vendordir=vendors/$vendor_name - -log() { - cur_date=`date +"%Y-%m-%d %H:%M:%S"` - echo "[runtime] [$cur_date] "$1 -} - -if [[ "x${ASCEND_OPP_PATH}" == "x" ]];then - log "[ERROR] env ASCEND_OPP_PATH does not exist" - exit 1 -fi - -targetdir=${ASCEND_OPP_PATH} - -if [ ! -d $targetdir ];then - log "[ERROR] $targetdir does not exist" - exit 1 -fi - -upgrade() -{ - if [ ! -d ${sourcedir}/$vendordir/$1 ]; then - log "[INFO] no need to upgrade ops $1 files" - return 0 - fi - - if [ ! -d ${targetdir}/$vendordir/$1 ];then - log "[INFO] create ${targetdir}/$vendordir/$1." - mkdir -p ${targetdir}/$vendordir/$1 - if [ $? -ne 0 ];then - log "[ERROR] create ${targetdir}/$vendordir/$1 failed" - return 1 - fi - else - vendor_installed_dir=$(ls "$targetdir/vendors" 2> /dev/null) - for i in $vendor_installed_dir;do - vendor_installed_file=$(ls "$vendor_installed_dir/$vendor_name/$i" 2> /dev/null) - if [ "$i" = "$vendor_name" ] && [ "$vendor_installed_file" != "" ]; then - echo "[INFO]: the $vendor_name custom opp package is already installed under $vendor_installed_dir; \ - to overlay-install, enter [o]; \ - to replace the directory, enter [r]; \ - to skip installation, enter [n]." - fi - while true - do - read mrn - if [ "$mrn" = o ]; then - break - elif [ "$mrn" = r ]; then - [ -n "$vendor_installed_file" ] && rm -rf "$vendor_installed_file" - break - elif [ "$mrn" = n ]; then - return 0 - else - echo "[WARNING]: Input error, please input o, r, or n to choose!" - fi - done - done - log "[INFO] replace old ops $1 files ......" - fi - - log "copy new ops $1 files ......" - cp -rf ${sourcedir}/$vendordir/$1/* $targetdir/$vendordir/$1/ - if [ $? -ne 0 ];then - log "[ERROR] copy new $1 files failed" - return 1 - fi - - return 0 -} - -upgrade_file() -{ - if [ ! -e ${sourcedir}/$vendordir/$1 ]; then - log "[INFO] no need to upgrade ops $1 file" - return 0 - fi - - log "copy new $1 files ......" - cp -f ${sourcedir}/$vendordir/$1 $targetdir/$vendordir/$1 - if [ $? 
-ne 0 ];then - log "[ERROR] copy new $1 file failed" - return 1 - fi - - return 0 -} - -log "[INFO] copy uninstall sh success" - -echo "[ops_custom]upgrade framework" -upgrade framework -if [ $? -ne 0 ];then - exit 1 -fi - -echo "[ops_custom]upgrade op proto" -upgrade op_proto -if [ $? -ne 0 ];then - exit 1 -fi - -echo "[ops_custom]upgrade op impl" -upgrade op_impl -if [ $? -ne 0 ];then - exit 1 -fi - -echo "[ops_custom]upgrade op api" -upgrade op_api -if [ $? -ne 0 ];then - exit 1 -fi - -echo "[ops_custom]upgrade version.info" -upgrade_file version.info -if [ $? -ne 0 ];then - exit 1 -fi - -config_file=${targetdir}/vendors/config.ini -found_vendors="$(grep -w "load_priority" "$config_file" | cut --only-delimited -d"=" -f2-)" -found_vendor=$(echo $found_vendors | sed "s/\<$vendor_name\>//g" | tr ',' ' ') -vendor=$(echo $found_vendor | tr -s ' ' ',') -if [ "$vendor" != "" ]; then - sed -i "/load_priority=$found_vendors/s@load_priority=$found_vendors@load_priority=$vendor_name,$vendor@g" "$config_file" -fi - -changemode() -{ - if [ -d ${targetdir} ];then - chmod -R 550 ${targetdir}>/dev/null 2>&1 - fi - - return 0 -} -echo "[ops_custom]changemode..." -#changemode -if [ $? -ne 0 ];then - exit 1 -fi - -echo "SUCCESS" -exit 0 - diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/README.md b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/README.md deleted file mode 100644 index aee611a3ac37d916ff81b7b82ed4c433b38dc5df..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/README.md +++ /dev/null @@ -1,204 +0,0 @@ -## Invoking the AddTemplateCustom operator project via cpp-extension - - The cpp-extension approach invokes the operator by building a C++ operator extension package. It comes in four forms: Jit (just-in-time compilation), load_library, building a wheel package, and (optionally) registering the custom operator for graph mode. All of the samples below were verified with PyTorch 2.1.0 and support Python 3.8 and Python 3.9. - Each sample ships a run.sh script for reference. - -### Preparation before building - - Install PyTorch and torch-npu - - Follow the [torch-npu](https://gitee.com/ascend/pytorch) installation guide to build and install from source, or install the torch-npu package directly. Taking PyTorch 2.1.0, Python 3.9, and CANN 8.0.RC1.alpha002 as an example, the install commands are as follows; for details or other PyTorch versions, refer to the [torch-npu](https://gitee.com/ascend/pytorch) repository. This is only an example. - ```bash - pip3 install torch==2.1.0 - pip3 install torch-npu==2.1.0 - ``` -### Invoking via Jit - This sample script runs on PyTorch 2.1. In versions below 2.1 the device name bound to the NPU differs; see the comments in the sample code. - -#### Install dependencies - - - Install build dependencies - ```bash - pip3 install pyyaml - pip3 install wheel - pip3 install setuptools - ``` - - - Install test dependencies - ```bash - pip3 install Ninja - pip3 install expecttest - ``` - -#### Run the sample - - - Enter the sample directory - Download the sample code from the command line, taking the master branch as an example. - ```bash - cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/jit - ``` - - - Configure environment variables - - ```bash - export LD_LIBRARY_PATH=$ASCEND_OPP_PATH/vendors/customize/op_api/lib/:$LD_LIBRARY_PATH - ``` - - - Run the sample - - The run generates test data automatically, then uses the cpp_extension mechanism to compile and execute the AclNN operator interface in just-in-time mode, and finally verifies the result. - ```bash - cd test - python3 test_add_custom.py - ``` - - Alternatively, refer to the run.sh script to build and run. - ```bash - bash run.sh - ``` - -### Invoking via load_library - - This sample runs on PyTorch 2.1. In versions below 2.1 the device name bound to the NPU differs; see the comments in the sample code. - -#### Build the custom API shared library - - - Enter the sample directory - Download the sample code from the command line, taking the master branch as an example. - ```bash - cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library - ``` - - - Run the build commands - - ```bash - mkdir build - cd build - cmake .. 
- make -j - ``` - -#### Run the sample - - - Enter the sample directory - Download the sample code from the command line, taking the master branch as an example. - ```bash - cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/test - ``` - - - Configure environment variables - - ```bash - export LD_LIBRARY_PATH=$ASCEND_OPP_PATH/vendors/customize/op_api/lib/:$LD_LIBRARY_PATH - ``` - - - Run the sample - - ```bash - python3 test_add_custom.py - ``` - - Alternatively, refer to the run.sh script to build and run. - ```bash - bash run.sh - ``` - -### Invoking via a compiled wheel package - - This sample runs on PyTorch 2.1. In versions below 2.1 the device name bound to the NPU differs; see the comments in the sample code. - -#### Build the custom operator wheel package - - - Enter the sample directory - Download the sample code from the command line, taking the master branch as an example. - ```bash - cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup - ``` - - - Run the build command - - ```bash - python3 setup.py build bdist_wheel - ``` - - - Install the wheel package - - ```bash - cd dist/ - pip3 install custom_ops-1.0-cp38-cp38-linux_aarch64.whl (replace with the wheel actually produced by the build) - ``` - -#### Run the sample - - - Enter the sample directory - Download the sample code from the command line, taking the master branch as an example. - ```bash - cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/test - ``` - - - Configure environment variables - - ```bash - export LD_LIBRARY_PATH=$ASCEND_OPP_PATH/vendors/customize/op_api/lib/:$LD_LIBRARY_PATH - ``` - - - Run the sample - - ```bash - python3 test_add_custom.py - ``` - - Alternatively, refer to the run.sh script to build and run. - ```bash - bash run.sh - ``` - -#### Register the custom operator for graph mode (optional) - - This optional step gives a user-developed custom operator graph-mode capability; skip it if graph mode is not needed. It requires installing [torchair](https://gitee.com/ascend/torchair) first (currently only PyTorch 2.1 is supported), then registering a Meta-backend implementation for the custom operator to perform shape inference in graph mode. See the code in extension_add.cpp that registers the backend implementation for the Meta device; the sample code already contains this registration. Follow the steps below to build and register, which completes the graph-mode integration. - - - Fill in the torchair.ge.custom_op parameters according to the REG_OP operator prototype generated by the Ascend C project. - The REG_OP prototype of AddCustom is: - - ```cpp - REG_OP(AddCustom) - .INPUT(x, ge::TensorType::ALL()) - .INPUT(y, ge::TensorType::ALL()) - .OUTPUT(z, ge::TensorType::ALL()) - .OP_END_FACTORY_REG(AddCustom); - ``` - - - Register a converter for the custom operator - - Apply @register_fx_node_ge_converter in your own calling file to complete the converter registration; this code has already been added to the test/test_add_custom_graph.py test case. - ```python - from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, Callable - import torch - import torchair - from torchair import register_fx_node_ge_converter - from torchair.ge import Tensor - import custom_ops - - - # Note: the parameter name meta_outputs is fixed; getting it wrong breaks dtype and shape inference for the GE node's outputs - @register_fx_node_ge_converter(torch.ops.myops.my_op.default) - def convert_npu_add_custom(x: Tensor, y: Tensor, z: Tensor = None, meta_outputs: Any = None): - return torchair.ge.custom_op( - "AddCustom", - inputs={ - "x": x, - "y": y, - }, - outputs=['z'] - ) - ``` - - - Run the sample - - ```bash - cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/test - python3 test_add_custom_graph.py - ``` - -## Change History -| Date | Change | -| ---------- | ------------ | -| 2024/10/25 | Added the template-parameter operator sample | \ No newline at end of file diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/jit/extension_add.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/jit/extension_add.cpp deleted file mode 100644 index a8b025fb00274e1056c0e39421a9ce7f27d510fd..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/jit/extension_add.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/** - * @file extension_add.cpp - * - * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
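The extension sources below funnel the computation through `EXEC_NPU_CMD(aclnnAddCustom, ...)`, which resolves and drives the generated aclnn entry points at runtime. Written out directly, the generated API is the usual two-phase call; the sketch below assumes the generated names `aclnnAddCustomGetWorkspaceSize` / `aclnnAddCustom` and `aclTensor` handles already created over device buffers.

```cpp
#include "acl/acl.h"
#include "aclnn_add_custom.h"

// Sketch of the two-phase call EXEC_NPU_CMD performs under the hood.
// x, y, z are aclTensor handles already built over device memory.
int RunAddCustom(aclTensor *x, aclTensor *y, aclTensor *z, aclrtStream stream)
{
    uint64_t workspaceSize = 0;
    aclOpExecutor *executor = nullptr;
    // Phase 1: ask the operator how much device scratch memory it needs.
    if (aclnnAddCustomGetWorkspaceSize(x, y, z, &workspaceSize, &executor) != ACL_SUCCESS) {
        return -1;
    }
    void *workspace = nullptr;
    if (workspaceSize > 0 &&
        aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST) != ACL_SUCCESS) {
        return -1;
    }
    // Phase 2: launch the kernel on the stream, then wait for completion.
    if (aclnnAddCustom(workspace, workspaceSize, executor, stream) != ACL_SUCCESS ||
        aclrtSynchronizeStream(stream) != ACL_SUCCESS) {
        return -1;
    }
    return 0;
}
```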
- */ -#include -#include - -#include "pytorch_npu_helper.hpp" -using torch::autograd::AutogradContext; -using torch::autograd::Function; -using tensor_list = std::vector; -using namespace at; - -// register forward implementation for NPU device -at::Tensor my_op_impl_npu(const at::Tensor &self, const at::Tensor &other) -{ - // alloc output memory - at::Tensor result = at::Tensor(self); - - // call aclnn interface to perform the computation - EXEC_NPU_CMD(aclnnAddCustom, self, other, result); - return result; -} - -// register backward implementation for NPU device -std::tuple my_op_backward_impl_npu(const at::Tensor &self) -{ - at::Tensor result = at::Tensor(self); // Create output memory - - return {result, result}; -} - -// register forward implementation for Meta device -at::Tensor my_op_impl_meta(const at::Tensor &self, const at::Tensor &other) -{ - return empty_like(self); -} - -// register backward implementation for Meta device -std::tuple my_op_backward_impl_meta(const at::Tensor &self) -{ - auto result = empty_like(self); - return std::make_tuple(result, result); -} - -// look up the implementation registered for different devices for this operation -at::Tensor my_op_impl(const at::Tensor &self, const at::Tensor &other) -{ - static auto op = torch::Dispatcher::singleton().findSchemaOrThrow("myops::my_op", "").typed(); - return op.call(self, other); -} - -// look up the implementation registered for different devices for this operation -std::tuple my_op_backward_impl(const at::Tensor &self) -{ - static auto op = torch::Dispatcher::singleton() - .findSchemaOrThrow("myops::my_op_backward", "") - .typed(); - return op.call(self); -} - -// implement forward and backward binding by inheriting the torch::autograd::Function class -class MyAddFunction : public torch::autograd::Function { -public: - static at::Tensor forward(AutogradContext *ctx, at::Tensor self, at::Tensor other) - { - at::AutoDispatchBelowADInplaceOrView guard; - return my_op_impl(self, other); - } - - static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) - { - auto grad_output = grad_outputs[0]; - auto result = my_op_backward_impl(grad_output); - return {std::get<0>(result), std::get<1>(result)}; - } -}; - -// call apply() method when using it -at::Tensor my_op_impl_autograd(const at::Tensor &self, const at::Tensor &other) -{ - return MyAddFunction::apply(self, other); -} - -// register the schemas for my_op and my_op_backward in the myops namespace -TORCH_LIBRARY(myops, m) -{ - m.def("my_op(Tensor self, Tensor other) -> Tensor"); - m.def("my_op_backward(Tensor self) -> (Tensor, Tensor)"); -} - -// register forward and backward implementations for the NPU device -// the device name used by the NPU device in PyTorch 2.1 and above is PrivateUse1. -// in versions below 2.1, XLA is used. If the version is below 2.1, PrivateUse1 needs to be changed to XLA. -TORCH_LIBRARY_IMPL(myops, PrivateUse1, m) -{ - m.impl("my_op", &my_op_impl_npu); - m.impl("my_op_backward", &my_op_backward_impl_npu); -} - -// bind the NPU's autograd implementation to the operation -// if the version is below PyTorch 2.1, AutogradPrivateUse1 needs to be changed to AutogradXLA. 
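For reference, the dispatcher lookups in `my_op_impl` and `my_op_backward_impl` above carry an explicit function-type template argument to `typed<>`. A minimal sketch of the typed call path, with the signature written out to match the schema registered in `TORCH_LIBRARY(myops, m)` (the original may spell the type via `decltype` instead):

```cpp
// Sketch: typed dispatcher lookup and boxed-free call of "myops::my_op".
// The template argument to typed<> is the full C++ signature of the op.
at::Tensor CallMyOp(const at::Tensor &self, const at::Tensor &other)
{
    static auto op = torch::Dispatcher::singleton()
                         .findSchemaOrThrow("myops::my_op", "")
                         .typed<at::Tensor(const at::Tensor &, const at::Tensor &)>();
    return op.call(self, other);
}
```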
-TORCH_LIBRARY_IMPL(myops, AutogradPrivateUse1, m) -{ - m.impl("my_op", &my_op_impl_autograd); -} - -// register forward and backward implementations for the Meta device -TORCH_LIBRARY_IMPL(myops, Meta, m) -{ - m.impl("my_op", &my_op_impl_meta); - m.impl("my_op_backward", &my_op_backward_impl_meta); -} - -// bind the C++ interface to the Python interface using pybind -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) -{ - m.def("add_custom", &my_op_impl_autograd, "x + y"); -} diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/jit/pytorch_npu_helper.hpp b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/jit/pytorch_npu_helper.hpp deleted file mode 100644 index 1c957c85f4b1a76aa62a26c55edf3fd9a064b807..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/jit/pytorch_npu_helper.hpp +++ /dev/null @@ -1,567 +0,0 @@ -/** - * @file pytorch_npu_helper.hpp - * - * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ -#ifndef PYTORCH_NPU_HELPER_HPP_ -#define PYTORCH_NPU_HELPER_HPP_ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "torch_npu/csrc/aten/NPUNativeFunctions.h" -#include "torch_npu/csrc/core/npu/NPUStream.h" -#include "torch_npu/csrc/framework/OpCommand.h" -#include "torch_npu/csrc/framework/interface/EnvVariables.h" -#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" -#include "torch_npu/csrc/framework/utils/OpPreparation.h" - -#define NPU_NAME_SPACE at_npu::native - -#define __FILENAME__ (strrchr("/" __FILE__, '/') + 1) - -typedef struct aclOpExecutor aclOpExecutor; -typedef struct aclTensor aclTensor; -typedef struct aclScalar aclScalar; -typedef struct aclIntArray aclIntArray; -typedef struct aclFloatArray aclFloatArray; -typedef struct aclBoolArray aclBoolArray; -typedef struct aclTensorList aclTensorList; - -typedef aclTensor *(*_aclCreateTensor)(const int64_t *view_dims, uint64_t view_dims_num, aclDataType data_type, - const int64_t *stride, int64_t offset, aclFormat format, - const int64_t *storage_dims, uint64_t storage_dims_num, void *tensor_data); -typedef aclScalar *(*_aclCreateScalar)(void *value, aclDataType data_type); -typedef aclIntArray *(*_aclCreateIntArray)(const int64_t *value, uint64_t size); -typedef aclFloatArray *(*_aclCreateFloatArray)(const float *value, uint64_t size); -typedef aclBoolArray *(*_aclCreateBoolArray)(const bool *value, uint64_t size); -typedef aclTensorList *(*_aclCreateTensorList)(const aclTensor *const *value, uint64_t size); - -typedef int (*_aclDestroyTensor)(const aclTensor *tensor); -typedef int (*_aclDestroyScalar)(const aclScalar *scalar); -typedef int (*_aclDestroyIntArray)(const aclIntArray *array); -typedef int (*_aclDestroyFloatArray)(const aclFloatArray *array); -typedef int (*_aclDestroyBoolArray)(const aclBoolArray *array); -typedef int (*_aclDestroyTensorList)(const aclTensorList *array); - -constexpr int kHashBufSize = 8192; -constexpr int kHashBufMaxSize = kHashBufSize + 1024; -extern thread_local char g_hashBuf[kHashBufSize]; -extern thread_local int g_hashOffset; - -#define AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(_) \ - _(at::ScalarType::Byte, ACL_UINT8) \ - _(at::ScalarType::Char, ACL_INT8) \ - _(at::ScalarType::Short, ACL_INT16) \ - 
_(at::ScalarType::Int, ACL_INT32) \ - _(at::ScalarType::Long, ACL_INT64) \ - _(at::ScalarType::Half, ACL_FLOAT16) \ - _(at::ScalarType::Float, ACL_FLOAT) \ - _(at::ScalarType::Double, ACL_DOUBLE) \ - _(at::ScalarType::ComplexHalf, ACL_DT_UNDEFINED) \ - _(at::ScalarType::ComplexFloat, ACL_COMPLEX64) \ - _(at::ScalarType::ComplexDouble, ACL_COMPLEX128) \ - _(at::ScalarType::Bool, ACL_BOOL) \ - _(at::ScalarType::QInt8, ACL_DT_UNDEFINED) \ - _(at::ScalarType::QUInt8, ACL_DT_UNDEFINED) \ - _(at::ScalarType::QInt32, ACL_DT_UNDEFINED) \ - _(at::ScalarType::BFloat16, ACL_BF16) \ - _(at::ScalarType::QUInt4x2, ACL_DT_UNDEFINED) \ - _(at::ScalarType::QUInt2x4, ACL_DT_UNDEFINED) \ - _(at::ScalarType::Undefined, ACL_DT_UNDEFINED) \ - _(at::ScalarType::NumOptions, ACL_DT_UNDEFINED) - -constexpr aclDataType kATenScalarTypeToAclDataTypeTable[static_cast(at::ScalarType::NumOptions) + 1] = { -#define DEFINE_ENUM(_1, n) n, - AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(DEFINE_ENUM) -#undef DEFINE_ENUM -}; - -#define GET_OP_API_FUNC(apiName) reinterpret_cast<_##apiName>(GetOpApiFuncAddr(#apiName)) - -#define MEMCPY_TO_BUF(data_expression, size_expression) \ - if (g_hashOffset + (size_expression) > kHashBufSize) { \ - g_hashOffset = kHashBufMaxSize; \ - return; \ - } \ - memcpy(g_hashBuf + g_hashOffset, data_expression, size_expression); \ - g_hashOffset += size_expression; - -inline const char *GetOpApiLibName(void) { return "libopapi.so"; } - -inline const char *GetCustOpApiLibName(void) { return "libcust_opapi.so"; } - -inline void *GetOpApiFuncAddrInLib(void *handler, const char *libName, const char *apiName) -{ - auto funcAddr = dlsym(handler, apiName); - if (funcAddr == nullptr) { - ASCEND_LOGW("dlsym %s from %s failed, error:%s.", apiName, libName, dlerror()); - } - return funcAddr; -} - -inline void *GetOpApiLibHandler(const char *libName) -{ - auto handler = dlopen(libName, RTLD_LAZY); - if (handler == nullptr) { - ASCEND_LOGW("dlopen %s failed, error:%s.", libName, dlerror()); - } - return handler; -} - -inline void *GetOpApiFuncAddr(const char *apiName) -{ - static auto custOpApiHandler = GetOpApiLibHandler(GetCustOpApiLibName()); - if (custOpApiHandler != nullptr) { - auto funcAddr = GetOpApiFuncAddrInLib(custOpApiHandler, GetCustOpApiLibName(), apiName); - if (funcAddr != nullptr) { - return funcAddr; - } - } - - static auto opApiHandler = GetOpApiLibHandler(GetOpApiLibName()); - if (opApiHandler == nullptr) { - return nullptr; - } - return GetOpApiFuncAddrInLib(opApiHandler, GetOpApiLibName(), apiName); -} - -inline c10::Scalar ConvertTensorToScalar(const at::Tensor &tensor) -{ - c10::Scalar expScalar; - const at::Tensor *aclInput = &tensor; - if (aclInput->scalar_type() == at::ScalarType::Double) { - double value = *(double *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Long) { - int64_t value = *(int64_t *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Float) { - float value = *(float *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Int) { - int value = *(int *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Half) { - c10::Half value = *(c10::Half *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == 
at::ScalarType::Bool) { - int8_t value = *(int8_t *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::ComplexDouble) { - c10::complex value = *(c10::complex *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::ComplexFloat) { - c10::complex value = *(c10::complex *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::BFloat16) { - c10::BFloat16 value = *(c10::BFloat16 *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } - return expScalar; -} - -inline at::Tensor CopyTensorHostToDevice(const at::Tensor &cpu_tensor) -{ - at::Tensor cpuPinMemTensor = cpu_tensor.pin_memory(); - int deviceIndex = 0; - return cpuPinMemTensor.to(c10::Device(torch_npu::utils::get_npu_device_type(), deviceIndex), - cpuPinMemTensor.scalar_type(), true, true); -} - -inline at::Tensor CopyScalarToDevice(const c10::Scalar &cpu_scalar, at::ScalarType scalar_data_type) -{ - return CopyTensorHostToDevice(scalar_to_tensor(cpu_scalar).to(scalar_data_type)); -} - -inline aclTensor *ConvertType(const at::Tensor &at_tensor) -{ - static const auto aclCreateTensor = GET_OP_API_FUNC(aclCreateTensor); - if (aclCreateTensor == nullptr) { - return nullptr; - } - - if (!at_tensor.defined()) { - return nullptr; - } - at::ScalarType scalar_data_type = at_tensor.scalar_type(); - aclDataType acl_data_type = kATenScalarTypeToAclDataTypeTable[static_cast(scalar_data_type)]; - TORCH_CHECK(acl_data_type != ACL_DT_UNDEFINED, - std::string(c10::toString(scalar_data_type)) + " has not been supported") - c10::SmallVector storageDims; - // if acl_data_type is ACL_STRING, storageDims is empty. 
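To make this tensor conversion concrete, here is a hand-worked sketch of the arguments that would reach `aclCreateTensor` for a contiguous float32 tensor of shape {8, 2048} with zero storage offset. `devicePtr` is a placeholder for the tensor's device storage pointer, and the rank-dependent format switch that follows picks `ACL_FORMAT_ND` for rank 2.

```cpp
// Hand-worked example of the aclCreateTensor call ConvertType builds for a
// contiguous float32 tensor of shape {8, 2048} (devicePtr is hypothetical).
static const auto aclCreateTensor = GET_OP_API_FUNC(aclCreateTensor);

const int64_t viewDims[] = {8, 2048};
const int64_t strides[] = {2048, 1};       // contiguous row-major strides
const int64_t storageDims[] = {8 * 2048};  // storage nbytes / itemsize
aclTensor *t = aclCreateTensor(viewDims, 2, ACL_FLOAT, strides,
                               /*offset=*/0, ACL_FORMAT_ND,
                               storageDims, 1, devicePtr);
```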
- auto itemsize = at_tensor.itemsize(); - if (itemsize == 0) { - AT_ERROR("When ConvertType, tensor item size of cannot be zero."); - return nullptr; - } - if (acl_data_type != ACL_STRING) { - storageDims.push_back(at_tensor.storage().nbytes() / itemsize); - } - - const auto dimNum = at_tensor.sizes().size(); - aclFormat format = ACL_FORMAT_ND; - switch (dimNum) { - case 3: - format = ACL_FORMAT_NCL; - break; - case 4: - format = ACL_FORMAT_NCHW; - break; - case 5: - format = ACL_FORMAT_NCDHW; - break; - default: - format = ACL_FORMAT_ND; - } - - if (at_tensor.unsafeGetTensorImpl()->is_wrapped_number()) { - c10::Scalar expScalar = ConvertTensorToScalar(at_tensor); - at::Tensor aclInput = CopyScalarToDevice(expScalar, scalar_data_type); - return aclCreateTensor(aclInput.sizes().data(), aclInput.sizes().size(), acl_data_type, - aclInput.strides().data(), aclInput.storage_offset(), format, storageDims.data(), - storageDims.size(), const_cast(aclInput.storage().data())); - } - - auto acl_tensor = - aclCreateTensor(at_tensor.sizes().data(), at_tensor.sizes().size(), acl_data_type, at_tensor.strides().data(), - at_tensor.storage_offset(), format, storageDims.data(), storageDims.size(), - const_cast(at_tensor.storage().data())); - return acl_tensor; -} - -inline aclScalar *ConvertType(const at::Scalar &at_scalar) -{ - static const auto aclCreateScalar = GET_OP_API_FUNC(aclCreateScalar); - if (aclCreateScalar == nullptr) { - return nullptr; - } - - at::ScalarType scalar_data_type = at_scalar.type(); - aclDataType acl_data_type = kATenScalarTypeToAclDataTypeTable[static_cast(scalar_data_type)]; - TORCH_CHECK(acl_data_type != ACL_DT_UNDEFINED, - std::string(c10::toString(scalar_data_type)) + " has not been supported") - aclScalar *acl_scalar = nullptr; - switch (scalar_data_type) { - case at::ScalarType::Double: { - double value = at_scalar.toDouble(); - acl_scalar = aclCreateScalar(&value, acl_data_type); - break; - } - case at::ScalarType::Long: { - int64_t value = at_scalar.toLong(); - acl_scalar = aclCreateScalar(&value, acl_data_type); - break; - } - case at::ScalarType::Bool: { - bool value = at_scalar.toBool(); - acl_scalar = aclCreateScalar(&value, acl_data_type); - break; - } - case at::ScalarType::ComplexDouble: { - auto value = at_scalar.toComplexDouble(); - acl_scalar = aclCreateScalar(&value, acl_data_type); - break; - } - default: - acl_scalar = nullptr; - break; - } - return acl_scalar; -} - -inline aclIntArray *ConvertType(const at::IntArrayRef &at_array) -{ - static const auto aclCreateIntArray = GET_OP_API_FUNC(aclCreateIntArray); - if (aclCreateIntArray == nullptr) { - return nullptr; - } - auto array = aclCreateIntArray(at_array.data(), at_array.size()); - return array; -} - -template -inline aclBoolArray *ConvertType(const std::array &value) -{ - static const auto aclCreateBoolArray = GET_OP_API_FUNC(aclCreateBoolArray); - if (aclCreateBoolArray == nullptr) { - return nullptr; - } - - auto array = aclCreateBoolArray(value.data(), value.size()); - return array; -} - -inline aclBoolArray *ConvertType(const at::ArrayRef &value) -{ - static const auto aclCreateBoolArray = GET_OP_API_FUNC(aclCreateBoolArray); - if (aclCreateBoolArray == nullptr) { - return nullptr; - } - - auto array = aclCreateBoolArray(value.data(), value.size()); - return array; -} - -inline aclTensorList *ConvertType(const at::TensorList &at_tensor_list) -{ - static const auto aclCreateTensorList = GET_OP_API_FUNC(aclCreateTensorList); - if (aclCreateTensorList == nullptr) { - return nullptr; - } - - std::vector 
tensor_list(at_tensor_list.size()); - for (size_t i = 0; i < at_tensor_list.size(); i++) { - tensor_list[i] = ConvertType(at_tensor_list[i]); - } - auto acl_tensor_list = aclCreateTensorList(tensor_list.data(), tensor_list.size()); - return acl_tensor_list; -} - -inline aclTensor *ConvertType(const c10::optional &opt_tensor) -{ - if (opt_tensor.has_value() && opt_tensor.value().defined()) { - return ConvertType(opt_tensor.value()); - } - return nullptr; -} - -inline aclIntArray *ConvertType(const c10::optional &opt_array) -{ - if (opt_array.has_value()) { - return ConvertType(opt_array.value()); - } - return nullptr; -} - -inline aclScalar *ConvertType(const c10::optional &opt_scalar) -{ - if (opt_scalar.has_value()) { - return ConvertType(opt_scalar.value()); - } - return nullptr; -} - -inline aclDataType ConvertType(const at::ScalarType scalarType) -{ - return kATenScalarTypeToAclDataTypeTable[static_cast(scalarType)]; -} - -template -T ConvertType(T value) -{ - return value; -} - -template -auto ConvertToOpApiFunc(const Tuple ¶ms, void *opApiAddr, std::index_sequence) -{ - typedef int (*OpApiFunc)(typename std::decay(params))>::type...); - auto func = reinterpret_cast(opApiAddr); - return func; -} - -template -auto ConvertToOpApiFunc(const Tuple ¶ms, void *opApiAddr) -{ - static constexpr auto size = std::tuple_size::value; - return ConvertToOpApiFunc(params, opApiAddr, std::make_index_sequence{}); -} - -inline void Release(aclTensor *p) -{ - static const auto aclDestroyTensor = GET_OP_API_FUNC(aclDestroyTensor); - if (aclDestroyTensor == nullptr) { - return; - } - aclDestroyTensor(p); -} - -inline void Release(aclScalar *p) -{ - static const auto aclDestroyScalar = GET_OP_API_FUNC(aclDestroyScalar); - if (aclDestroyScalar == nullptr) { - return; - } - aclDestroyScalar(p); -} - -inline void Release(aclIntArray *p) -{ - static const auto aclDestroyIntArray = GET_OP_API_FUNC(aclDestroyIntArray); - if (aclDestroyIntArray == nullptr) { - return; - } - - aclDestroyIntArray(p); -} - -inline void Release(aclBoolArray *p) -{ - static const auto aclDestroyBoolArray = GET_OP_API_FUNC(aclDestroyBoolArray); - if (aclDestroyBoolArray == nullptr) { - return; - } - - aclDestroyBoolArray(p); -} - -inline void Release(aclTensorList *p) -{ - static const auto aclDestroyTensorList = GET_OP_API_FUNC(aclDestroyTensorList); - if (aclDestroyTensorList == nullptr) { - return; - } - - aclDestroyTensorList(p); -} - -template -void Release(T value) -{ - (void)value; -} - -template -void CallRelease(Tuple t, std::index_sequence) -{ - (void)std::initializer_list{(Release(std::get(t)), 0)...}; -} - -template -void ReleaseConvertTypes(Tuple &t) -{ - static constexpr auto size = std::tuple_size::value; - CallRelease(t, std::make_index_sequence{}); -} - -template -constexpr auto ConvertTypes(Ts &...args) -{ - return std::make_tuple(ConvertType(args)...); -} - -template -auto call(Function f, Tuple t, std::index_sequence) -{ - return f(std::get(t)...); -} - -template -auto call(Function f, Tuple t) -{ - static constexpr auto size = std::tuple_size::value; - return call(f, t, std::make_index_sequence{}); -} - -template -void AddParamToBuf(const std::array &value) -{ - MEMCPY_TO_BUF(value.data(), value.size() * sizeof(bool)); -} - -template -void AddParamToBuf(const T &value) -{ - MEMCPY_TO_BUF(&value, sizeof(T)); -} - -void AddParamToBuf(const at::Tensor &); -void AddParamToBuf(const at::Scalar &); -void AddParamToBuf(const at::IntArrayRef &); -void AddParamToBuf(const at::ArrayRef &); -void AddParamToBuf(const 
at::TensorList &); -void AddParamToBuf(const c10::optional &); -void AddParamToBuf(const c10::optional &); -void AddParamToBuf(const c10::optional &); -void AddParamToBuf(const at::ScalarType); -void AddParamToBuf(const string &); -void AddParamToBuf(); - -template -void AddParamToBuf(const T &arg, Args &...args) -{ - AddParamToBuf(arg); - AddParamToBuf(args...); -} - -uint64_t CalcHashId(); -typedef int (*InitHugeMemThreadLocal)(void *, bool); -typedef void (*UnInitHugeMemThreadLocal)(void *, bool); -typedef void (*ReleaseHugeMem)(void *, bool); - -#define EXEC_NPU_CMD(aclnn_api, ...) \ - do { \ - static const auto getWorkspaceSizeFuncAddr = GetOpApiFuncAddr(#aclnn_api "GetWorkspaceSize"); \ - static const auto opApiFuncAddr = GetOpApiFuncAddr(#aclnn_api); \ - static const auto initMemAddr = GetOpApiFuncAddr("InitHugeMemThreadLocal"); \ - static const auto unInitMemAddr = GetOpApiFuncAddr("UnInitHugeMemThreadLocal"); \ - static const auto releaseMemAddr = GetOpApiFuncAddr("ReleaseHugeMem"); \ - TORCH_CHECK(getWorkspaceSizeFuncAddr != nullptr && opApiFuncAddr != nullptr, #aclnn_api, " or ", \ - #aclnn_api "GetWorkspaceSize", " not in ", GetOpApiLibName(), ", or ", GetOpApiLibName(), \ - "not found."); \ - auto acl_stream = c10_npu::getCurrentNPUStream().stream(false); \ - uint64_t workspace_size = 0; \ - uint64_t *workspace_size_addr = &workspace_size; \ - aclOpExecutor *executor = nullptr; \ - aclOpExecutor **executor_addr = &executor; \ - InitHugeMemThreadLocal initMemFunc = reinterpret_cast(initMemAddr); \ - UnInitHugeMemThreadLocal unInitMemFunc = reinterpret_cast(unInitMemAddr); \ - if (initMemFunc) { \ - initMemFunc(nullptr, false); \ - } \ - auto converted_params = ConvertTypes(__VA_ARGS__, workspace_size_addr, executor_addr); \ - static auto getWorkspaceSizeFunc = ConvertToOpApiFunc(converted_params, getWorkspaceSizeFuncAddr); \ - auto workspace_status = call(getWorkspaceSizeFunc, converted_params); \ - TORCH_CHECK(workspace_status == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ - void *workspace_addr = nullptr; \ - if (workspace_size != 0) { \ - at::TensorOptions options = at::TensorOptions(torch_npu::utils::get_npu_device_type()); \ - auto workspace_tensor = at::empty({workspace_size}, options.dtype(kByte)); \ - workspace_addr = const_cast(workspace_tensor.storage().data()); \ - } \ - auto acl_call = [converted_params, workspace_addr, workspace_size, acl_stream, executor]() -> int { \ - typedef int (*OpApiFunc)(void *, uint64_t, aclOpExecutor *, const aclrtStream); \ - OpApiFunc opApiFunc = reinterpret_cast(opApiFuncAddr); \ - auto api_ret = opApiFunc(workspace_addr, workspace_size, executor, acl_stream); \ - TORCH_CHECK(api_ret == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ - ReleaseConvertTypes(converted_params); \ - ReleaseHugeMem releaseMemFunc = reinterpret_cast(releaseMemAddr); \ - if (releaseMemFunc) { \ - releaseMemFunc(nullptr, false); \ - } \ - return api_ret; \ - }; \ - at_npu::native::OpCommand cmd; \ - cmd.Name(#aclnn_api); \ - cmd.SetCustomHandler(acl_call); \ - cmd.Run(); \ - if (unInitMemFunc) { \ - unInitMemFunc(nullptr, false); \ - } \ - } while (false) - -#endif // PYTORCH_NPU_HELPER_HPP_ \ No newline at end of file diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/jit/run.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/jit/run.sh deleted file mode 100644 index 694e5f8b8c2240851d2f160cbad56c29c29de577..0000000000000000000000000000000000000000 --- 
a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/jit/run.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -if [ -n "$ASCEND_INSTALL_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH -elif [ -n "$ASCEND_HOME_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH -else - if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then - _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest - else - _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest - fi -fi -source $_ASCEND_INSTALL_PATH/bin/setenv.bash - -set -e -pip3 install pyyaml wheel setuptools Ninja expecttest -export LD_LIBRARY_PATH=$ASCEND_OPP_PATH/vendors/customize/op_api/lib:$LD_LIBRARY_PATH -( - cd test - python3 test_add_custom.py -) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/jit/test/test_add_custom.py b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/jit/test/test_add_custom.py deleted file mode 100644 index 1f3bd0395f077228376a92e05d157bafcc79d959..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/jit/test/test_add_custom.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/python3 -# coding=utf-8 -# -# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# =============================================================================== - -import os -import shutil -import unittest -import torch -import torch.utils.cpp_extension -import torch_npu -from torch_npu.testing.testcase import TestCase, run_tests - -PYTORCH_NPU_INSTALL_PATH = os.path.dirname(os.path.abspath(torch_npu.__file__)) - - -def remove_build_path(): - default_build_root = torch.utils.cpp_extension.get_default_build_root() - if os.path.exists(default_build_root): - shutil.rmtree(default_build_root) - - -class TestCppExtensionsJIT(TestCase): - - def setUp(self): - super().setUp() - self.old_working_dir = os.getcwd() - os.chdir(os.path.dirname(os.path.abspath(__file__))) - - def tearDown(self): - super().tearDown() - os.chdir(self.old_working_dir) - - @classmethod - def setUpClass(cls): - super().setUpClass() - remove_build_path() - - @classmethod - def tearDownClass(cls): - remove_build_path() - - def _test_jit_compile_extension_with_cpp(self): - extra_ldflags = [] - extra_ldflags.append("-ltorch_npu") - extra_ldflags.append(f"-L{PYTORCH_NPU_INSTALL_PATH}/lib") - extra_include_paths = [] - extra_include_paths.append("./") - extra_include_paths.append(os.path.join(PYTORCH_NPU_INSTALL_PATH, "include")) - extra_include_paths.append(os.path.join(PYTORCH_NPU_INSTALL_PATH, "include/third_party/acl/inc")) - - module = torch.utils.cpp_extension.load(name="jit_extension", - sources=["../extension_add.cpp"], - extra_include_paths=extra_include_paths, - extra_cflags=["-g"], - extra_ldflags=extra_ldflags, - verbose=True) - length = [8, 2048] - x = torch.rand(length, device='cpu', dtype=torch.float16) - y = torch.rand(length, device='cpu', dtype=torch.float16) - res = module.add_custom(x.npu(), y.npu()) - - self.assertRtolEqual(res.npu(), (x + y)) - - def test_jit_compile_extension_with_cpp(self): - self._test_jit_compile_extension_with_cpp() - - -if __name__ == '__main__': - run_tests() diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/CMakeLists.txt 
b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/CMakeLists.txt deleted file mode 100644 index 94ccc96b5258817d4220e9535070d2176ba11444..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -cmake_minimum_required(VERSION 3.10) - -project(AddCustom) - -execute_process( - COMMAND python3 -c "import site; print(site.getsitepackages()[0])" - OUTPUT_VARIABLE python_site_packages_path -) -string(STRIP "${python_site_packages_path}" python_site_packages_path) - -set(CMAKE_CXX_FLAGS "-fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -fPIE -pie ${CMAKE_CXX_FLAGS}") -set(CMAKE_CXX_FLAGS "-fabi-version=11 ${CMAKE_CXX_FLAGS}") -set(PYTORCH_INSTALL_PATH ${python_site_packages_path}/torch) -set(PYTORCH_NPU_INSTALL_PATH ${python_site_packages_path}/torch_npu) - -link_directories(${PYTORCH_INSTALL_PATH}/lib) -link_directories(${PYTORCH_NPU_INSTALL_PATH}/lib) - -add_library(AddCustom SHARED extension_add.cpp) - -target_compile_features(AddCustom PRIVATE cxx_std_17) -target_compile_options(AddCustom PRIVATE -D_GLIBCXX_USE_CXX11_ABI=0) - -include_directories(${PYTORCH_NPU_INSTALL_PATH}/include/third_party/acl/inc) -include_directories(${PYTORCH_NPU_INSTALL_PATH}/include) -include_directories(${PYTORCH_INSTALL_PATH}/include) -include_directories(${PYTORCH_INSTALL_PATH}/include/torch/csrc/distributed) -include_directories(${PYTORCH_INSTALL_PATH}/include/torch/csrc/api/include) - -target_link_libraries(AddCustom PUBLIC c10 torch torch_cpu torch_npu) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/extension_add.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/extension_add.cpp deleted file mode 100644 index be522bd79fd06995fb207582e4a13b2ef3afbf2d..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/extension_add.cpp +++ /dev/null @@ -1,101 +0,0 @@ -/** - * @file extension_add.cpp - * - * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
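The shared library produced by the CMakeLists.txt above is consumed simply by loading it into the process: the `TORCH_LIBRARY` / `TORCH_LIBRARY_IMPL` blocks in extension_add.cpp run as static registrars at load time. A sketch of doing this from C++ with an illustrative path follows; from Python, `torch.ops.load_library("build/libAddCustom.so")` has the same effect before calling `torch.ops.myops.my_op`.

```cpp
#include <dlfcn.h>
#include <stdexcept>

// Sketch: load the custom-op library; registering the "myops::my_op" schema
// and kernels happens as a side effect of the dlopen running the library's
// static initializers. The path is illustrative.
void LoadAddCustom()
{
    void *handle = dlopen("./build/libAddCustom.so", RTLD_NOW | RTLD_GLOBAL);
    if (handle == nullptr) {
        throw std::runtime_error(dlerror());
    }
}
```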
- */ -#include -#include - -#include "pytorch_npu_helper.hpp" -using torch::autograd::AutogradContext; -using torch::autograd::Function; -using tensor_list = std::vector; -using namespace at; - -// register forward implementation for NPU device -at::Tensor my_op_impl_npu(const at::Tensor &self, const at::Tensor &other) -{ - // alloc output memory - at::Tensor result = at::Tensor(self); - - // call aclnn interface to perform the computation - EXEC_NPU_CMD(aclnnAddCustom, self, other, result); - return result; -} - -// register backward implementation for NPU device -std::tuple my_op_backward_impl_npu(const at::Tensor &self) -{ - at::Tensor result = at::Tensor(self); // Create output memory - return {result, result}; -} - -// register forward implementation for Meta device -at::Tensor my_op_impl_meta(const at::Tensor &self, const at::Tensor &other) -{ - return empty_like(self); -} - -// register backward implementation for Meta device -std::tuple my_op_backward_impl_meta(const at::Tensor &self) -{ - auto result = empty_like(self); - return std::make_tuple(result, result); -} - -// implement forward and backward binding by inheriting the torch::autograd::Function class -class MyAddFunction : public torch::autograd::Function { -public: - static at::Tensor forward(AutogradContext *ctx, at::Tensor self, at::Tensor other) - { - at::AutoDispatchBelowADInplaceOrView guard; - return my_op_impl_npu(self, other); - } - - static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) - { - auto grad_output = grad_outputs[0]; - auto result = my_op_backward_impl_npu(grad_output); - return {std::get<0>(result), std::get<1>(result)}; - } -}; - -// call apply() method when using it -at::Tensor my_op_impl_autograd(const at::Tensor &self, const at::Tensor &other) -{ - return MyAddFunction::apply(self, other); -} - -// register the schemas for my_op and my_op_backward in the myops namespace -TORCH_LIBRARY(myops, m) -{ - m.def("my_op(Tensor self, Tensor other) -> Tensor"); - m.def("my_op_backward(Tensor self) -> (Tensor, Tensor)"); -} - -// register forward and backward implementations for the NPU device -// the device name used by the NPU device in PyTorch 2.1 and above is PrivateUse1. -// in versions below 2.1, XLA is used. If the version is below 2.1, PrivateUse1 needs to be changed to XLA. -TORCH_LIBRARY_IMPL(myops, PrivateUse1, m) -{ - m.impl("my_op", &my_op_impl_npu); - m.impl("my_op_backward", &my_op_backward_impl_npu); -} - -// bind the NPU's autograd implementation to the operation -// if the version is below PyTorch 2.1, AutogradPrivateUse1 needs to be changed to AutogradXLA. -TORCH_LIBRARY_IMPL(myops, AutogradPrivateUse1, m) -{ - m.impl("my_op", &my_op_impl_autograd); -} - -// register forward and backward implementations for the Meta device -TORCH_LIBRARY_IMPL(myops, Meta, m) -{ - m.impl("my_op", &my_op_impl_meta); - m.impl("my_op_backward", &my_op_backward_impl_meta); -} diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/pytorch_npu_helper.hpp b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/pytorch_npu_helper.hpp deleted file mode 100644 index 0fa6d81984c79a4d21ce2c20c98a4ddecc51799f..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/pytorch_npu_helper.hpp +++ /dev/null @@ -1,560 +0,0 @@ -/** - * @file pytorch_npu_helper.hpp - * - * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ -#ifndef PYTORCH_NPU_HELPER_HPP_ -#define PYTORCH_NPU_HELPER_HPP_ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "torch_npu/csrc/aten/NPUNativeFunctions.h" -#include "torch_npu/csrc/core/npu/NPUStream.h" -#include "torch_npu/csrc/framework/OpCommand.h" -#include "torch_npu/csrc/framework/interface/EnvVariables.h" -#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" -#include "torch_npu/csrc/framework/utils/OpPreparation.h" - -#define NPU_NAME_SPACE at_npu::native - -#define __FILENAME__ (strrchr("/" __FILE__, '/') + 1) - -typedef struct aclOpExecutor aclOpExecutor; -typedef struct aclTensor aclTensor; -typedef struct aclScalar aclScalar; -typedef struct aclIntArray aclIntArray; -typedef struct aclFloatArray aclFloatArray; -typedef struct aclBoolArray aclBoolArray; -typedef struct aclTensorList aclTensorList; - -typedef aclTensor *(*_aclCreateTensor)(const int64_t *view_dims, uint64_t view_dims_num, aclDataType data_type, - const int64_t *stride, int64_t offset, aclFormat format, - const int64_t *storage_dims, uint64_t storage_dims_num, void *tensor_data); -typedef aclScalar *(*_aclCreateScalar)(void *value, aclDataType data_type); -typedef aclIntArray *(*_aclCreateIntArray)(const int64_t *value, uint64_t size); -typedef aclFloatArray *(*_aclCreateFloatArray)(const float *value, uint64_t size); -typedef aclBoolArray *(*_aclCreateBoolArray)(const bool *value, uint64_t size); -typedef aclTensorList *(*_aclCreateTensorList)(const aclTensor *const *value, uint64_t size); - -typedef int (*_aclDestroyTensor)(const aclTensor *tensor); -typedef int (*_aclDestroyScalar)(const aclScalar *scalar); -typedef int (*_aclDestroyIntArray)(const aclIntArray *array); -typedef int (*_aclDestroyFloatArray)(const aclFloatArray *array); -typedef int (*_aclDestroyBoolArray)(const aclBoolArray *array); -typedef int (*_aclDestroyTensorList)(const aclTensorList *array); - -constexpr int kHashBufSize = 8192; -constexpr int kHashBufMaxSize = kHashBufSize + 1024; -extern thread_local char g_hashBuf[kHashBufSize]; -extern thread_local int g_hashOffset; - -#define AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(_) \ - _(at::ScalarType::Byte, ACL_UINT8) \ - _(at::ScalarType::Char, ACL_INT8) \ - _(at::ScalarType::Short, ACL_INT16) \ - _(at::ScalarType::Int, ACL_INT32) \ - _(at::ScalarType::Long, ACL_INT64) \ - _(at::ScalarType::Half, ACL_FLOAT16) \ - _(at::ScalarType::Float, ACL_FLOAT) \ - _(at::ScalarType::Double, ACL_DOUBLE) \ - _(at::ScalarType::ComplexHalf, ACL_DT_UNDEFINED) \ - _(at::ScalarType::ComplexFloat, ACL_COMPLEX64) \ - _(at::ScalarType::ComplexDouble, ACL_COMPLEX128) \ - _(at::ScalarType::Bool, ACL_BOOL) \ - _(at::ScalarType::QInt8, ACL_DT_UNDEFINED) \ - _(at::ScalarType::QUInt8, ACL_DT_UNDEFINED) \ - _(at::ScalarType::QInt32, ACL_DT_UNDEFINED) \ - _(at::ScalarType::BFloat16, ACL_BF16) \ - _(at::ScalarType::QUInt4x2, ACL_DT_UNDEFINED) \ - _(at::ScalarType::QUInt2x4, ACL_DT_UNDEFINED) \ - _(at::ScalarType::Undefined, ACL_DT_UNDEFINED) \ - _(at::ScalarType::NumOptions, ACL_DT_UNDEFINED) - -constexpr aclDataType kATenScalarTypeToAclDataTypeTable[static_cast(at::ScalarType::NumOptions) + 1] = { -#define DEFINE_ENUM(_1, n) n, - AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(DEFINE_ENUM) -#undef DEFINE_ENUM -}; - -#define GET_OP_API_FUNC(apiName) 
reinterpret_cast<_##apiName>(GetOpApiFuncAddr(#apiName)) - -#define MEMCPY_TO_BUF(data_expression, size_expression) \ - if (g_hashOffset + (size_expression) > kHashBufSize) { \ - g_hashOffset = kHashBufMaxSize; \ - return; \ - } \ - memcpy(g_hashBuf + g_hashOffset, data_expression, size_expression); \ - g_hashOffset += size_expression; - -inline const char *GetOpApiLibName(void) -{ - return "libopapi.so"; -} - -inline const char *GetCustOpApiLibName(void) -{ - return "libcust_opapi.so"; -} - -inline void *GetOpApiFuncAddrInLib(void *handler, const char *libName, const char *apiName) -{ - auto funcAddr = dlsym(handler, apiName); - if (funcAddr == nullptr) { - ASCEND_LOGW("dlsym %s from %s failed, error:%s.", apiName, libName, dlerror()); - } - return funcAddr; -} - -inline void *GetOpApiLibHandler(const char *libName) -{ - auto handler = dlopen(libName, RTLD_LAZY); - if (handler == nullptr) { - ASCEND_LOGW("dlopen %s failed, error:%s.", libName, dlerror()); - } - return handler; -} - -inline void *GetOpApiFuncAddr(const char *apiName) -{ - static auto custOpApiHandler = GetOpApiLibHandler(GetCustOpApiLibName()); - if (custOpApiHandler != nullptr) { - auto funcAddr = GetOpApiFuncAddrInLib(custOpApiHandler, GetCustOpApiLibName(), apiName); - if (funcAddr != nullptr) { - return funcAddr; - } - } - - static auto opApiHandler = GetOpApiLibHandler(GetOpApiLibName()); - if (opApiHandler == nullptr) { - return nullptr; - } - return GetOpApiFuncAddrInLib(opApiHandler, GetOpApiLibName(), apiName); -} - -inline c10::Scalar ConvertTensorToScalar(const at::Tensor &tensor) -{ - c10::Scalar expScalar; - const at::Tensor *aclInput = &tensor; - if (aclInput->scalar_type() == at::ScalarType::Double) { - double value = *(double *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Long) { - int64_t value = *(int64_t *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Float) { - float value = *(float *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Int) { - int value = *(int *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Half) { - c10::Half value = *(c10::Half *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Bool) { - int8_t value = *(int8_t *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::ComplexDouble) { - c10::complex value = *(c10::complex *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::ComplexFloat) { - c10::complex value = *(c10::complex *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::BFloat16) { - c10::BFloat16 value = *(c10::BFloat16 *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } - return expScalar; -} - -inline at::Tensor CopyTensorHostToDevice(const at::Tensor &cpu_tensor) -{ - at::Tensor cpuPinMemTensor = cpu_tensor.pin_memory(); - int deviceIndex = 0; - return cpuPinMemTensor.to(c10::Device(torch_npu::utils::get_npu_device_type(), deviceIndex), - cpuPinMemTensor.scalar_type(), true, true); -} - -inline at::Tensor 
CopyScalarToDevice(const c10::Scalar &cpu_scalar, at::ScalarType scalar_data_type) -{ - return CopyTensorHostToDevice(scalar_to_tensor(cpu_scalar).to(scalar_data_type)); -} - -inline aclTensor *ConvertType(const at::Tensor &at_tensor) -{ - static const auto aclCreateTensor = GET_OP_API_FUNC(aclCreateTensor); - if (aclCreateTensor == nullptr) { - return nullptr; - } - - if (!at_tensor.defined()) { - return nullptr; - } - at::ScalarType scalar_data_type = at_tensor.scalar_type(); - aclDataType acl_data_type = kATenScalarTypeToAclDataTypeTable[static_cast(scalar_data_type)]; - TORCH_CHECK(acl_data_type != ACL_DT_UNDEFINED, - std::string(c10::toString(scalar_data_type)) + " has not been supported") - c10::SmallVector storageDims; - // if acl_data_type is ACL_STRING, storageDims is empty. - auto itemsize = at_tensor.itemsize(); - if (itemsize == 0) { - AT_ERROR("When ConvertType, tensor item size of cannot be zero."); - return nullptr; - } - if (acl_data_type != ACL_STRING) { - storageDims.push_back(at_tensor.storage().nbytes() / itemsize); - } - - const auto dimNum = at_tensor.sizes().size(); - aclFormat format = ACL_FORMAT_ND; - switch (dimNum) { - case 3: - format = ACL_FORMAT_NCL; - break; - case 4: - format = ACL_FORMAT_NCHW; - break; - case 5: - format = ACL_FORMAT_NCDHW; - break; - default: - format = ACL_FORMAT_ND; - } - - if (at_tensor.unsafeGetTensorImpl()->is_wrapped_number()) { - c10::Scalar expScalar = ConvertTensorToScalar(at_tensor); - at::Tensor aclInput = CopyScalarToDevice(expScalar, scalar_data_type); - return aclCreateTensor(aclInput.sizes().data(), aclInput.sizes().size(), acl_data_type, - aclInput.strides().data(), aclInput.storage_offset(), format, storageDims.data(), - storageDims.size(), const_cast(aclInput.storage().data())); - } - - auto acl_tensor = - aclCreateTensor(at_tensor.sizes().data(), at_tensor.sizes().size(), acl_data_type, at_tensor.strides().data(), - at_tensor.storage_offset(), format, storageDims.data(), storageDims.size(), - const_cast(at_tensor.storage().data())); - return acl_tensor; -} - -inline aclScalar *ConvertType(const at::Scalar &at_scalar) -{ - static const auto aclCreateScalar = GET_OP_API_FUNC(aclCreateScalar); - if (aclCreateScalar == nullptr) { - return nullptr; - } - - at::ScalarType scalar_data_type = at_scalar.type(); - aclDataType acl_data_type = kATenScalarTypeToAclDataTypeTable[static_cast(scalar_data_type)]; - TORCH_CHECK(acl_data_type != ACL_DT_UNDEFINED, - std::string(c10::toString(scalar_data_type)) + " has not been supported") - aclScalar *acl_scalar = nullptr; - switch (scalar_data_type) { - case at::ScalarType::Double: { - double value = at_scalar.toDouble(); - acl_scalar = aclCreateScalar(&value, acl_data_type); - break; - } - case at::ScalarType::Long: { - int64_t value = at_scalar.toLong(); - acl_scalar = aclCreateScalar(&value, acl_data_type); - break; - } - case at::ScalarType::Bool: { - bool value = at_scalar.toBool(); - acl_scalar = aclCreateScalar(&value, acl_data_type); - break; - } - case at::ScalarType::ComplexDouble: { - auto value = at_scalar.toComplexDouble(); - acl_scalar = aclCreateScalar(&value, acl_data_type); - break; - } - default: - acl_scalar = nullptr; - break; - } - return acl_scalar; -} - -inline aclIntArray *ConvertType(const at::IntArrayRef &at_array) -{ - static const auto aclCreateIntArray = GET_OP_API_FUNC(aclCreateIntArray); - if (aclCreateIntArray == nullptr) { - return nullptr; - } - auto array = aclCreateIntArray(at_array.data(), at_array.size()); - return array; -} - -template inline 
aclBoolArray *ConvertType(const std::array &value) -{ - static const auto aclCreateBoolArray = GET_OP_API_FUNC(aclCreateBoolArray); - if (aclCreateBoolArray == nullptr) { - return nullptr; - } - - auto array = aclCreateBoolArray(value.data(), value.size()); - return array; -} - -inline aclBoolArray *ConvertType(const at::ArrayRef &value) -{ - static const auto aclCreateBoolArray = GET_OP_API_FUNC(aclCreateBoolArray); - if (aclCreateBoolArray == nullptr) { - return nullptr; - } - - auto array = aclCreateBoolArray(value.data(), value.size()); - return array; -} - -inline aclTensorList *ConvertType(const at::TensorList &at_tensor_list) -{ - static const auto aclCreateTensorList = GET_OP_API_FUNC(aclCreateTensorList); - if (aclCreateTensorList == nullptr) { - return nullptr; - } - - std::vector tensor_list(at_tensor_list.size()); - for (size_t i = 0; i < at_tensor_list.size(); i++) { - tensor_list[i] = ConvertType(at_tensor_list[i]); - } - auto acl_tensor_list = aclCreateTensorList(tensor_list.data(), tensor_list.size()); - return acl_tensor_list; -} - -inline aclTensor *ConvertType(const c10::optional &opt_tensor) -{ - if (opt_tensor.has_value() && opt_tensor.value().defined()) { - return ConvertType(opt_tensor.value()); - } - return nullptr; -} - -inline aclIntArray *ConvertType(const c10::optional &opt_array) -{ - if (opt_array.has_value()) { - return ConvertType(opt_array.value()); - } - return nullptr; -} - -inline aclScalar *ConvertType(const c10::optional &opt_scalar) -{ - if (opt_scalar.has_value()) { - return ConvertType(opt_scalar.value()); - } - return nullptr; -} - -inline aclDataType ConvertType(const at::ScalarType scalarType) -{ - return kATenScalarTypeToAclDataTypeTable[static_cast(scalarType)]; -} - -template T ConvertType(T value) -{ - return value; -} - -template -auto ConvertToOpApiFunc(const Tuple ¶ms, void *opApiAddr, std::index_sequence) -{ - typedef int (*OpApiFunc)(typename std::decay(params))>::type...); - auto func = reinterpret_cast(opApiAddr); - return func; -} - -template auto ConvertToOpApiFunc(const Tuple ¶ms, void *opApiAddr) -{ - static constexpr auto size = std::tuple_size::value; - return ConvertToOpApiFunc(params, opApiAddr, std::make_index_sequence{}); -} - -inline void Release(aclTensor *p) -{ - static const auto aclDestroyTensor = GET_OP_API_FUNC(aclDestroyTensor); - if (aclDestroyTensor == nullptr) { - return; - } - aclDestroyTensor(p); -} - -inline void Release(aclScalar *p) -{ - static const auto aclDestroyScalar = GET_OP_API_FUNC(aclDestroyScalar); - if (aclDestroyScalar == nullptr) { - return; - } - aclDestroyScalar(p); -} - -inline void Release(aclIntArray *p) -{ - static const auto aclDestroyIntArray = GET_OP_API_FUNC(aclDestroyIntArray); - if (aclDestroyIntArray == nullptr) { - return; - } - - aclDestroyIntArray(p); -} - -inline void Release(aclBoolArray *p) -{ - static const auto aclDestroyBoolArray = GET_OP_API_FUNC(aclDestroyBoolArray); - if (aclDestroyBoolArray == nullptr) { - return; - } - - aclDestroyBoolArray(p); -} - -inline void Release(aclTensorList *p) -{ - static const auto aclDestroyTensorList = GET_OP_API_FUNC(aclDestroyTensorList); - if (aclDestroyTensorList == nullptr) { - return; - } - - aclDestroyTensorList(p); -} - -template void Release(T value) -{ - (void)value; -} - -template void CallRelease(Tuple t, std::index_sequence) -{ - (void)std::initializer_list{(Release(std::get(t)), 0)...}; -} - -template void ReleaseConvertTypes(Tuple &t) -{ - static constexpr auto size = std::tuple_size::value; - CallRelease(t, 
std::make_index_sequence{}); -} - -template constexpr auto ConvertTypes(Ts &...args) -{ - return std::make_tuple(ConvertType(args)...); -} - -template auto call(Function f, Tuple t, std::index_sequence) -{ - return f(std::get(t)...); -} - -template auto call(Function f, Tuple t) -{ - static constexpr auto size = std::tuple_size::value; - return call(f, t, std::make_index_sequence{}); -} - -template void AddParamToBuf(const std::array &value) -{ - MEMCPY_TO_BUF(value.data(), value.size() * sizeof(bool)); -} - -template void AddParamToBuf(const T &value) -{ - MEMCPY_TO_BUF(&value, sizeof(T)); -} - -void AddParamToBuf(const at::Tensor &); -void AddParamToBuf(const at::Scalar &); -void AddParamToBuf(const at::IntArrayRef &); -void AddParamToBuf(const at::ArrayRef &); -void AddParamToBuf(const at::TensorList &); -void AddParamToBuf(const c10::optional &); -void AddParamToBuf(const c10::optional &); -void AddParamToBuf(const c10::optional &); -void AddParamToBuf(const at::ScalarType); -void AddParamToBuf(const string &); -void AddParamToBuf(); - -template void AddParamToBuf(const T &arg, Args &...args) -{ - AddParamToBuf(arg); - AddParamToBuf(args...); -} - -uint64_t CalcHashId(); -typedef int (*InitHugeMemThreadLocal)(void *, bool); -typedef void (*UnInitHugeMemThreadLocal)(void *, bool); -typedef void (*ReleaseHugeMem)(void *, bool); - -#define EXEC_NPU_CMD(aclnn_api, ...) \ - do { \ - static const auto getWorkspaceSizeFuncAddr = GetOpApiFuncAddr(#aclnn_api "GetWorkspaceSize"); \ - static const auto opApiFuncAddr = GetOpApiFuncAddr(#aclnn_api); \ - static const auto initMemAddr = GetOpApiFuncAddr("InitHugeMemThreadLocal"); \ - static const auto unInitMemAddr = GetOpApiFuncAddr("UnInitHugeMemThreadLocal"); \ - static const auto releaseMemAddr = GetOpApiFuncAddr("ReleaseHugeMem"); \ - TORCH_CHECK(getWorkspaceSizeFuncAddr != nullptr && opApiFuncAddr != nullptr, #aclnn_api, " or ", \ - #aclnn_api "GetWorkspaceSize", " not in ", GetOpApiLibName(), ", or ", GetOpApiLibName(), \ - "not found."); \ - auto acl_stream = c10_npu::getCurrentNPUStream().stream(false); \ - uint64_t workspace_size = 0; \ - uint64_t *workspace_size_addr = &workspace_size; \ - aclOpExecutor *executor = nullptr; \ - aclOpExecutor **executor_addr = &executor; \ - InitHugeMemThreadLocal initMemFunc = reinterpret_cast(initMemAddr); \ - UnInitHugeMemThreadLocal unInitMemFunc = reinterpret_cast(unInitMemAddr); \ - if (initMemFunc) { \ - initMemFunc(nullptr, false); \ - } \ - auto converted_params = ConvertTypes(__VA_ARGS__, workspace_size_addr, executor_addr); \ - static auto getWorkspaceSizeFunc = ConvertToOpApiFunc(converted_params, getWorkspaceSizeFuncAddr); \ - auto workspace_status = call(getWorkspaceSizeFunc, converted_params); \ - TORCH_CHECK(workspace_status == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ - void *workspace_addr = nullptr; \ - if (workspace_size != 0) { \ - at::TensorOptions options = at::TensorOptions(torch_npu::utils::get_npu_device_type()); \ - auto workspace_tensor = at::empty({workspace_size}, options.dtype(kByte)); \ - workspace_addr = const_cast(workspace_tensor.storage().data()); \ - } \ - auto acl_call = [converted_params, workspace_addr, workspace_size, acl_stream, executor]() -> int { \ - typedef int (*OpApiFunc)(void *, uint64_t, aclOpExecutor *, const aclrtStream); \ - OpApiFunc opApiFunc = reinterpret_cast(opApiFuncAddr); \ - auto api_ret = opApiFunc(workspace_addr, workspace_size, executor, acl_stream); \ - TORCH_CHECK(api_ret == 0, "call " #aclnn_api " failed, detail:", 
aclGetRecentErrMsg()); \ - ReleaseConvertTypes(converted_params); \ - ReleaseHugeMem releaseMemFunc = reinterpret_cast(releaseMemAddr); \ - if (releaseMemFunc) { \ - releaseMemFunc(nullptr, false); \ - } \ - return api_ret; \ - }; \ - at_npu::native::OpCommand cmd; \ - cmd.Name(#aclnn_api); \ - cmd.SetCustomHandler(acl_call); \ - cmd.Run(); \ - if (unInitMemFunc) { \ - unInitMemFunc(nullptr, false); \ - } \ - } while (false) - -#endif // PYTORCH_NPU_HELPER_HPP_ \ No newline at end of file diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/run.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/run.sh deleted file mode 100644 index e39345e1cdfba1cc0bcd41636fdaa4c8b308ecff..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/run.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -if [ -n "$ASCEND_INSTALL_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH -elif [ -n "$ASCEND_HOME_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH -else - if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then - _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest - else - _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest - fi -fi -source $_ASCEND_INSTALL_PATH/bin/setenv.bash - -set -e -rm -rf build -mkdir -p build -cmake -B build -cmake --build build -j -export LD_LIBRARY_PATH=$ASCEND_OPP_PATH/vendors/customize/op_api/lib:$LD_LIBRARY_PATH -( - cd test - python3 test_add_custom.py -) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/test/test_add_custom.py b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/test/test_add_custom.py deleted file mode 100644 index 18beae069b71d9605ab5f8378aa74f4ef7cbaddd..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/load_library/test/test_add_custom.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/python3 -# coding=utf-8 -# -# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
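
The `EXEC_NPU_CMD` macro above resolves `<api>GetWorkspaceSize` and `<api>` by name, queries the workspace size, allocates device memory for it, and hands the launch to `OpCommand`. Below is a macro-free sketch of the same two-phase flow for `aclnnAddCustom`, assuming the generated `aclnn_add_custom.h` header is linked directly rather than resolved through `dlsym`:

```cpp
#include "acl/acl.h"
#include "aclnn_add_custom.h"  // generated two-phase API for the custom op

int RunAddCustom(const aclTensor *x, const aclTensor *y, aclTensor *z, aclrtStream stream)
{
    // Phase 1: ask the op how much device workspace this call needs.
    uint64_t workspaceSize = 0;
    aclOpExecutor *executor = nullptr;
    int ret = aclnnAddCustomGetWorkspaceSize(x, y, z, &workspaceSize, &executor);
    if (ret != 0) {
        return ret;
    }

    // Allocate the workspace (the macro backs it with an NPU byte tensor instead).
    void *workspace = nullptr;
    if (workspaceSize > 0) {
        ret = aclrtMalloc(&workspace, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST);
        if (ret != 0) {
            return ret;
        }
    }

    // Phase 2: launch the computation on the stream.
    ret = aclnnAddCustom(workspace, workspaceSize, executor, stream);

    // The launch is asynchronous, so the workspace must outlive the kernel;
    // synchronizing here keeps the sketch simple (the macro relies on the
    // caching allocator's stream-ordered tensor allocation instead).
    if (ret == 0) {
        ret = aclrtSynchronizeStream(stream);
    }
    if (workspace != nullptr) {
        aclrtFree(workspace);
    }
    return ret;
}
```
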
-# =============================================================================== - -import torch -import torch_npu -from torch_npu.testing.testcase import TestCase, run_tests - -torch.npu.config.allow_internal_format = False - -torch.ops.load_library("../build/libAddCustom.so") - - -class TestCustomAdd(TestCase): - - def test_add_custom_ops(self): - length = [8, 2048] - x = torch.rand(length, device='cpu', dtype=torch.float16) - y = torch.rand(length, device='cpu', dtype=torch.float16) - - x_npu = x.npu() - y_npu = y.npu() - x_npu.requires_grad = True - y_npu.requires_grad = True - output = torch.ops.myops.my_op(x_npu, y_npu) - output.backward(output) - - x.requires_grad = True - y.requires_grad = True - cpuout = torch.add(x, y) - cpuout.backward(cpuout) - - self.assertRtolEqual(output, cpuout) - self.assertRtolEqual(x_npu.grad, x.grad) - self.assertRtolEqual(y_npu.grad, y.grad) - - -if __name__ == "__main__": - run_tests() diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/extension_add.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/extension_add.cpp deleted file mode 100644 index d20ace755e4457d7fbd5dc06fbea3e16ed0f3508..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/extension_add.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/** - * @file extension_add.cpp - * - * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ -#include -#include - -#include "pytorch_npu_helper.hpp" -using torch::autograd::AutogradContext; -using torch::autograd::Function; -using tensor_list = std::vector; -using namespace at; - -// register forward implementation for NPU device -at::Tensor my_op_impl_npu(const at::Tensor &self, const at::Tensor &other) -{ - // alloc output memory - at::Tensor result = at::Tensor(self); - - // call aclnn interface to perform the computation - EXEC_NPU_CMD(aclnnAddCustom, self, other, result); - return result; -} - -// register backward implementation for NPU device -std::tuple my_op_backward_impl_npu(const at::Tensor &self) -{ - at::Tensor result = at::Tensor(self); // Create output memory - - return {result, result}; -} - -// register forward implementation for Meta device -at::Tensor my_op_impl_meta(const at::Tensor &self, const at::Tensor &other) -{ - return empty_like(self); -} - -// register backward implementation for Meta device -std::tuple my_op_backward_impl_meta(const at::Tensor &self) -{ - auto result = empty_like(self); - return std::make_tuple(result, result); -} - -// look up the implementation registered for different devices for this operation -at::Tensor my_op_impl(const at::Tensor &self, const at::Tensor &other) -{ - static auto op = torch::Dispatcher::singleton().findSchemaOrThrow("myops::my_op", "").typed(); - return op.call(self, other); -} - -// look up the implementation registered for different devices for this operation -std::tuple my_op_backward_impl(const at::Tensor &self) -{ - static auto op = torch::Dispatcher::singleton() - .findSchemaOrThrow("myops::my_op_backward", "") - .typed(); - return op.call(self); -} - -// implement forward and backward binding by inheriting the torch::autograd::Function class -class MyAddFunction : public torch::autograd::Function { -public: - static at::Tensor 
forward(AutogradContext *ctx, at::Tensor self, at::Tensor other) - { - at::AutoDispatchBelowADInplaceOrView guard; - return my_op_impl(self, other); - } - - static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) - { - auto grad_output = grad_outputs[0]; - auto result = my_op_backward_impl(grad_output); - return {std::get<0>(result), std::get<1>(result)}; - } -}; - -// call apply() method when using it -at::Tensor my_op_impl_autograd(const at::Tensor &self, const at::Tensor &other) -{ - return MyAddFunction::apply(self, other); -} - -// register forward and backward implementations for the NPU device -// the device name used by the NPU device in PyTorch 2.1 and above is PrivateUse1. -// in versions below 2.1, XLA is used. If the version is below 2.1, PrivateUse1 needs to be changed to XLA. -TORCH_LIBRARY_IMPL(myops, PrivateUse1, m) -{ - m.impl("my_op", &my_op_impl_npu); - m.impl("my_op_backward", &my_op_backward_impl_npu); -} - -// bind the NPU's autograd implementation to the operation -// if the version is below PyTorch 2.1, AutogradPrivateUse1 needs to be changed to AutogradXLA. -TORCH_LIBRARY_IMPL(myops, AutogradPrivateUse1, m) -{ - m.impl("my_op", &my_op_impl_autograd); -} - -// register forward and backward implementations for the Meta device -TORCH_LIBRARY_IMPL(myops, Meta, m) -{ - m.impl("my_op", &my_op_impl_meta); - m.impl("my_op_backward", &my_op_backward_impl_meta); -} diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/extension_add1.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/extension_add1.cpp deleted file mode 100644 index a232546bcb849b51d624c80eaf00ac074f385c17..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/extension_add1.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/** - * @file extension_add1.cpp - * - * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
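
Because `my_op_impl` in the file above goes back through the dispatcher instead of calling a backend function directly, the same call site reaches the Meta kernels when it is handed Meta tensors. A small usage sketch (illustrative only, not part of the sample) that runs output shape/dtype inference without touching device memory:

```cpp
#include <ATen/ATen.h>

at::Tensor InferAddCustomOutput()
{
    // Meta tensors carry shape and dtype but no storage.
    auto opts = at::TensorOptions().device(c10::DeviceType::Meta).dtype(at::kHalf);
    at::Tensor a = at::empty({8, 2048}, opts);
    at::Tensor b = at::empty({8, 2048}, opts);
    // Dispatches on the Meta key, i.e. to my_op_impl_meta registered above.
    return my_op_impl(a, b);
}
```
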
- */ -#include -#include - -#include "pytorch_npu_helper.hpp" -using torch::autograd::AutogradContext; -using torch::autograd::Function; -using tensor_list = std::vector; -using namespace at; - -// register forward implementation for NPU device -at::Tensor my_op_impl_npu1(const at::Tensor &self, const at::Tensor &other) -{ - // alloc output memory - at::Tensor result = at::Tensor(self); - - // call aclnn interface to perform the computation - EXEC_NPU_CMD(aclnnAddCustom, self, other, result); - return result; -} - -// register backward implementation for NPU device -std::tuple my_op_backward_impl_npu1(const at::Tensor &self) -{ - at::Tensor result = at::Tensor(self); // Create output memory - - return {result, result}; -} - -// register forward implementation for Meta device -at::Tensor my_op_impl_meta1(const at::Tensor &self, const at::Tensor &other) -{ - return empty_like(self); -} - -// register backward implementation for Meta device -std::tuple my_op_backward_impl_meta1(const at::Tensor &self) -{ - auto result = empty_like(self); - return std::make_tuple(result, result); -} - -// look up the implementation registered for different devices for this operation -at::Tensor my_op_impl1(const at::Tensor &self, const at::Tensor &other) -{ - static auto op = - torch::Dispatcher::singleton().findSchemaOrThrow("myops::my_op1", "").typed(); - return op.call(self, other); -} - -// look up the implementation registered for different devices for this operation -std::tuple my_op_backward_impl1(const at::Tensor &self) -{ - static auto op = torch::Dispatcher::singleton() - .findSchemaOrThrow("myops::my_op_backward1", "") - .typed(); - return op.call(self); -} - -// implement forward and backward binding by inheriting the torch::autograd::Function class -class MyAddFunction1 : public torch::autograd::Function { -public: - static at::Tensor forward(AutogradContext *ctx, at::Tensor self, at::Tensor other) - { - at::AutoDispatchBelowADInplaceOrView guard; - return my_op_impl1(self, other); - } - - static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) - { - auto grad_output = grad_outputs[0]; - auto result = my_op_backward_impl1(grad_output); - return {std::get<0>(result), std::get<1>(result)}; - } -}; - -// call apply() method when using it -at::Tensor my_op_impl_autograd1(const at::Tensor &self, const at::Tensor &other) -{ - return MyAddFunction1::apply(self, other); -} - -// register forward and backward implementations for the NPU device -// the device name used by the NPU device in PyTorch 2.1 and above is PrivateUse1. -// in versions below 2.1, XLA is used. If the version is below 2.1, PrivateUse1 needs to be changed to XLA. -TORCH_LIBRARY_IMPL(myops, PrivateUse1, m) -{ - m.impl("my_op1", &my_op_impl_npu1); - m.impl("my_op_backward1", &my_op_backward_impl_npu1); -} - -// bind the NPU's autograd implementation to the operation -// if the version is below PyTorch 2.1, AutogradPrivateUse1 needs to be changed to AutogradXLA. 
-TORCH_LIBRARY_IMPL(myops, AutogradPrivateUse1, m) -{ - m.impl("my_op1", &my_op_impl_autograd1); -} - -// register forward and backward implementations for the Meta device -TORCH_LIBRARY_IMPL(myops, Meta, m) -{ - m.impl("my_op1", &my_op_impl_meta1); - m.impl("my_op_backward1", &my_op_backward_impl_meta1); -} diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/function.h b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/function.h deleted file mode 100644 index f728d30a1d4c5ae936e9e32c653db596177a2114..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/function.h +++ /dev/null @@ -1,18 +0,0 @@ -/** - * @file function.h - * - * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ -#ifndef FUNCTION_H_ -#define FUNCTION_H_ - -#include - -at::Tensor my_op_impl_autograd(const at::Tensor &self, const at::Tensor &other); -at::Tensor my_op_impl_autograd1(const at::Tensor &self, const at::Tensor &other); - -#endif // FUNCTION_H_ diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/pytorch_npu_helper.hpp b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/pytorch_npu_helper.hpp deleted file mode 100644 index 3b98810f69ef2522697052f5d9f6f45bd32e50c0..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/pytorch_npu_helper.hpp +++ /dev/null @@ -1,694 +0,0 @@ -/** - * @file pytorch_npu_helper.hpp - * - * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- */ -#ifndef PYTORCH_NPU_HELPER_HPP_ -#define PYTORCH_NPU_HELPER_HPP_ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "torch_npu/csrc/aten/NPUNativeFunctions.h" -#include "torch_npu/csrc/core/npu/NPUStream.h" -#include "torch_npu/csrc/framework/OpCommand.h" -#include "torch_npu/csrc/framework/interface/EnvVariables.h" -#include "torch_npu/csrc/framework/utils/CalcuOpUtil.h" -#include "torch_npu/csrc/framework/utils/OpPreparation.h" - -#define NPU_NAME_SPACE at_npu::native - -#define __FILENAME__ (strrchr("/" __FILE__, '/') + 1) - -typedef struct aclOpExecutor aclOpExecutor; -typedef struct aclTensor aclTensor; -typedef struct aclScalar aclScalar; -typedef struct aclIntArray aclIntArray; -typedef struct aclFloatArray aclFloatArray; -typedef struct aclBoolArray aclBoolArray; -typedef struct aclTensorList aclTensorList; - -typedef aclTensor *(*_aclCreateTensor)(const int64_t *view_dims, uint64_t view_dims_num, aclDataType data_type, - const int64_t *stride, int64_t offset, aclFormat format, - const int64_t *storage_dims, uint64_t storage_dims_num, void *tensor_data); -typedef aclScalar *(*_aclCreateScalar)(void *value, aclDataType data_type); -typedef aclIntArray *(*_aclCreateIntArray)(const int64_t *value, uint64_t size); -typedef aclFloatArray *(*_aclCreateFloatArray)(const float *value, uint64_t size); -typedef aclBoolArray *(*_aclCreateBoolArray)(const bool *value, uint64_t size); -typedef aclTensorList *(*_aclCreateTensorList)(const aclTensor *const *value, uint64_t size); - -typedef int (*_aclDestroyTensor)(const aclTensor *tensor); -typedef int (*_aclDestroyScalar)(const aclScalar *scalar); -typedef int (*_aclDestroyIntArray)(const aclIntArray *array); -typedef int (*_aclDestroyFloatArray)(const aclFloatArray *array); -typedef int (*_aclDestroyBoolArray)(const aclBoolArray *array); -typedef int (*_aclDestroyTensorList)(const aclTensorList *array); - -constexpr int kHashBufSize = 8192; -constexpr int kHashBufMaxSize = kHashBufSize + 1024; -extern thread_local char g_hashBuf[kHashBufSize]; -extern thread_local int g_hashOffset; - -#define AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(_) \ - _(at::ScalarType::Byte, ACL_UINT8) \ - _(at::ScalarType::Char, ACL_INT8) \ - _(at::ScalarType::Short, ACL_INT16) \ - _(at::ScalarType::Int, ACL_INT32) \ - _(at::ScalarType::Long, ACL_INT64) \ - _(at::ScalarType::Half, ACL_FLOAT16) \ - _(at::ScalarType::Float, ACL_FLOAT) \ - _(at::ScalarType::Double, ACL_DOUBLE) \ - _(at::ScalarType::ComplexHalf, ACL_DT_UNDEFINED) \ - _(at::ScalarType::ComplexFloat, ACL_COMPLEX64) \ - _(at::ScalarType::ComplexDouble, ACL_COMPLEX128) \ - _(at::ScalarType::Bool, ACL_BOOL) \ - _(at::ScalarType::QInt8, ACL_DT_UNDEFINED) \ - _(at::ScalarType::QUInt8, ACL_DT_UNDEFINED) \ - _(at::ScalarType::QInt32, ACL_DT_UNDEFINED) \ - _(at::ScalarType::BFloat16, ACL_BF16) \ - _(at::ScalarType::QUInt4x2, ACL_DT_UNDEFINED) \ - _(at::ScalarType::QUInt2x4, ACL_DT_UNDEFINED) \ - _(at::ScalarType::Undefined, ACL_DT_UNDEFINED) \ - _(at::ScalarType::NumOptions, ACL_DT_UNDEFINED) - -static std::vector split_str(std::string s, const std::string &del) -{ - int end = s.find(del); - std::vector path_list; - while (end != -1) { - path_list.push_back(s.substr(0, end)); - s.erase(s.begin(), s.begin() + end + 1); - end = s.find(del); - } - path_list.push_back(s); - return path_list; -} - -static bool is_file_exist(const std::string &path) -{ - if (path.empty() || path.size() > PATH_MAX) { - return false; - } - return 
(access(path.c_str(), F_OK) == 0) ? true : false; -} - -inline std::string real_path(const std::string &path) -{ - if (path.empty() || path.size() > PATH_MAX) { - return ""; - } - char realPath[PATH_MAX] = {0}; - if (realpath(path.c_str(), realPath) == nullptr) { - return ""; - } - return std::string(realPath); -} - -inline std::vector get_custom_lib_path() -{ - char *ascend_custom_opppath = std::getenv("ASCEND_CUSTOM_OPP_PATH"); - std::vector custom_lib_path_list; - - if (ascend_custom_opppath == NULL) { - ASCEND_LOGW("ASCEND_CUSTOM_OPP_PATH is not exists"); - return std::vector(); - } - - std::string ascend_custom_opppath_str(ascend_custom_opppath); - // split string with ":" - custom_lib_path_list = split_str(ascend_custom_opppath_str, ":"); - if (custom_lib_path_list.empty()) { - return std::vector(); - } - for (auto &it : custom_lib_path_list) { - it = it + "/op_api/lib/"; - } - - return custom_lib_path_list; -} - -inline std::vector get_default_custom_lib_path() -{ - char *ascend_opp_path = std::getenv("ASCEND_OPP_PATH"); - std::vector default_vendors_list; - - if (ascend_opp_path == NULL) { - ASCEND_LOGW("ASCEND_OPP_PATH is not exists"); - return std::vector(); - } - - std::string vendors_path(ascend_opp_path); - vendors_path = vendors_path + "/vendors"; - std::string vendors_config_file = real_path(vendors_path + "/config.ini"); - if (vendors_config_file.empty()) { - ASCEND_LOGW("config.ini is not exists"); - return std::vector(); - } - - if (!is_file_exist(vendors_config_file)) { - ASCEND_LOGW("config.ini is not exists or the path length is more than %d", PATH_MAX); - return std::vector(); - } - - std::ifstream ifs(vendors_config_file); - std::string line; - while (std::getline(ifs, line)) { - if (line.find("load_priority=") == 0) { - break; - } - } - std::string head = "load_priority="; - line.erase(0, head.length()); - - // split string with "," - default_vendors_list = split_str(line, ","); - if (default_vendors_list.empty()) { - return std::vector(); - } - for (auto &it : default_vendors_list) { - it = real_path(vendors_path + "/" + it + "/op_api/lib/"); - } - - return default_vendors_list; -} - -const std::vector g_custom_lib_path = get_custom_lib_path(); -const std::vector g_default_custom_lib_path = get_default_custom_lib_path(); - -constexpr aclDataType kATenScalarTypeToAclDataTypeTable[static_cast(at::ScalarType::NumOptions) + 1] = { -#define DEFINE_ENUM(_1, n) n, - AT_ALL_SCALAR_TYPE_AND_ACL_DATATYPE_PAIR(DEFINE_ENUM) -#undef DEFINE_ENUM -}; - -#define GET_OP_API_FUNC(apiName) reinterpret_cast<_##apiName>(GetOpApiFuncAddr(#apiName)) - -#define MEMCPY_TO_BUF(data_expression, size_expression) \ - if (g_hashOffset + (size_expression) > kHashBufSize) { \ - g_hashOffset = kHashBufMaxSize; \ - return; \ - } \ - memcpy(g_hashBuf + g_hashOffset, data_expression, size_expression); \ - g_hashOffset += size_expression; - -inline const char *GetOpApiLibName(void) -{ - return "libopapi.so"; -} - -inline const char *GetCustOpApiLibName(void) -{ - return "libcust_opapi.so"; -} - -inline void *GetOpApiFuncAddrInLib(void *handler, const char *libName, const char *apiName) -{ - auto funcAddr = dlsym(handler, apiName); - if (funcAddr == nullptr) { - ASCEND_LOGW("dlsym %s from %s failed, error:%s.", apiName, libName, dlerror()); - } - return funcAddr; -} - -inline void *GetOpApiLibHandler(const char *libName) -{ - auto handler = dlopen(libName, RTLD_LAZY); - if (handler == nullptr) { - ASCEND_LOGW("dlopen %s failed, error:%s.", libName, dlerror()); - } - return handler; -} - -inline void 
*GetOpApiFuncAddr(const char *apiName) -{ - if (!g_custom_lib_path.empty()) { - for (auto &it : g_custom_lib_path) { - auto cust_opapi_lib = real_path(it + "/" + GetCustOpApiLibName()); - if (cust_opapi_lib.empty()) { - break; - } - auto custOpApiHandler = GetOpApiLibHandler(cust_opapi_lib.c_str()); - if (custOpApiHandler != nullptr) { - auto funcAddr = GetOpApiFuncAddrInLib(custOpApiHandler, GetCustOpApiLibName(), apiName); - if (funcAddr != nullptr) { - ASCEND_LOGI("%s is found in %s.", apiName, cust_opapi_lib.c_str()); - return funcAddr; - } - } - } - ASCEND_LOGI("%s is not in custom lib.", apiName); - } - - if (!g_default_custom_lib_path.empty()) { - for (auto &it : g_default_custom_lib_path) { - auto default_cust_opapi_lib = real_path(it + "/" + GetCustOpApiLibName()); - if (default_cust_opapi_lib.empty()) { - break; - } - auto custOpApiHandler = GetOpApiLibHandler(default_cust_opapi_lib.c_str()); - if (custOpApiHandler != nullptr) { - auto funcAddr = GetOpApiFuncAddrInLib(custOpApiHandler, GetCustOpApiLibName(), apiName); - if (funcAddr != nullptr) { - ASCEND_LOGI("%s is found in %s.", apiName, default_cust_opapi_lib.c_str()); - return funcAddr; - } - } - } - ASCEND_LOGI("%s is not in default custom lib.", apiName); - } - - static auto opApiHandler = GetOpApiLibHandler(GetOpApiLibName()); - if (opApiHandler == nullptr) { - return nullptr; - } - return GetOpApiFuncAddrInLib(opApiHandler, GetOpApiLibName(), apiName); -} - -inline c10::Scalar ConvertTensorToScalar(const at::Tensor &tensor) -{ - c10::Scalar expScalar; - const at::Tensor *aclInput = &tensor; - if (aclInput->scalar_type() == at::ScalarType::Double) { - double value = *(double *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Long) { - int64_t value = *(int64_t *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Float) { - float value = *(float *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Int) { - int value = *(int *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Half) { - c10::Half value = *(c10::Half *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::Bool) { - int8_t value = *(int8_t *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::ComplexDouble) { - c10::complex value = *(c10::complex *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::ComplexFloat) { - c10::complex value = *(c10::complex *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } else if (aclInput->scalar_type() == at::ScalarType::BFloat16) { - c10::BFloat16 value = *(c10::BFloat16 *)aclInput->data_ptr(); - c10::Scalar scalar(value); - expScalar = scalar; - } - return expScalar; -} - -inline at::Tensor CopyTensorHostToDevice(const at::Tensor &cpu_tensor) -{ - at::Tensor cpuPinMemTensor = cpu_tensor.pin_memory(); - int deviceIndex = 0; - return cpuPinMemTensor.to(c10::Device(torch_npu::utils::get_npu_device_type(), deviceIndex), - cpuPinMemTensor.scalar_type(), true, true); -} - -inline at::Tensor CopyScalarToDevice(const c10::Scalar &cpu_scalar, at::ScalarType 
scalar_data_type) -{ - return CopyTensorHostToDevice(scalar_to_tensor(cpu_scalar).to(scalar_data_type)); -} - -inline aclTensor *ConvertType(const at::Tensor &at_tensor) -{ - static const auto aclCreateTensor = GET_OP_API_FUNC(aclCreateTensor); - if (aclCreateTensor == nullptr) { - return nullptr; - } - - if (!at_tensor.defined()) { - return nullptr; - } - at::ScalarType scalar_data_type = at_tensor.scalar_type(); - aclDataType acl_data_type = kATenScalarTypeToAclDataTypeTable[static_cast(scalar_data_type)]; - TORCH_CHECK(acl_data_type != ACL_DT_UNDEFINED, - std::string(c10::toString(scalar_data_type)) + " has not been supported") - c10::SmallVector storageDims; - // if acl_data_type is ACL_STRING, storageDims is empty. - auto itemsize = at_tensor.itemsize(); - if (itemsize == 0) { - AT_ERROR("When ConvertType, tensor item size of cannot be zero."); - return nullptr; - } - if (acl_data_type != ACL_STRING) { - storageDims.push_back(at_tensor.storage().nbytes() / itemsize); - } - - const auto dimNum = at_tensor.sizes().size(); - aclFormat format = ACL_FORMAT_ND; - switch (dimNum) { - case 3: - format = ACL_FORMAT_NCL; - break; - case 4: - format = ACL_FORMAT_NCHW; - break; - case 5: - format = ACL_FORMAT_NCDHW; - break; - default: - format = ACL_FORMAT_ND; - } - - if (at_tensor.unsafeGetTensorImpl()->is_wrapped_number()) { - c10::Scalar expScalar = ConvertTensorToScalar(at_tensor); - at::Tensor aclInput = CopyScalarToDevice(expScalar, scalar_data_type); - return aclCreateTensor(aclInput.sizes().data(), aclInput.sizes().size(), acl_data_type, - aclInput.strides().data(), aclInput.storage_offset(), format, storageDims.data(), - storageDims.size(), const_cast(aclInput.storage().data())); - } - - auto acl_tensor = - aclCreateTensor(at_tensor.sizes().data(), at_tensor.sizes().size(), acl_data_type, at_tensor.strides().data(), - at_tensor.storage_offset(), format, storageDims.data(), storageDims.size(), - const_cast(at_tensor.storage().data())); - return acl_tensor; -} - -inline aclScalar *ConvertType(const at::Scalar &at_scalar) -{ - static const auto aclCreateScalar = GET_OP_API_FUNC(aclCreateScalar); - if (aclCreateScalar == nullptr) { - return nullptr; - } - - at::ScalarType scalar_data_type = at_scalar.type(); - aclDataType acl_data_type = kATenScalarTypeToAclDataTypeTable[static_cast(scalar_data_type)]; - TORCH_CHECK(acl_data_type != ACL_DT_UNDEFINED, - std::string(c10::toString(scalar_data_type)) + " has not been supported") - aclScalar *acl_scalar = nullptr; - switch (scalar_data_type) { - case at::ScalarType::Double: { - double value = at_scalar.toDouble(); - acl_scalar = aclCreateScalar(&value, acl_data_type); - break; - } - case at::ScalarType::Long: { - int64_t value = at_scalar.toLong(); - acl_scalar = aclCreateScalar(&value, acl_data_type); - break; - } - case at::ScalarType::Bool: { - bool value = at_scalar.toBool(); - acl_scalar = aclCreateScalar(&value, acl_data_type); - break; - } - case at::ScalarType::ComplexDouble: { - auto value = at_scalar.toComplexDouble(); - acl_scalar = aclCreateScalar(&value, acl_data_type); - break; - } - default: - acl_scalar = nullptr; - break; - } - return acl_scalar; -} - -inline aclIntArray *ConvertType(const at::IntArrayRef &at_array) -{ - static const auto aclCreateIntArray = GET_OP_API_FUNC(aclCreateIntArray); - if (aclCreateIntArray == nullptr) { - return nullptr; - } - auto array = aclCreateIntArray(at_array.data(), at_array.size()); - return array; -} - -template inline aclBoolArray *ConvertType(const std::array &value) -{ - static const auto 
aclCreateBoolArray = GET_OP_API_FUNC(aclCreateBoolArray); - if (aclCreateBoolArray == nullptr) { - return nullptr; - } - - auto array = aclCreateBoolArray(value.data(), value.size()); - return array; -} - -inline aclBoolArray *ConvertType(const at::ArrayRef &value) -{ - static const auto aclCreateBoolArray = GET_OP_API_FUNC(aclCreateBoolArray); - if (aclCreateBoolArray == nullptr) { - return nullptr; - } - - auto array = aclCreateBoolArray(value.data(), value.size()); - return array; -} - -inline aclTensorList *ConvertType(const at::TensorList &at_tensor_list) -{ - static const auto aclCreateTensorList = GET_OP_API_FUNC(aclCreateTensorList); - if (aclCreateTensorList == nullptr) { - return nullptr; - } - - std::vector tensor_list(at_tensor_list.size()); - for (size_t i = 0; i < at_tensor_list.size(); i++) { - tensor_list[i] = ConvertType(at_tensor_list[i]); - } - auto acl_tensor_list = aclCreateTensorList(tensor_list.data(), tensor_list.size()); - return acl_tensor_list; -} - -inline aclTensor *ConvertType(const c10::optional &opt_tensor) -{ - if (opt_tensor.has_value() && opt_tensor.value().defined()) { - return ConvertType(opt_tensor.value()); - } - return nullptr; -} - -inline aclIntArray *ConvertType(const c10::optional &opt_array) -{ - if (opt_array.has_value()) { - return ConvertType(opt_array.value()); - } - return nullptr; -} - -inline aclScalar *ConvertType(const c10::optional &opt_scalar) -{ - if (opt_scalar.has_value()) { - return ConvertType(opt_scalar.value()); - } - return nullptr; -} - -inline aclDataType ConvertType(const at::ScalarType scalarType) -{ - return kATenScalarTypeToAclDataTypeTable[static_cast(scalarType)]; -} - -template T ConvertType(T value) -{ - return value; -} - -template -auto ConvertToOpApiFunc(const Tuple ¶ms, void *opApiAddr, std::index_sequence) -{ - typedef int (*OpApiFunc)(typename std::decay(params))>::type...); - auto func = reinterpret_cast(opApiAddr); - return func; -} - -template auto ConvertToOpApiFunc(const Tuple ¶ms, void *opApiAddr) -{ - static constexpr auto size = std::tuple_size::value; - return ConvertToOpApiFunc(params, opApiAddr, std::make_index_sequence{}); -} - -inline void Release(aclTensor *p) -{ - static const auto aclDestroyTensor = GET_OP_API_FUNC(aclDestroyTensor); - if (aclDestroyTensor == nullptr) { - return; - } - aclDestroyTensor(p); -} - -inline void Release(aclScalar *p) -{ - static const auto aclDestroyScalar = GET_OP_API_FUNC(aclDestroyScalar); - if (aclDestroyScalar == nullptr) { - return; - } - aclDestroyScalar(p); -} - -inline void Release(aclIntArray *p) -{ - static const auto aclDestroyIntArray = GET_OP_API_FUNC(aclDestroyIntArray); - if (aclDestroyIntArray == nullptr) { - return; - } - - aclDestroyIntArray(p); -} - -inline void Release(aclBoolArray *p) -{ - static const auto aclDestroyBoolArray = GET_OP_API_FUNC(aclDestroyBoolArray); - if (aclDestroyBoolArray == nullptr) { - return; - } - - aclDestroyBoolArray(p); -} - -inline void Release(aclTensorList *p) -{ - static const auto aclDestroyTensorList = GET_OP_API_FUNC(aclDestroyTensorList); - if (aclDestroyTensorList == nullptr) { - return; - } - - aclDestroyTensorList(p); -} - -template void Release(T value) -{ - (void)value; -} - -template void CallRelease(Tuple t, std::index_sequence) -{ - (void)std::initializer_list{(Release(std::get(t)), 0)...}; -} - -template void ReleaseConvertTypes(Tuple &t) -{ - static constexpr auto size = std::tuple_size::value; - CallRelease(t, std::make_index_sequence{}); -} - -template constexpr auto ConvertTypes(Ts &...args) -{ - 
return std::make_tuple(ConvertType(args)...); -} - -template auto call(Function f, Tuple t, std::index_sequence) -{ - return f(std::get(t)...); -} - -template auto call(Function f, Tuple t) -{ - static constexpr auto size = std::tuple_size::value; - return call(f, t, std::make_index_sequence{}); -} - -template void AddParamToBuf(const std::array &value) -{ - MEMCPY_TO_BUF(value.data(), value.size() * sizeof(bool)); -} - -template void AddParamToBuf(const T &value) -{ - MEMCPY_TO_BUF(&value, sizeof(T)); -} - -void AddParamToBuf(const at::Tensor &); -void AddParamToBuf(const at::Scalar &); -void AddParamToBuf(const at::IntArrayRef &); -void AddParamToBuf(const at::ArrayRef &); -void AddParamToBuf(const at::TensorList &); -void AddParamToBuf(const c10::optional &); -void AddParamToBuf(const c10::optional &); -void AddParamToBuf(const c10::optional &); -void AddParamToBuf(const at::ScalarType); -void AddParamToBuf(const string &); -void AddParamToBuf(); - -template void AddParamToBuf(const T &arg, Args &...args) -{ - AddParamToBuf(arg); - AddParamToBuf(args...); -} - -uint64_t CalcHashId(); -typedef int (*InitHugeMemThreadLocal)(void *, bool); -typedef void (*UnInitHugeMemThreadLocal)(void *, bool); -typedef void (*ReleaseHugeMem)(void *, bool); - -#define EXEC_NPU_CMD(aclnn_api, ...) \ - do { \ - static const auto getWorkspaceSizeFuncAddr = GetOpApiFuncAddr(#aclnn_api "GetWorkspaceSize"); \ - static const auto opApiFuncAddr = GetOpApiFuncAddr(#aclnn_api); \ - static const auto initMemAddr = GetOpApiFuncAddr("InitHugeMemThreadLocal"); \ - static const auto unInitMemAddr = GetOpApiFuncAddr("UnInitHugeMemThreadLocal"); \ - static const auto releaseMemAddr = GetOpApiFuncAddr("ReleaseHugeMem"); \ - TORCH_CHECK(getWorkspaceSizeFuncAddr != nullptr && opApiFuncAddr != nullptr, #aclnn_api, " or ", \ - #aclnn_api "GetWorkspaceSize", " not in ", GetOpApiLibName(), ", or ", GetOpApiLibName(), \ - "not found."); \ - auto acl_stream = c10_npu::getCurrentNPUStream().stream(false); \ - uint64_t workspace_size = 0; \ - uint64_t *workspace_size_addr = &workspace_size; \ - aclOpExecutor *executor = nullptr; \ - aclOpExecutor **executor_addr = &executor; \ - InitHugeMemThreadLocal initMemFunc = reinterpret_cast(initMemAddr); \ - UnInitHugeMemThreadLocal unInitMemFunc = reinterpret_cast(unInitMemAddr); \ - if (initMemFunc) { \ - initMemFunc(nullptr, false); \ - } \ - auto converted_params = ConvertTypes(__VA_ARGS__, workspace_size_addr, executor_addr); \ - static auto getWorkspaceSizeFunc = ConvertToOpApiFunc(converted_params, getWorkspaceSizeFuncAddr); \ - auto workspace_status = call(getWorkspaceSizeFunc, converted_params); \ - TORCH_CHECK(workspace_status == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ - void *workspace_addr = nullptr; \ - if (workspace_size != 0) { \ - at::TensorOptions options = at::TensorOptions(torch_npu::utils::get_npu_device_type()); \ - auto workspace_tensor = at::empty({workspace_size}, options.dtype(kByte)); \ - workspace_addr = const_cast(workspace_tensor.storage().data()); \ - } \ - auto acl_call = [converted_params, workspace_addr, workspace_size, acl_stream, executor]() -> int { \ - typedef int (*OpApiFunc)(void *, uint64_t, aclOpExecutor *, const aclrtStream); \ - OpApiFunc opApiFunc = reinterpret_cast(opApiFuncAddr); \ - auto api_ret = opApiFunc(workspace_addr, workspace_size, executor, acl_stream); \ - TORCH_CHECK(api_ret == 0, "call " #aclnn_api " failed, detail:", aclGetRecentErrMsg()); \ - ReleaseConvertTypes(converted_params); \ - ReleaseHugeMem 
releaseMemFunc = reinterpret_cast<ReleaseHugeMem>(releaseMemAddr); \
-        if (releaseMemFunc) { \
-            releaseMemFunc(nullptr, false); \
-        } \
-        return api_ret; \
-    }; \
-    at_npu::native::OpCommand cmd; \
-    cmd.Name(#aclnn_api); \
-    cmd.SetCustomHandler(acl_call); \
-    cmd.Run(); \
-    if (unInitMemFunc) { \
-        unInitMemFunc(nullptr, false); \
-    } \
-    } while (false)
-
-#endif // PYTORCH_NPU_HELPER_HPP_
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/register.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/register.cpp deleted file mode 100644 index 80fa102cf7a40c2626ea170ea0e11090f182f69f..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/csrc/register.cpp +++ /dev/null @@ -1,29 +0,0 @@
-/**
- * @file register.cpp
- *
- * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#include
-#include
-
-#include "function.h"
-
-// Register the schemas for my_op/my_op_backward and my_op1/my_op_backward1 in the myops namespace
-TORCH_LIBRARY(myops, m)
-{
-    m.def("my_op(Tensor self, Tensor other) -> Tensor");
-    m.def("my_op_backward(Tensor self) -> (Tensor, Tensor)");
-    m.def("my_op1(Tensor self, Tensor other) -> Tensor");
-    m.def("my_op_backward1(Tensor self) -> (Tensor, Tensor)");
-}
-
-// bind the C++ interfaces to Python via pybind11
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
-{
-    m.def("add_custom", &my_op_impl_autograd, "x + y");
-    m.def("add_custom1", &my_op_impl_autograd1, "x + y");
-}
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/custom_ops/__init__.py b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/custom_ops/__init__.py deleted file mode 100644 index f5ac2f5e936ffefaae1501d0656aa4a3453eef0b..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/custom_ops/__init__.py +++ /dev/null @@ -1,12 +0,0 @@
-#!/usr/bin/python3
-# coding=utf-8
-#
-# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# ===============================================================================
-
-import custom_ops_lib
-from .add_custom import add_custom, add_custom1
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/custom_ops/add_custom.py b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/custom_ops/add_custom.py deleted file mode 100644 index 1222cc6db4da7f3aba0aeab307baae5e2e4b74eb..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/custom_ops/add_custom.py +++ /dev/null @@ -1,19 +0,0 @@
-#!/usr/bin/python3
-# coding=utf-8
-#
-# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# =============================================================================== - -import custom_ops_lib - - -def add_custom(self, other): - return custom_ops_lib.add_custom(self, other) - - -def add_custom1(self, other): - return custom_ops_lib.add_custom1(self, other) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/run.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/run.sh deleted file mode 100644 index 4e43cdad31708efa995cde062f9040282c56c77e..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/run.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -if [ -n "$ASCEND_INSTALL_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH -elif [ -n "$ASCEND_HOME_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH -else - if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then - _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest - else - _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest - fi -fi -source $_ASCEND_INSTALL_PATH/bin/setenv.bash - -set -e -rm -rf dist build -python3 setup.py build bdist_wheel -( - cd dist - pip3 install custom_ops-*.whl -) -( - cd test - python3 test_add_custom.py -) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/run_graph.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/run_graph.sh deleted file mode 100644 index f94a529e153e71912debb33a06f5fee25ac8d6ab..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/run_graph.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -if [ -n "$ASCEND_INSTALL_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH -elif [ -n "$ASCEND_HOME_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH -else - if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then - _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest - else - _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest - fi -fi -source $_ASCEND_INSTALL_PATH/bin/setenv.bash - -rm -rf dist build -python3 setup.py build bdist_wheel -( - cd dist - pip3 install custom_ops-*.whl -) -( - cd test - python3 test_add_custom_graph.py -) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/setup.py b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/setup.py deleted file mode 100644 index 555f16a6d301d643bb2ba16d6e5eb57d1be448f6..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/setup.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/python3 -# coding=utf-8 -# -# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
-# =============================================================================== - -import os -import glob -import torch -from setuptools import setup, find_packages -from torch.utils.cpp_extension import BuildExtension - -import torch_npu -from torch_npu.utils.cpp_extension import NpuExtension - -PYTORCH_NPU_INSTALL_PATH = os.path.dirname(os.path.abspath(torch_npu.__file__)) -source_files = glob.glob(os.path.join("./csrc/", "*.cpp")) -USE_NINJA = os.getenv('USE_NINJA') == '1' - -exts = [] -ext1 = NpuExtension( - name="custom_ops_lib", - # 如果还有其他cpp文件参与编译,需要在这里添加 - sources=source_files, - extra_compile_args=[ - '-I' + - os.path.join(PYTORCH_NPU_INSTALL_PATH, "include/third_party/acl/inc"), - ], -) -exts.append(ext1) - -setup( - name="custom_ops", - version='1.0', - keywords='custom_ops', - ext_modules=exts, - packages=find_packages(), - cmdclass={"build_ext": BuildExtension.with_options(use_ninja=USE_NINJA)}, -) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/test/test_add_custom.py b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/test/test_add_custom.py deleted file mode 100644 index a6bec4aa8d826f361755075ef5fdd3b6a5ece945..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/test/test_add_custom.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/python3 -# coding=utf-8 -# -# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# =============================================================================== - -import torch -import torch_npu -from torch_npu.testing.testcase import TestCase, run_tests -import custom_ops - -torch.npu.config.allow_internal_format = False - - -class TestCustomAdd(TestCase): - - def test_add_custom(self): - length = [8, 2048] - x = torch.rand(length, device='cpu', dtype=torch.float16) - y = torch.rand(length, device='cpu', dtype=torch.float16) - - x_npu = x.npu() - y_npu = y.npu() - x_npu.requires_grad = True - y_npu.requires_grad = True - output = custom_ops.add_custom(x_npu, y_npu) - output.backward(output) - - x.requires_grad = True - y.requires_grad = True - cpuout = torch.add(x, y) - cpuout.backward(cpuout) - - self.assertRtolEqual(output, cpuout) - self.assertRtolEqual(x_npu.grad, x.grad) - self.assertRtolEqual(y_npu.grad, y.grad) - - def test_add_custom_meta(self): - length = [8, 2048] - x = torch.rand(length, device='cpu', dtype=torch.float16) - y = torch.rand(length, device='cpu', dtype=torch.float16) - - x_input1 = x.to("meta") - y_input1 = y.to("meta") - x_input1.requires_grad = True - y_input1.requires_grad = True - custom_out = custom_ops.add_custom(x_input1, y_input1) - custom_out.backward(custom_out) - - x_input2 = x.to("meta") - y_input2 = y.to("meta") - x_input2.requires_grad = True - y_input2.requires_grad = True - cpuout = torch.add(x_input2, y_input2) - cpuout.backward(cpuout) - - self.assertTrue(custom_out.is_meta) - self.assertRtolEqual(custom_out.size(), cpuout.size()) - self.assertRtolEqual(x_input1.grad.size(), x_input2.grad.size()) - self.assertRtolEqual(y_input1.grad.size(), y_input2.grad.size()) - - -if __name__ == "__main__": - run_tests() diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/test/test_add_custom_graph.py 
b/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/test/test_add_custom_graph.py deleted file mode 100644 index d9b61f4984f0b3a4d88cb3c5d2572355123bdfd3..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/CppExtensions/setup/test/test_add_custom_graph.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/python3 -# coding=utf-8 -# -# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# =============================================================================== - -from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, Callable -import torch -import torch_npu -from torch_npu.testing.testcase import TestCase, run_tests -import torchair -from torchair import register_fx_node_ge_converter -from torchair.ge import Tensor -import custom_ops - - -# 注意: meta_outputs形参名为固定写法,若写错会影响ge节点的输出dtype与shape推导 -@register_fx_node_ge_converter(torch.ops.myops.my_op.default) -def convert_npu_add_custom(x: Tensor, y: Tensor, z: Tensor = None, meta_outputs: Any = None): - return torchair.ge.custom_op( - "AddCustom", - inputs={ - "x": x, - "y": y, - }, - outputs=['z'] - ) - - -class TestCustomAdd(TestCase): - - def test_add_custom_graph(self): - - class PlugInAdd(torch.nn.Module): - - def __init__(self): - super().__init__() - - def forward(self, input1, input2): - return torch.ops.myops.my_op(input1, input2) - - length = [8, 2048] - x = torch.rand(length, device='cpu', dtype=torch.float16) - y = torch.rand(length, device='cpu', dtype=torch.float16) - - model = PlugInAdd().npu() - - import torchair - from torchair.configs.compiler_config import CompilerConfig - config = CompilerConfig() - npu_backend = torchair.get_npu_backend(compiler_config=config) - model = torch.compile(model, backend=npu_backend, dynamic=True) - - with torch.no_grad(): - output = model(x.npu(), y.npu()) - - cpuout = torch.add(x, y) - - self.assertRtolEqual(output, cpuout) - - -if __name__ == "__main__": - run_tests() diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/README.md b/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/README.md deleted file mode 100644 index 9ed7557475bbc1e3580bdd2e2fb928eae747af46..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/README.md +++ /dev/null @@ -1,119 +0,0 @@ -## 概述 -本样例展示了自定义算子分别向pytorch注册eager模式与torch.compile模式的注册样例,eager模式与torch.compile模式的介绍参考:[Link](https://pytorch.org/get-started/pytorch-2.0)。 - -## 目录结构介绍 -``` -├── PytorchInvocation //通过pytorch调用的方式调用AddTemplateCustom模板参数算子 -│ ├── op_plugin_patch // op plugin实现目录 -│ │ ├── AddCustomKernelNpuApi.cpp // pytorch eager模式下算子实现,其中调用aclnn接口 -│ │ └── op_plugin_functions.yaml // pytorch注册的算子原型描述声明文件 -│ ├── run_op_plugin.sh // 编译pytorch_npu并执行用例的脚本 -│ ├── test_ops_custom.py // 执行eager模式下算子用例脚本 -│ └── test_ops_custom_register_in_graph.py // 执行torch.compile模式下用例脚本 -``` - -## 样例脚本run_op_plugin.sh关键步骤解析 - - - 下载PTA源码仓,并更新submodule,其中torchair、op_plugin是本示例中依赖的submodule - ```bash - git clone https://gitee.com/ascend/pytorch.git - git submodule init - git submodule update - git checkout -b v2.1.0 origin/v2.1.0 - ``` - - - PTA自定义算子注册, 本样例通过向op_plugin_functions.yaml中写入算子原型的方式注册pytorch自定义算子 - ```bash - FUNCTION_REGISTE_FIELD="op_plugin_patch/op_plugin_functions.yaml" - 
FUNCTION_REGISTE_FILE="${PYTORCH_DIR}/third_party/op-plugin/op_plugin/config/op_plugin_functions.yaml" - line=" - func: npu_add_custom(Tensor x, Tensor y) -> Tensor" - if ! grep -q "\ $line" $FUNCTION_REGISTE_FILE; then - sed -i "/custom:/r $FUNCTION_REGISTE_FIELD" $FUNCTION_REGISTE_FILE - fi - ``` - - - 编译PTA插件并安装,其中需要将编写好的单算子kernel文件AddCustomKernelNpuApi.cpp,拷贝至op-plugin目录下编译。 - (注:单算子kernel文件中调用aclnnAddCustom接口,需要提前完成Ascend C的算子注册) - ```bash - cp -rf op_plugin_patch/*.cpp ${PYTORCH_DIR}/third_party/op-plugin/op_plugin/ops/opapi - export DISABLE_INSTALL_TORCHAIR=FALSE - cd ${PYTORCH_DIR} - (bash ci/build.sh --python=${PYTHON_VERSION} --disable_rpc ; pip uninstall torch-npu -y ; pip3 install dist/*.whl) - ``` - -## 自定义算子入图关键步骤解析 - 可以在test_ops_custom_register_in_graph.py文件查看相关注册实现。 - - 注册自定义算子的meta实现 - ```python - from torch_npu.meta._meta_registrations import m - from torch.library import impl - @impl(m, "npu_add_custom") - def npu_add_custom_meta(x, y): - return torch.empty_like(x) - ``` - - - 根据Ascend C工程产生的REG_OP算子原型填充torchair.ge.custom_op的参数。 - - AddCustom的REG_OP原型为: - ```cpp - REG_OP(AddCustom) - .INPUT(x, ge::TensorType::ALL()) - .INPUT(y, ge::TensorType::ALL()) - .OUTPUT(z, ge::TensorType::ALL()) - .OP_END_FACTORY_REG(AddCustom); - ``` - - - 注册自定义算子converter - ```python - from torchair import register_fx_node_ge_converter - from torchair.ge import Tensor - @register_fx_node_ge_converter(torch.ops.npu.npu_add_custom.default) - def convert_npu_add_custom(x: Tensor, y: Tensor, z: Tensor = None, meta_outputs: Any = None): - return torchair.ge.custom_op( - "AddCustom", - inputs={ - "x": x, - "y": y, - }, - outputs=['z'] - ) - ``` - -## 运行样例算子 -该样例脚本基于Pytorch2.1、python3.9 运行 -### 1.编译算子工程 -运行此样例前,请参考[编译算子工程](../README.md#operatorcompile)完成前期准备。 - -### 2.pytorch调用的方式调用样例运行 - - - 进入到样例目录 - 以命令行方式下载样例代码,master分支为例。 - ```bash - cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation - ``` - - - 样例执行 - - 样例执行过程中会自动生成测试数据,然后运行pytorch样例,最后检验运行结果。具体过程可参见run_op_plugin.sh脚本。 - ```bash - bash run_op_plugin.sh - ``` - -#### 其他样例运行说明 - - 环境安装完成后,样例支持单独执行:eager模式与compile模式的测试用例 - - 执行pytorch eager模式的自定义算子测试文件 - ```bash - python3 test_ops_custom.py - ``` - - 执行pytorch torch.compile模式的自定义算子测试文件 - ```bash - python3 test_ops_custom_register_in_graph.py - ``` - -### 其他说明 - 更加详细的Pytorch适配算子开发指导可以参考[LINK](https://gitee.com/ascend/op-plugin/wikis)中的“算子适配开发指南”。 - -## 更新说明 -| 时间 | 更新事项 | -| ---------- | ------------ | -| 2024/10/25 | 新增模板参数算子样例 | \ No newline at end of file diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/op_plugin_patch/AddCustomKernelNpuApi.cpp b/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/op_plugin_patch/AddCustomKernelNpuApi.cpp deleted file mode 100755 index ae310874c2508c95ef89319d76ec76d451d5d645..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/op_plugin_patch/AddCustomKernelNpuApi.cpp +++ /dev/null @@ -1,25 +0,0 @@ -/** - * @file AddCustomKernelNpuApi.cpp - * - * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- */ -#include "op_plugin/AclOpsInterface.h" -#include "op_plugin/OpApiInterface.h" -#include "op_plugin/utils/op_api_common.h" - -namespace op_api { -using npu_preparation = at_npu::native::OpPreparation; - -at::Tensor npu_add_custom(const at::Tensor &x, const at::Tensor &y) -{ - at::Tensor result = npu_preparation::apply_tensor_without_format(x); // Create output memory - - // calculate the output result of the NPU - EXEC_NPU_CMD(aclnnAddCustom, x, y, result); - return result; -} -} // namespace op_api diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/op_plugin_patch/op_plugin_functions.yaml b/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/op_plugin_patch/op_plugin_functions.yaml deleted file mode 100755 index d71088c55cbd81129743c50bb8ae183a8c608d7e..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/op_plugin_patch/op_plugin_functions.yaml +++ /dev/null @@ -1,2 +0,0 @@ - - func: npu_add_custom(Tensor x, Tensor y) -> Tensor - op_api: all_version diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/run_op_plugin.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/run_op_plugin.sh deleted file mode 100755 index 86e2dfdb2db14c57bcf87746346844976872c985..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/run_op_plugin.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:$LD_LIBRARY_PATH - -CURRENT_DIR=$( - cd $(dirname ${BASH_SOURCE:-$0}) - pwd -) -cd $CURRENT_DIR - -if [ ! $ASCEND_HOME_DIR ]; then - if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then - export ASCEND_HOME_DIR=$HOME/Ascend/ascend-toolkit/latest - else - export ASCEND_HOME_DIR=/usr/local/Ascend/ascend-toolkit/latest - fi -fi -source $ASCEND_HOME_DIR/bin/setenv.bash - -# 当前示例使用Python-3.9版本 -PYTHON_VERSION=$(python3 -V 2>&1 | awk '{print $2}' | awk -F '.' '{print $1"."$2}') -if [ "$PYTHON_VERSION" != "3.9" ]; then - echo "Error: Python3 version is not 3.9" - exit 1 -fi -# 当前示例使用Pytorch-2.1.0版本 -PYTORCH_VESION=$(pip3 show torch | grep "Version:" | awk '{print $2}' | awk -F '.' '{print $1"."$2"."$3}' | awk -F '+' '{print $1}') -if [ "$PYTORCH_VESION" != "2.1.0" ]; then - echo "Error: Pytorch version is not 2.1.0" - exit 1 -fi -export HI_PYTHON=python${PYTHON_VERSION} -export PYTHONPATH=$ASCEND_HOME_DIR/python/site-packages:$PYTHONPATH -export PATH=$ASCEND_HOME_DIR/python/site-packages/bin:$PATH - -function main() { - # 1. 清除遗留生成文件和日志文件 - rm -rf $HOME/ascend/log/* - - # 2. 下载PTA源码仓,必须要git下载 - cd ${CURRENT_DIR} - PYTORCH_DIR="${CURRENT_DIR}/pytorch" - rm -rf $PYTORCH_DIR - git clone https://gitee.com/ascend/pytorch.git - - cd ${PYTORCH_DIR} - git submodule init - git submodule update - git checkout -b v2.1.0 origin/v2.1.0 - # 不编译Tensorpipe子仓,会存在编译依赖 - rm -rf "third_party/Tensorpipe/" - - cd ${CURRENT_DIR} - # 3. PTA自定义算子注册 - FUNCTION_REGISTE_FIELD="op_plugin_patch/op_plugin_functions.yaml" - FUNCTION_REGISTE_FILE="${PYTORCH_DIR}/third_party/op-plugin/op_plugin/config/op_plugin_functions.yaml" - line=" - func: npu_add_custom(Tensor x, Tensor y) -> Tensor" - if ! grep -q "\ $line" $FUNCTION_REGISTE_FILE; then - sed -i "/custom:/r $FUNCTION_REGISTE_FIELD" $FUNCTION_REGISTE_FILE - fi - - # 4. 
编译PTA插件并安装 - cp -rf op_plugin_patch/*.cpp ${PYTORCH_DIR}/third_party/op-plugin/op_plugin/ops/opapi - export DISABLE_INSTALL_TORCHAIR=FALSE - cd ${PYTORCH_DIR} - (bash ci/build.sh --python=${PYTHON_VERSION} --disable_rpc ; pip uninstall torch-npu -y ; pip3 install dist/*.whl) - - # 5. 执行测试文件 - cd ${CURRENT_DIR} - export LD_LIBRARY_PATH=$ASCEND_OPP_PATH/vendors/customize/op_api/lib/:$LD_LIBRARY_PATH - python3 test_ops_custom.py - if [ $? -ne 0 ]; then - echo "ERROR: run custom op failed!" - return 1 - fi - echo "INFO: Ascend C Add Custom SUCCESS" - # 6. 执行测试文件 - python3 test_ops_custom_register_in_graph.py - if [ $? -ne 0 ]; then - echo "ERROR: run custom op in graph failed!" - return 1 - fi - echo "INFO: Ascend C Add Custom in torch.compile graph SUCCESS" - -} -main diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/test_ops_custom.py b/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/test_ops_custom.py deleted file mode 100644 index 5d1de8403bd2e86f58c386365cd1e926c5a1ff76..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/test_ops_custom.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/python3 -# coding=utf-8 -# -# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# =============================================================================== - -import torch -import torch_npu -from torch_npu.testing.testcase import TestCase, run_tests - -torch.npu.config.allow_internal_format = False - - -class TestCustomAdd(TestCase): - - def test_add_custom(self): - length = [8, 2048] - x = torch.rand(length, device='cpu', dtype=torch.float16) - y = torch.rand(length, device='cpu', dtype=torch.float16) - print(x, '\n', y) - - torch.npu.synchronize() - output = torch_npu.npu_add_custom(x.npu(), y.npu()).cpu() - torch.npu.synchronize() - - print(output) - self.assertRtolEqual(output, x + y) - - -if __name__ == "__main__": - run_tests() diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/test_ops_custom_register_in_graph.py b/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/test_ops_custom_register_in_graph.py deleted file mode 100644 index a8f0954572769cc46b9e1a81006dd359533ddf8c..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/PytorchInvocation/test_ops_custom_register_in_graph.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/python3 -# coding=utf-8 -# -# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
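-#
-# Note: this test registers a meta implementation and a GE converter for
-# npu_add_custom so that the custom op can be traced and executed inside a
-# torch.compile graph.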
-# =============================================================================== - -from typing import Any -import torch -from torch.library import impl -import torch_npu -import torchair -from torch_npu.testing.testcase import TestCase, run_tests -from torchair import register_fx_node_ge_converter -from torchair.ge import Tensor -from torch_npu.meta._meta_registrations import m - - -@impl(m, "npu_add_custom") -def npu_add_custom_meta(x, y): - return torch.empty_like(x) - - -# 注意: meta_outputs形参名为固定写法,若写错会影响ge节点的输出dtype与shape推导 -@register_fx_node_ge_converter(torch.ops.npu.npu_add_custom.default) -def convert_npu_add_custom(x: Tensor, y: Tensor, z: Tensor = None, meta_outputs: Any = None): - return torchair.ge.custom_op( - "AddCustom", - inputs={ - "x": x, - "y": y, - }, - outputs=['z'] - ) - - -class TestTorchCompileCustomAdd(TestCase): - - def test_add_custom(self): - from torchair.configs.compiler_config import CompilerConfig - config = CompilerConfig() - npu_backend = torchair.get_npu_backend(compiler_config=config) - length = [8, 2048] - x = torch.rand(length, device='npu', dtype=torch.float16) - y = torch.rand(length, device='npu', dtype=torch.float16) - print(x, '\n', y) - class Module(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x, y): - return torch_npu.npu_add_custom(x, y) - mod = torch.compile(Module().npu(), backend=npu_backend) - output = mod(x, y) - print(output) - self.assertRtolEqual(output, (x + y)) - - -if __name__ == "__main__": - run_tests() diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/README.md b/operator/AddTemplateCustomSample/FrameworkLaunch/README.md deleted file mode 100644 index f9f4e0d7897679c7652542d42c8aa0cdc5c6c3a6..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/README.md +++ /dev/null @@ -1,119 +0,0 @@ -## 概述 -本样例基于AddTemplateCustom算子工程,介绍了单算子工程、第三方框架调用。 -## 目录结构介绍 -``` -├── FrameworkLaunch //使用框架调用的方式调用Add算子 -│ ├── AclNNInvocation // 通过aclnn调用的方式调用AddTemplateCustom模板参数算子 -│ ├── AclNNInvocationNaive // 通过aclnn调用的方式调用AddTemplateCustom模板参数算子, 简化了编译脚本 -│ ├── AclOfflineModel // 通过aclopExecuteV2调用的方式调用AddTemplateCustom模板参数算子 -│ ├── AclOnlineModel // 通过aclopCompile调用的方式调用AddTemplateCustom模板参数算子 -│ ├── AddTemplateCustom // AddTemplateCustom模板参数算子工程 -│ ├── CppExtensions // Pybind方式调用AddTemplateCustom模板参数算子 -│ ├── PytorchInvocation // 通过pytorch调用的方式调用AddTemplateCustom模板参数算子 -│ ├── TensorflowInvocation // 通过tensorflow调用的方式调用AddTemplateCustom模板参数算子 -│ └── AddCustom.json // AddTemplateCustom模板参数算子的原型定义json文件 -``` -## 算子工程介绍 -其中,模板参数算子工程目录AddTemplateCustom包含算子实现的模板文件、编译脚本等,如下所示: -``` -├── AddTemplateCustom //Add自定义模板参数算子工程 -│ ├── cmake -│ ├── framework // 算子插件实现文件目录,单算子模型文件的生成不依赖算子适配插件,无需关注 -│ ├── op_host // host侧实现文件 -│ │ ├── add_custom.cpp // host侧tiling定义 -│ │ ├── add_custom_tiling.h // host侧tiling定义头文件 -│ │ ├── CMakeLists.txt // host侧的CMakeLists.txt -│ ├── op_kernel // kernel侧实现文件 -│ │ ├── add_custom.cpp // kernel侧算子实现文件 -│ │ ├── CMakeLists.txt // kernel侧的CMakeLists.txt -│ │ ├── tiling_key_add_custom.cpp // kernel侧模板参数定义头文件 -│ ├── scripts // 自定义算子工程打包相关脚本所在目录 -│ ├── build.sh // 编译入口脚本 -│ ├── CMakeLists.txt // 算子工程的CMakeLists.txt -│ └── CMakePresets.json // 编译配置项 -``` -CANN软件包中提供了工程创建工具msopgen,AddTemplateCustom算子工程可通过AddCustom.json自动创建,具体请参考[Ascend C算子开发](https://hiascend.com/document/redirect/CannCommunityOpdevAscendC)>算子开发>算子开发工程>基于自定义算子工程的算子开发>创建算子工程 章节。 - -算子实现的介绍请参考[AddTemplateCustom](./AddTemplateCustom/README.md)。 -## 编译运行样例算子 -针对自定义算子工程,编译运行包含如下步骤: -- 
编译自定义算子工程生成算子安装包;
-- 安装自定义算子到算子库中;
-- 调用执行自定义算子;
-
-详细操作如下所示。
-### 1. 获取源码包
-编译运行此样例前,请参考[准备:获取样例代码](../README.md#codeready)完成源码包获取。
-### 2. 编译算子工程
-  编译自定义算子工程,构建生成自定义算子包。
-
-  - 执行如下命令,切换到算子工程AddTemplateCustom目录。
-    以命令行方式下载样例代码,master分支为例。
-    ```bash
-    cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom
-    ```
-
-  - 修改CMakePresets.json中ASCEND_CANN_PACKAGE_PATH为CANN软件包安装后的实际路径。
-
-    ```json
-    {
-        ……
-        "configurePresets": [
-            {
-                ……
-                "ASCEND_CANN_PACKAGE_PATH": {
-                    "type": "PATH",
-                    "value": "/usr/local/Ascend/ascend-toolkit/latest"   //请替换为CANN软件包安装后的实际路径。eg:/home/HwHiAiUser/Ascend/ascend-toolkit/latest
-                },
-                ……
-            }
-        ]
-    }
-    ```
-  - 在算子工程AddTemplateCustom目录下执行如下命令,进行算子工程编译。
-
-    ```bash
-    ./build.sh
-    ```
-编译成功后,会在当前目录下创建build_out目录,并在build_out目录下生成自定义算子安装包custom_opp_\<target os\>_\<target architecture\>.run,例如“custom_opp_ubuntu_x86_64.run”。
-
-备注:如果要使用dump调试功能,需要移除op_host内和CMakePresets.json内的Atlas 训练系列产品、Atlas 200/500 A2 推理产品的配置。
-
-### 3. 部署算子包
-
-执行如下命令,在自定义算子安装包所在路径下,安装自定义算子包。
-  ```bash
-  cd build_out
-  ./custom_opp_<target os>_<target architecture>.run
-  ```
-命令执行成功后,自定义算子包中的相关文件将部署至当前环境的OPP算子库的vendors/customize目录中。
-
-### 4. 配置环境变量
-
-  请根据当前环境上CANN开发套件包的[安装方式](https://hiascend.com/document/redirect/CannCommunityInstSoftware),选择对应配置环境变量的命令。
-  - 默认路径,root用户安装CANN软件包
-    ```bash
-    export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest
-    ```
-  - 默认路径,非root用户安装CANN软件包
-    ```bash
-    export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest
-    ```
-  - 指定路径install_path,安装CANN软件包
-    ```bash
-    export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest
-    ```
-### 5. 调用执行算子工程
-- [aclnn调用AddTemplateCustom算子工程](./AclNNInvocation/README.md)
-- [aclnn调用AddTemplateCustom算子工程(代码简化)](./AclNNInvocationNaive/README.md)
-- [aclopExecuteV2模型调用AddTemplateCustom算子工程](./AclOfflineModel/README.md)
-- [aclopCompile模型调用AddTemplateCustom算子工程](./AclOnlineModel/README.md)
-- [cpp-extension模型调用AddTemplateCustom算子工程](./CppExtensions/README.md)
-- [pytorch调用AddTemplateCustom算子工程](./PytorchInvocation/README.md)
-- [tensorflow调用AddTemplateCustom算子工程](./TensorflowInvocation/AscendCustomToTensorFlowCustom/README.md)
-
-## 更新说明
-| 时间 | 更新事项 |
-|------------|------------|
-| 2024/10/25 | 新增模板参数算子样例 |
\ No newline at end of file
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/README.md b/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/README.md
deleted file mode 100644
index 3026ff2dc79e0a653cfe10d81243ff3e8ceeff26..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/README.md
+++ /dev/null
@@ -1,36 +0,0 @@
-## 概述
-本样例展示了如何使用Ascend C自定义算子AddTemplateCustom映射到TensorFlow自定义算子Add,并通过TensorFlow调用Ascend C算子。
-
-## 运行样例算子
-### 1.编译算子工程
-运行此样例前,请参考[编译算子工程](../../README.md#operatorcompile)完成前期准备。
-### 2.tensorflow调用的方式调用样例运行
-
-  - 进入到样例目录
-    以命令行方式下载样例代码,master分支为例。
-    ```bash
-    cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn
-    ```
-
-  - 样例执行(tensorflow1.15)
-
-    样例执行过程中会自动生成随机测试数据,然后通过TensorFlow调用算子,最后对比cpu和aicore运行结果。具体过程可参见run_add_custom.py脚本。
-    ```bash
-    python3 run_add_custom.py
-    ```
-  - 样例执行(tensorflow2.x)
-
-    样例执行过程中会自动生成随机测试数据,然后通过TensorFlow调用算子,最后对比cpu和aicore运行结果。具体过程可参见run_add_custom_tf2.py脚本。
-    ```bash
-    python3 run_add_custom_tf2.py
-    ```
-  - 用户亦可参考run.sh脚本进行编译与运行。
-    ```bash
-    bash run.sh
-    ```
-
-## 更新说明
-| 时间 | 更新事项 |
-| ---------- | 
------------ | -| 2024/10/25 | 新增模板参数算子样例 | \ No newline at end of file diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/run.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/run.sh deleted file mode 100644 index 0202346ce1c2b0dde45f58f6d1f74454bfded82a..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/run.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -if [ -n "$ASCEND_INSTALL_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH -elif [ -n "$ASCEND_HOME_PATH" ]; then - _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH -else - if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then - _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest - else - _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest - fi -fi -source $_ASCEND_INSTALL_PATH/bin/setenv.bash - -set -e -TENSORFLOW_VERSION=$(python3 -c "import tensorflow as tf; print(tf.__version__)") - -if [[ $TENSORFLOW_VERSION =~ ^1\..* ]]; then - python3 run_add_custom.py -elif [[ $TENSORFLOW_VERSION =~ ^2\..* ]]; then - python3 run_add_custom_tf2.py -else - echo "unknown version $TENSORFLOW_VERSION, or tensorflow not installed" -fi diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/run_add_custom.py b/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/run_add_custom.py deleted file mode 100755 index 39ffe544ef42d1a22db5e65bd78be80cdee5fa32..0000000000000000000000000000000000000000 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/run_add_custom.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/python3 -# coding=utf-8 -# -# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
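-#
-# Note: this script builds the same add graph twice and checks the AI Core
-# result against the CPU result with np.allclose.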
-# ===============================================================================
-
-import tensorflow as tf  # 导入TensorFlow开源库,本样例基于tf1.15编写
-from npu_bridge.estimator import npu_ops  # 导入TensorFlow开源库中的npu_ops模块
-import numpy as np  # 导入Python的数学基础库
-#np.allclose比较函数的绝对公差参数
-atol = 0.001
-#np.allclose比较函数的相对公差参数
-rtol = 0.001
-
-
-def config(execute_type):
-    if execute_type == 'ai_core':
-        session_config = tf.ConfigProto(
-            allow_soft_placement=True,
-            log_device_placement=False,
-        )
-        custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add()
-        custom_op.name = "NpuOptimizer"
-        custom_op.parameter_map["enable_data_pre_proc"].b = True  # 开启数据预处理下沉到Device侧执行
-        custom_op.parameter_map["mix_compile_mode"].b = True
-        custom_op.parameter_map["use_off_line"].b = True  # True表示在昇腾AI处理器上执行训练
-
-    elif execute_type == 'cpu':
-        session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
-
-    return session_config
-
-
-def main(unused_argv):
-    shape_params = (8, 2048)
-    dtype_params = np.float16
-
-    # 构造Add算子的两个输入数据,shape为shape_params,范围在[-2,2]之间的随机数
-    x_data = np.random.uniform(-2, 2, size=shape_params).astype(dtype_params)
-    y_data = np.random.uniform(-2, 2, size=shape_params).astype(dtype_params)
-    # 分别对Add算子的两个输入数据进行占位
-    x = tf.compat.v1.placeholder(dtype_params, shape=shape_params)
-    y = tf.compat.v1.placeholder(dtype_params, shape=shape_params)
-    # 计算算子输出
-    out = tf.math.add(x, y)
-    # 在Host侧CPU上运行单算子,得到期望运行结果
-    with tf.compat.v1.Session(config=config('cpu')) as session:
-        result_cpu = session.run(out, feed_dict={x: x_data, y: y_data})
-    # 在昇腾AI处理器上运行单算子,得到实际运行结果
-    with tf.compat.v1.Session(config=config('ai_core')) as session:
-        result_ai_core = session.run(out, feed_dict={x: x_data, y: y_data})
-
-    np.array(result_ai_core).astype(dtype_params)
-    np.array(result_cpu).astype(dtype_params)
-    print('====================================')
-    # 通过np.allclose比较昇腾AI处理器上运行的实际结果和cpu上运行的期望结果,其中atol和rtol为np.allclose比较函数的绝对公差参数和相对公差参数,请见步骤3设置。
-    cmp_result = np.allclose(result_ai_core, result_cpu, rtol=rtol, atol=atol)
-    print(cmp_result)
-    print('====================================')
-
-
-if __name__ == "__main__":
-    tf.app.run()
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/run_add_custom_tf2.py b/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/run_add_custom_tf2.py
deleted file mode 100755
index b60f23b100c9beb8ea7f50a26a347498e188490d..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowBuildIn/run_add_custom_tf2.py
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/python3
-# coding=utf-8
-#
-# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
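-#
-# Note: TensorFlow 2.x variant of run_add_custom.py; it disables v2 behavior
-# and runs the same CPU vs AI Core comparison through the v1 session API.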
-# ===============================================================================
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-# Imports
-import logging
-import tensorflow as tf
-import numpy as np
-
-tf.compat.v1.disable_v2_behavior()
-tf.compat.v1.flags.DEFINE_string("local_log_dir", "output/train_logs.txt", "Log file path")
-FLAGS = tf.compat.v1.flags.FLAGS
-atol = 0.001
-rtol = 0.001
-
-
-def config(execute_type):
-    if execute_type == 'ai_core':
-        session_config = tf.compat.v1.ConfigProto(allow_soft_placement=True, log_device_placement=False)
-        custom_op = session_config.graph_options.rewrite_options.custom_optimizers.add()
-        custom_op.name = "NpuOptimizer"
-        custom_op.parameter_map["enable_data_pre_proc"].b = True
-        custom_op.parameter_map["mix_compile_mode"].b = True
-        custom_op.parameter_map["use_off_line"].b = True
-        custom_op.parameter_map["min_group_size"].b = 1
-
-    elif execute_type == 'cpu':
-        session_config = tf.compat.v1.ConfigProto(allow_soft_placement=True, log_device_placement=False)
-
-    return session_config
-
-
-def main(unused_argv):
-    shape_params = (8, 2048)
-    dtype_params = np.float16
-    x_data = np.random.uniform(-2, 2, size=shape_params).astype(dtype_params)
-    y_data = np.random.uniform(-2, 2, size=shape_params).astype(dtype_params)
-
-    x = tf.compat.v1.placeholder(dtype_params, shape=shape_params)
-    y = tf.compat.v1.placeholder(dtype_params, shape=shape_params)
-    out = tf.math.add(x, y)
-    with tf.compat.v1.Session(config=config('cpu')) as session:
-        result_cpu = session.run(out, feed_dict={x: x_data, y: y_data})
-    with tf.compat.v1.Session(config=config('ai_core')) as session:
-        print("run npu")
-        result_ai_core = session.run(out, feed_dict={x: x_data, y: y_data})
-
-    np.array(result_ai_core).astype(dtype_params)
-    np.array(result_cpu).astype(dtype_params)
-
-    print('====================================')
-    cmp_result = np.allclose(result_ai_core, result_cpu, rtol=rtol, atol=atol)
-    print(cmp_result)
-    print('====================================')
-
-
-if __name__ == "__main__":
-    tf.compat.v1.app.run()
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/README.md b/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/README.md
deleted file mode 100644
index 58a1c8dbeb84a95c3dd58e51d338cd08b337d143..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/README.md
+++ /dev/null
@@ -1,44 +0,0 @@
-## 概述
-本样例展示了如何使用Ascend C自定义算子AddTemplateCustom映射到TensorFlow自定义算子AddTemplateCustom,并通过TensorFlow调用Ascend C算子。
-
-## 运行样例算子
-### 1.编译算子工程
-运行此样例前,请参考[编译算子工程](../../README.md#operatorcompile)完成前期准备。
-需注意插件代码适配,路径为: samples/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/framework/tf_plugin/tensorflow_add_custom_plugin.cc
-需修改插件代码中的TensorFlow调用算子名称OriginOpType为"AddCustom",如下所示:
-```c++
-REGISTER_CUSTOM_OP("AddCustom")
-    .FrameworkType(TENSORFLOW)              // type: CAFFE, TENSORFLOW
-    .OriginOpType("AddCustom")              // name in tf module
-    .ParseParamsByOperatorFn(AutoMappingByOpFn);
-```
-
-### 2.TensorFlow调用的方式调用样例运行
-
-  - 进入到样例目录
-    以命令行方式下载样例代码,master分支为例。
-    ```bash
-    cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom
-    ```
-  - 编译TensorFlow算子库
-    ```bash
-    bash build.sh
-    ```
-
-  - 样例执行(TensorFlow1.15)
-
样例执行过程中会自动生成随机测试数据,然后通过TensorFlow调用算子,最后对比TensorFlow原生算子和Ascend C算子运行结果。具体过程可参见run_add_custom_tf_1.15.py脚本。
-    ```bash
-    python3 run_add_custom_tf_1.15.py
-    ```
-  - 样例执行(TensorFlow2.6.5)
-    样例执行过程中会自动生成随机测试数据,然后通过TensorFlow调用算子,最后对比TensorFlow原生算子和Ascend C算子运行结果。具体过程可参见run_add_custom_tf_2.6.5.py脚本。
-    ```bash
-    python3 run_add_custom_tf_2.6.5.py
-    ```
-
-
-## 更新说明
-| 时间 | 更新事项 |
-| ---------- | ------------ |
-| 2024/10/25 | 新增模板参数算子样例 |
\ No newline at end of file
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/RegisterTensorFlowCustomOp/custom_assign_add_custom.cc b/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/RegisterTensorFlowCustomOp/custom_assign_add_custom.cc
deleted file mode 100644
index dbe2be93f09c1bc8fe85ce82d87ac8f5f67dcfec..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/RegisterTensorFlowCustomOp/custom_assign_add_custom.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * @file custom_assign_add_custom.cc
- *
- * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/shape_inference.h"
-#include "tensorflow/core/framework/common_shape_fns.h"
-
-using namespace tensorflow;
-
-// 注册TensorFlow自定义算子
-REGISTER_OP("AddCustom")                                    // TensorFlow自定义算子名称
-    .Input("x: T")                                          // 输入tensor x
-    .Input("y: T")                                          // 输入tensor y
-    .Output("z: T")                                         // 输出tensor z
-    .Attr("T: {half}")                                      // 属性T,支持half数据类型
-    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn); // 设置shape函数,BroadcastBinaryOpShapeFn根据两个输入tensor的shape推断(广播后的)输出tensor的shape
-
-
-// TensorFlow自定义算子的CPU实现
-class AddCustomOp : public OpKernel {
-public:
-    explicit AddCustomOp(OpKernelConstruction* context) : OpKernel(context) {}
-    // 当前算子不支持CPU设备,实现该函数以抛出异常,提示该算子不支持CPU设备
-    void Compute(OpKernelContext* context) override {
-        OP_REQUIRES(context, false, errors::Unimplemented("AddCustomOp is not supported on CPU"));
-    }
-};
-
-// 注册TensorFlow自定义算子的CPU实现
-REGISTER_KERNEL_BUILDER(Name("AddCustom").Device(DEVICE_CPU), AddCustomOp);
\ No newline at end of file
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/build.sh b/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/build.sh
deleted file mode 100644
index 5641987e16f40e49222119c5fc75ab961d34885c..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/build.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#! /bin/bash
-SCRIPT_DIR=$(dirname "$(realpath "$0")")
-cd $SCRIPT_DIR || exit
-
-rm -rf outputs
-mkdir outputs
-
-TF_CFLAGS=( $(python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') )
-TF_LFLAGS=( $(python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
-
-SOURCE_FILES=$(find . -name '*.cc')
-
-g++ -std=c++14 -shared $SOURCE_FILES -o outputs/libcustom_ops.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2
\ No newline at end of file
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/run_add_custom_tf_1.15.py b/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/run_add_custom_tf_1.15.py
deleted file mode 100644
index 5c7b6e9c5a5507b1f476dd0fb5e842f43ad01874..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/run_add_custom_tf_1.15.py
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/python3
-# coding=utf-8
-#
-# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# ===============================================================================
-
-import os
-import tensorflow as tf
-import numpy as np
-from npu_bridge.npu_init import *
-tf.enable_resource_variables()
-
-#np.allclose比较函数的绝对公差参数
-atol = 0.001
-#np.allclose比较函数的相对公差参数
-rtol = 0.001
-
-def main(unused_argv):
-    custom_op_lib = tf.load_op_library(os.path.join("./outputs/libcustom_ops.so"))  # 加载自定义算子库
-    # 定义输入数据
-    shape_params = (8, 2048)
-    dtype_params = np.float16
-
-    x_data = np.random.uniform(-2, 2, size=shape_params).astype(dtype_params)
-    y_data = np.random.uniform(-2, 2, size=shape_params).astype(dtype_params)
-
-    x = tf.compat.v1.placeholder(dtype_params, shape=shape_params)
-    y = tf.compat.v1.placeholder(dtype_params, shape=shape_params)
-
-    tf_z = tf.math.add(x, y)
-    ac_z = custom_op_lib.add_custom(x, y)  # 调用Ascend C AddTemplateCustom自定义算子
-
-    config = tf.ConfigProto()
-    custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
-    custom_op.name = "NpuOptimizer"
-    config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
-    config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF
-
-    with tf.Session(config=config) as sess:
-        sess.run(tf.global_variables_initializer())
-        tf_golden = sess.run(tf_z, feed_dict={x: x_data, y: y_data})
-
-    with tf.Session(config=config) as sess:
-        sess.run(tf.global_variables_initializer())
-        ac_golden = sess.run(ac_z, feed_dict={x: x_data, y: y_data})
-
-    # 通过np.allclose函数比较TensorFlow和Ascend C的输出是否一致
-    np.array(tf_golden).astype(dtype_params)
-    np.array(ac_golden).astype(dtype_params)
-
-    cmp_result = np.allclose(tf_golden, ac_golden, atol=atol, rtol=rtol)
-    if cmp_result:
-        print("The result of tf and ac is the same.")
-    else:
-        print("The result of tf and ac is different.")
-
-
-if __name__ == '__main__':
-    tf.app.run()
\ No newline at end of file
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/run_add_custom_tf_2.6.5.py b/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/run_add_custom_tf_2.6.5.py
deleted file mode 100644
index 686ff638e6c6ec74b39d56ad765cc2c9445eae36..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/TensorflowInvocation/AscendCustomToTensorFlowCustom/run_add_custom_tf_2.6.5.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/python3
-# coding=utf-8
-#
-# Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# ===============================================================================
-
-import os
-import tensorflow as tf
-import numpy as np
-import npu_device
-from npu_device.compat.v1.npu_init import *
-npu_device.compat.enable_v1()
-tf.compat.v1.enable_resource_variables()
-
-
-#np.allclose比较函数的绝对公差参数
-atol = 0.001
-#np.allclose比较函数的相对公差参数
-rtol = 0.001
-
-def main(unused_argv):
-    custom_op_lib = tf.load_op_library(os.path.join("./outputs/libcustom_ops.so"))  # 加载自定义算子库
-    # 定义输入数据
-    shape_params = (8, 2048)
-    dtype_params = np.float16
-
-    x_data = np.random.uniform(-2, 2, size=shape_params).astype(dtype_params)
-    y_data = np.random.uniform(-2, 2, size=shape_params).astype(dtype_params)
-
-    x = tf.compat.v1.placeholder(dtype_params, shape=shape_params)
-    y = tf.compat.v1.placeholder(dtype_params, shape=shape_params)
-
-    tf_z = tf.math.add(x, y)
-    ac_z = custom_op_lib.add_custom(x, y)  # 调用Ascend C AddTemplateCustom自定义算子
-
-    config = tf.compat.v1.ConfigProto()
-    custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
-    custom_op.name = "NpuOptimizer"
-    config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
-    config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF
-
-    with tf.compat.v1.Session(config=config) as sess:
-        sess.run(tf.compat.v1.global_variables_initializer())
-        tf_golden = sess.run(tf_z, feed_dict={x: x_data, y: y_data})
-
-    with tf.compat.v1.Session(config=config) as sess:
-        sess.run(tf.compat.v1.global_variables_initializer())
-        ac_golden = sess.run(ac_z, feed_dict={x: x_data, y: y_data})
-
-    np.array(tf_golden).astype(dtype_params)
-    np.array(ac_golden).astype(dtype_params)
-
-    # 通过np.allclose函数比较TensorFlow和Ascend C的输出是否一致
-    cmp_result = np.allclose(tf_golden, ac_golden, atol=atol, rtol=rtol)
-    if cmp_result:
-        print("The result of tf and ac is the same.")
-    else:
-        print("The result of tf and ac is different.")
-
-
-if __name__ == '__main__':
-    tf.compat.v1.app.run()
\ No newline at end of file
diff --git a/operator/AddTemplateCustomSample/README.md b/operator/AddTemplateCustomSample/README.md
deleted file mode 100644
index 58961a6af8474b1b2cf94bafd8806439f16c174b..0000000000000000000000000000000000000000
--- a/operator/AddTemplateCustomSample/README.md
+++ /dev/null
@@ -1,106 +0,0 @@
-## 带模板参数的Add自定义算子样例说明
-本样例通过Ascend C编程语言实现了带模板参数的Add算子,并按照不同的算子调用方式分别给出了对应的端到端实现。
-- [FrameworkLaunch](./FrameworkLaunch):使用框架调用Add自定义算子。
-  按照工程创建->算子实现->编译部署->算子调用的流程完成算子开发。整个过程都依赖于算子工程:基于工程代码框架完成算子核函数的开发和Tiling实现,通过工程编译脚本完成算子的编译部署,继而实现单算子调用或第三方框架中的算子调用。
-
-本样例中包含如下调用方式:
-
-| 调用方式 | 目录 | 描述 |
-| ---------------- | -------------------- | ------------------------------------------------------------ |
-| FrameworkLaunch | AclNNInvocation | 通过aclnn调用的方式调用AddTemplateCustom算子。 |
-| FrameworkLaunch | AclNNInvocationNaive | 通过aclnn调用的方式调用AddTemplateCustom算子, 简化了编译脚本。 |
-| FrameworkLaunch | AclOfflineModel | 通过aclopExecuteV2调用的方式调用AddTemplateCustom算子。 |
-| FrameworkLaunch | AclOnlineModel | 通过aclopCompile调用的方式调用AddTemplateCustom算子。 |
-| FrameworkLaunch | CppExtensions | Pybind方式调用AddTemplateCustom算子。 |
-| FrameworkLaunch | PytorchInvocation | 通过pytorch调用的方式调用AddTemplateCustom算子。 |
-| FrameworkLaunch | TensorflowInvocation | 通过tensorflow调用的方式调用AddTemplateCustom算子。 |
- -## 算子描述 -Add算子实现了两个数据相加,返回相加结果的功能。本样例算子添加的模板参数包括输入的数据类型、数据格式、shape等,根据模板参数,简化或统一算子的实现逻辑,开发者可以在模板参数中定义需要的信息,如输入输出的数据类型,其他扩展参数等。对应的数学表达式为: -``` -z = x + y -``` -## 算子规格描述 - - - - - - - - - - - - - - - - - - -
-| 算子类型(OpType) | Add | | | |
-| ---------------- | ---------- | ---------- | ---------- | ---------- |
-| 算子输入 | name | shape | data type | format |
-| 算子输入 | x | - | float16,float | ND,FRACTAL_NZ |
-| 算子输入 | y | - | float16,float | ND,FRACTAL_NZ |
-| 算子输出 | z | - | float16,float | ND,FRACTAL_NZ |
-| 核函数名 | add_custom | | | |
-| 模板参数 | `template<int dtype, int format, int shapeSize, int data>` | | | |
-| 模板参数 | dtype | int | 数据类型(float16,float) | |
-| 模板参数 | format | int | 数据格式(ND,FRACTAL_NZ) | |
-| 模板参数 | shapeSize | int | shapeSize,根据shape大小进行不同的kernel的实现 | |
-| 模板参数 | data | int | 其他参数 | |
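-
-The template parameters above act as compile-time selectors: every concrete combination instantiates a dedicated kernel variant, and the host side picks one at runtime through the tiling key. The standalone C++17 sketch below illustrates only this dispatch mechanism; the `DTYPE_*` encodings and the `add_custom_variant` function are hypothetical stand-ins, not the sample's actual tiling-key header.
-```cpp
-#include <cstdio>
-
-// Hypothetical encodings for the 'dtype' template parameter; in the real
-// operator these values come from the generated tiling-key header.
-constexpr int DTYPE_FP16 = 0;
-constexpr int DTYPE_FP32 = 1;
-
-// One function template = one family of kernel variants: every distinct
-// <dtype, format, shapeSize, data> tuple is a separate instantiation, so the
-// branch below is resolved at compile time, not at runtime.
-template <int dtype, int format, int shapeSize, int data>
-void add_custom_variant()
-{
-    if constexpr (dtype == DTYPE_FP16) {
-        std::printf("fp16 kernel variant, shapeSize=%d\n", shapeSize);
-    } else {
-        std::printf("fp32 kernel variant, shapeSize=%d\n", shapeSize);
-    }
-}
-
-int main()
-{
-    // The host side would map runtime dtype/shape onto a tiling key and then
-    // launch the matching instantiation; here two variants are called directly.
-    add_custom_variant<DTYPE_FP16, 0, 2048, 0>();
-    add_custom_variant<DTYPE_FP32, 0, 128, 0>();
-    return 0;
-}
-```
-In the real operator the host-side tiling function performs this selection by computing a tiling key (GET_TPL_TILING_KEY) from the runtime dtypes and shapes.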
- -## 支持的产品型号 -本样例支持如下产品型号: -- Atlas 训练系列产品 -- Atlas 推理系列产品(Ascend 310P处理器) -- Atlas A2训练系列产品/Atlas 800I A2推理产品 -- Atlas 200/500 A2推理产品 - -## 目录结构介绍 -``` -├── FrameworkLaunch //使用框架调用的方式调用Add自定义模板参数算子工程。 -``` -## 环境要求 -编译运行此样例前,请参考[《CANN软件安装指南》](https://hiascend.com/document/redirect/CannCommunityInstSoftware)完成开发运行环境的部署。 - -## 编译运行样例算子 - -### 1. 准备:获取样例代码 - - 可以使用以下两种方式下载,请选择其中一种进行源码准备。 - - - 命令行方式下载(下载时间较长,但步骤简单)。 - - ```bash - # 开发环境,非root用户命令行中执行以下命令下载源码仓。git_clone_path为用户自己创建的某个目录。 - cd ${git_clone_path} - git clone https://gitee.com/ascend/samples.git - ``` - **注:如果需要切换到其它tag版本,以v0.5.0为例,可执行以下命令。** - ```bash - git checkout v0.5.0 - ``` - - 压缩包方式下载(下载时间较短,但步骤稍微复杂)。 - - **注:如果需要下载其它版本代码,请先请根据前置条件说明进行samples仓分支切换。下载压缩包命名跟tag/branch相关,此处以master分支为例,下载的名字将会是samples-master.zip** - ```bash - # 1. samples仓右上角选择 【克隆/下载】 下拉框并选择 【下载ZIP】。 - # 2. 将ZIP包上传到开发环境中的普通用户某个目录中,【例如:${git_clone_path}/samples-master.zip】。 - # 3. 开发环境中,执行以下命令,解压zip包。 - cd ${git_clone_path} - unzip samples-master.zip - ``` -### 2. 编译运行样例工程 -- 若使用框架调用的方式,编译运行操作请参见[FrameworkLaunch](./FrameworkLaunch)。 -## 更新说明 -| 时间 | 更新事项 | -| ---------- |----------| -| 2024/10/25 | 新增模板参数算子样例 | diff --git a/operator/README.md b/operator/README.md index 573667b8922165b1d914aed406100724a671b4dc..2e2fe4a73851f6c6ccb86fd27ca206dfce654c4b 100644 --- a/operator/README.md +++ b/operator/README.md @@ -2,5 +2,4 @@ | 目录名称 | 功能描述 | | ------------------------------------------------------------ | ---------------------------------------------------- | | [AddCustomSample](./AddCustomSample) | 基于Ascend C的Add自定义Vector算子及调用样例(目录待移动)| -| [AddTemplateCustomSample](./AddTemplateCustomSample) | 基于Ascend C的Add(模板参数算子)自定义Vector算子及调用样例(目录待移动) | | [ascendc](./ascendc) | Ascend C算子调用样例 | diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/README.md b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/README.md similarity index 94% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/README.md rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/README.md index 747abc9e2dc151214062d6a3a891eb233892a4b1..3a490c170e30a49667e563d524c570cce6703d59 100644 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/README.md +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/README.md @@ -39,7 +39,7 @@ - 进入到样例目录 以命令行方式下载样例代码,master分支为例。 ```bash - cd ${git_clone_path}/samples/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation + cd ${git_clone_path}/samples/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation ``` - 样例执行 @@ -51,4 +51,5 @@ ## 更新说明 | 时间 | 更新事项 | | ---------- | ------------ | -| 2024/10/25 | 新增模板参数算子样例 | \ No newline at end of file +| 2024/10/25 | 新增模板参数算子样例 | +| 2024/11/18 | 样例目录调整 | \ No newline at end of file diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/inc/common.h b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/inc/common.h similarity index 100% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/inc/common.h rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/inc/common.h diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/inc/op_runner.h b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/inc/op_runner.h similarity index 100% rename from 
operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/inc/op_runner.h rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/inc/op_runner.h diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/inc/operator_desc.h b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/inc/operator_desc.h similarity index 100% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/inc/operator_desc.h rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/inc/operator_desc.h diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/input/.keep b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/input/.keep similarity index 100% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/input/.keep rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/input/.keep diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/input/.keep b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/output/.keep similarity index 100% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclOfflineModel/input/.keep rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/output/.keep diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/run.sh b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/run.sh old mode 100644 new mode 100755 similarity index 76% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/run.sh rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/run.sh index e1cec058461dc3ce5c3b6cf3339c9039247e0f6f..57e8771dbb6f769b09663ce74dbe2e77ce96c8fa --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/run.sh +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/run.sh @@ -1,35 +1,8 @@ #!/bin/bash -export ASCEND_SLOG_PRINT_TO_STDOUT=0 -export ASCEND_GLOBAL_LOG_LEVEL=0 - CURRENT_DIR=$( cd $(dirname ${BASH_SOURCE:-$0}) pwd ) -cd $CURRENT_DIR - -# 导出环境变量 -SHORT=v:, -LONG=dtype:, -OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") -eval set -- "$OPTS" -while :; do - case "$1" in - # float16, float, int32 - -v | --dtype) - DTYPE="$2" - shift 2 - ;; - --) - shift - break - ;; - *) - echo "[ERROR] Unexpected option: $1" - break - ;; - esac -done if [ -n "$ASCEND_INSTALL_PATH" ]; then _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH @@ -44,7 +17,7 @@ else fi source $_ASCEND_INSTALL_PATH/bin/setenv.bash export DDK_PATH=$_ASCEND_INSTALL_PATH -export NPU_HOST_LIB=$_ASCEND_INSTALL_PATH/lib64 +export NPU_HOST_LIB=$_ASCEND_INSTALL_PATH/$(arch)-$(uname -s | tr '[:upper:]' '[:lower:]')/devlib function main { # 1. 清除遗留生成文件和日志文件 @@ -66,7 +39,7 @@ function main { rm -rf build mkdir -p build cd build - cmake ../src + cmake ../src -DCMAKE_SKIP_RPATH=TRUE if [ $? -ne 0 ]; then echo "ERROR: cmake failed!" return 1 @@ -80,6 +53,7 @@ function main { echo "INFO: make success!" # 4. 运行可执行文件 + export LD_LIBRARY_PATH=$_ASCEND_INSTALL_PATH/opp/vendors/customize/op_api/lib:$LD_LIBRARY_PATH cd $CURRENT_DIR/output echo "INFO: execute op!" 
./execute_add_op diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/scripts/acl.json b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/scripts/acl.json similarity index 100% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/scripts/acl.json rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/scripts/acl.json diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/scripts/gen_data.py b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/scripts/gen_data.py similarity index 100% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/scripts/gen_data.py rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/scripts/gen_data.py diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/scripts/verify_result.py similarity index 100% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/scripts/verify_result.py rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/scripts/verify_result.py diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/src/CMakeLists.txt b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/src/CMakeLists.txt similarity index 85% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/src/CMakeLists.txt rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/src/CMakeLists.txt index 730ba001358345ce4136a6bcfe22c45bebbb6234..8e9e45375c282c89b4b00bccd02b936aec2e48ec 100644 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/src/CMakeLists.txt +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/src/CMakeLists.txt @@ -27,8 +27,8 @@ set(LIB_PATH $ENV{NPU_HOST_LIB}) # Dynamic libraries in the stub directory can only be used for compilation if (NOT DEFINED ENV{NPU_HOST_LIB}) - set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/") - set(LIB_PATH1 "/usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub/") + string(TOLOWER "${CMAKE_SYSTEM_NAME}" SYSTEM_NAME_LOWER) + set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/${CMAKE_SYSTEM_PROCESSOR}-${SYSTEM_NAME_LOWER}/devlib") message(STATUS "set default LIB_PATH: ${LIB_PATH}") else () message(STATUS "env LIB_PATH: ${LIB_PATH}") @@ -36,16 +36,14 @@ endif() # Header path include_directories( - ${INC_PATH}/runtime/include - ${INC_PATH}/atc/include ../inc + ${INC_PATH}/include ${CUST_PKG_PATH}/include ) # add host lib path link_directories( ${LIB_PATH} - ${LIB_PATH1} ${CUST_PKG_PATH}/lib ) diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/src/common.cpp b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/src/common.cpp similarity index 100% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/src/common.cpp rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/src/common.cpp diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/src/main.cpp b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/src/main.cpp similarity index 100% rename from 
operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/src/main.cpp rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/src/main.cpp diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/src/op_runner.cpp b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/src/op_runner.cpp similarity index 100% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/src/op_runner.cpp rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/src/op_runner.cpp diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/src/operator_desc.cpp b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/src/operator_desc.cpp similarity index 100% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclNNInvocation/src/operator_desc.cpp rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AclNNInvocation/src/operator_desc.cpp diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddCustom.json b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom.json similarity index 100% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AddCustom.json rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom.json diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/README.md b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/README.md old mode 100755 new mode 100644 similarity index 76% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/README.md rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/README.md index 42f3bc8d6ca06cc3de17da1b57270522724218b6..2b24d0fe47dec181aabb9f6ec939d58fe4cd9091 --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/README.md +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/README.md @@ -1,5 +1,5 @@ ### 1. tiling使用说明 - 请参考[tiling_key_add_custom.h](./op_kernel/tiling_key_add_custom.h)进行tiling配置。 + 请参考[tiling_key_add_custom.h](op_kernel/tiling_key_add_custom.h)进行tiling配置。 ### 2. 
算子实现描述
- kernel侧
@@ -8,7 +8,7 @@
 ```
 #include "tiling_key_add_custom.h"
 ……
- template <int dtype, int format, int shapeSize, int data>
+ template <uint32_t D_T_X, uint32_t D_T_Y, uint32_t D_T_Z, uint32_t TILE_NUM, uint32_t IS_SPLIT>
 __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling)
 {
 ……
@@ -24,7 +24,7 @@ static ge::graphStatus TilingFunc(gert::TilingContext *context)
 {
 ……
-    const uint64_t tilingKey = GET_TPL_TILING_KEY(10, 15, 7, 1); // 模板参数tilingkey配置
+    const uint64_t tilingKey = GET_TPL_TILING_KEY(D_T_X, D_T_Y, D_T_Z, TILE_NUM, IS_SPLIT); // 模板参数tilingkey配置
     context->SetTilingKey(tilingKey);
 ……
 }
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_host/add_custom.cpp b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_host/add_custom.cpp
old mode 100755
new mode 100644
similarity index 59%
rename from operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_host/add_custom.cpp
rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_host/add_custom.cpp
index 9f36d088be9395bbeaba91730f336fdd96af670b..faf223f02b3835a42d94c7ac45f7a126cf8f0b52
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_host/add_custom.cpp
+++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_host/add_custom.cpp
@@ -13,17 +13,43 @@
 namespace optiling {
 const uint32_t BLOCK_DIM = 8;
-const uint32_t TILE_NUM = 8;
+const uint32_t DEFAULT_TILE_NUM = 8;
+constexpr int MIN_LENGTH_FOR_SPLIT = 2048;
 static ge::graphStatus TilingFunc(gert::TilingContext *context)
 {
     TilingData tiling;
     uint32_t totalLength = context->GetInputShape(0)->GetOriginShape().GetShapeSize();
+    ge::DataType dtype_x = context->GetInputDesc(0)->GetDataType();
+    ge::DataType dtype_y = context->GetInputDesc(1)->GetDataType();
+    ge::DataType dtype_z = context->GetOutputDesc(0)->GetDataType();
+    uint32_t D_T_X = ADD_TPL_FP32, D_T_Y=ADD_TPL_FP32, D_T_Z=ADD_TPL_FP32, TILE_NUM=1, IS_SPLIT=0;
+    if(dtype_x == ge::DataType::DT_FLOAT){
+        D_T_X = ADD_TPL_FP32;
+    }else if(dtype_x == ge::DataType::DT_FLOAT16){
+        D_T_X = ADD_TPL_FP16;
+    }
+    if(dtype_y == ge::DataType::DT_FLOAT){
+        D_T_Y = ADD_TPL_FP32;
+    }else if(dtype_y == ge::DataType::DT_FLOAT16){
+        D_T_Y = ADD_TPL_FP16;
+    }
+    if(dtype_z == ge::DataType::DT_FLOAT){
+        D_T_Z = ADD_TPL_FP32;
+    }else if(dtype_z == ge::DataType::DT_FLOAT16){
+        D_T_Z = ADD_TPL_FP16;
+    }
+    if(totalLength< MIN_LENGTH_FOR_SPLIT){
+        IS_SPLIT = 0;
+        TILE_NUM = 1;
+    }else{
+        IS_SPLIT = 1;
+        TILE_NUM = DEFAULT_TILE_NUM;
+    }
     context->SetBlockDim(BLOCK_DIM);
     tiling.set_totalLength(totalLength);
-    tiling.set_tileNum(TILE_NUM);
     tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity());
     context->GetRawTilingData()->SetDataSize(tiling.GetDataSize());
-    const uint64_t tilingKey = GET_TPL_TILING_KEY(10, 15, 7, 1); // 模板参数tilingkey配置
+    const uint64_t tilingKey = GET_TPL_TILING_KEY(D_T_X, D_T_Y, D_T_Z, TILE_NUM, IS_SPLIT); // 模板参数tilingkey配置
     context->SetTilingKey(tilingKey);
     size_t *currentWorkspace = context->GetWorkspaceSizes(1);
     currentWorkspace[0] = 0;
@@ -55,16 +81,16 @@ public:
     {
         this->Input("x")
             .ParamType(REQUIRED)
-            .DataType({ge::DT_FLOAT16})
-            .Format({ge::FORMAT_ND});
+            .DataType({ge::DT_FLOAT16, ge::DT_FLOAT})
+            .Format({ge::FORMAT_ND, ge::FORMAT_ND});
         this->Input("y")
             .ParamType(REQUIRED)
-            .DataType({ge::DT_FLOAT16})
-
.Format({ge::FORMAT_ND}); + .DataType({ge::DT_FLOAT16, ge::DT_FLOAT}) + .Format({ge::FORMAT_ND, ge::FORMAT_ND}); this->SetInferShape(ge::InferShape).SetInferDataType(ge::InferDataType); this->AICore() diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_host/add_custom_tiling.h b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_host/add_custom_tiling.h old mode 100755 new mode 100644 similarity index 93% rename from operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_host/add_custom_tiling.h rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_host/add_custom_tiling.h index 323f3076f0bc4a06fc661c67b0df66081403dea8..7e9e79d1d2b9b7da9fb2bec8d0914013dce3a59f --- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_host/add_custom_tiling.h +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_host/add_custom_tiling.h @@ -14,7 +14,6 @@ namespace optiling { BEGIN_TILING_DATA_DEF(TilingData) TILING_DATA_FIELD_DEF(uint32_t, totalLength); -TILING_DATA_FIELD_DEF(uint32_t, tileNum); END_TILING_DATA_DEF; REGISTER_TILING_DATA_CLASS(AddCustom, TilingData) diff --git a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/add_custom.cpp b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/add_custom.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e67483876ff18a4757d75a81922bd91bc35854c9 --- /dev/null +++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/add_custom.cpp @@ -0,0 +1,108 @@ +/** + * @file add_custom.cpp + * + * Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
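A side note on the host hunk above: the dtype-to-`ADD_TPL_*` mapping in `TilingFunc` repeats the same if/else chain three times. Below is a minimal sketch of a helper that folds it into one place, assuming the surrounding add_custom.cpp context; the helper name `ToAddTplDtype` is invented and is not part of the sample.

```cpp
// Hypothetical helper (not in the sample): map a ge::DataType to the ADD_TPL_*
// value used in GET_TPL_TILING_KEY. Defaults to ADD_TPL_FP32, matching the
// initial values of D_T_X / D_T_Y / D_T_Z in TilingFunc above.
static uint32_t ToAddTplDtype(ge::DataType dtype)
{
    return (dtype == ge::DataType::DT_FLOAT16) ? ADD_TPL_FP16 : ADD_TPL_FP32;
}
// usage inside TilingFunc: uint32_t D_T_X = ToAddTplDtype(dtype_x); and so on.
```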
diff --git a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/add_custom.cpp b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/add_custom.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e67483876ff18a4757d75a81922bd91bc35854c9
--- /dev/null
+++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/add_custom.cpp
@@ -0,0 +1,108 @@
+/**
+ * @file add_custom.cpp
+ *
+ * Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+#include "kernel_operator.h"
+#include "tiling_key_add_custom.h"
+constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue
+
+template <typename dtypeX, typename dtypeY, typename dtypeZ>
+class KernelAdd {
+public:
+    __aicore__ inline KernelAdd() {}
+    __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t totalLength, uint32_t tileNum)
+    {
+        this->blockLength = totalLength / AscendC::GetBlockNum();
+        this->tileNum = tileNum;
+        if (tileNum == 1) {
+            this->tileLength = totalLength;
+        } else {
+            this->tileLength = this->blockLength / tileNum / BUFFER_NUM;
+        }
+        xGm.SetGlobalBuffer((__gm__ dtypeX *)x + this->blockLength * AscendC::GetBlockIdx(), this->blockLength);
+        yGm.SetGlobalBuffer((__gm__ dtypeY *)y + this->blockLength * AscendC::GetBlockIdx(), this->blockLength);
+        zGm.SetGlobalBuffer((__gm__ dtypeZ *)z + this->blockLength * AscendC::GetBlockIdx(), this->blockLength);
+        pipe.InitBuffer(inQueueX, BUFFER_NUM, this->tileLength * sizeof(dtypeX));
+        pipe.InitBuffer(inQueueY, BUFFER_NUM, this->tileLength * sizeof(dtypeY));
+        pipe.InitBuffer(outQueueZ, BUFFER_NUM, this->tileLength * sizeof(dtypeZ));
+    }
+    __aicore__ inline void Process1()
+    {
+        CopyIn(0);
+        Compute(0);
+        CopyOut(0);
+    }
+
+    __aicore__ inline void Process2()
+    {
+        int32_t loopCount = this->tileNum * BUFFER_NUM;
+        for (int32_t i = 0; i < loopCount; i++) {
+            CopyIn(i);
+            Compute(i);
+            CopyOut(i);
+        }
+    }
+
+private:
+    __aicore__ inline void CopyIn(int32_t progress)
+    {
+        AscendC::LocalTensor<dtypeX> xLocal = inQueueX.AllocTensor<dtypeX>();
+        AscendC::LocalTensor<dtypeY> yLocal = inQueueY.AllocTensor<dtypeY>();
+        AscendC::DataCopy(xLocal, xGm[progress * this->tileLength], this->tileLength);
+        AscendC::DataCopy(yLocal, yGm[progress * this->tileLength], this->tileLength);
+        inQueueX.EnQue(xLocal);
+        inQueueY.EnQue(yLocal);
+    }
+    __aicore__ inline void Compute(int32_t progress)
+    {
+        AscendC::LocalTensor<dtypeX> xLocal = inQueueX.DeQue<dtypeX>();
+        AscendC::LocalTensor<dtypeY> yLocal = inQueueY.DeQue<dtypeY>();
+        AscendC::LocalTensor<dtypeZ> zLocal = outQueueZ.AllocTensor<dtypeZ>();
+        AscendC::Add(zLocal, xLocal, yLocal, this->tileLength);
+        outQueueZ.EnQue(zLocal);
+        inQueueX.FreeTensor(xLocal);
+        inQueueY.FreeTensor(yLocal);
+    }
+    __aicore__ inline void CopyOut(int32_t progress)
+    {
+        AscendC::LocalTensor<dtypeZ> zLocal = outQueueZ.DeQue<dtypeZ>();
+        AscendC::DataCopy(zGm[progress * this->tileLength], zLocal, this->tileLength);
+        outQueueZ.FreeTensor(zLocal);
+    }
+
+private:
+    AscendC::TPipe pipe;
+    AscendC::TQue<AscendC::TPosition::VECIN, BUFFER_NUM> inQueueX, inQueueY;
+    AscendC::TQue<AscendC::TPosition::VECOUT, BUFFER_NUM> outQueueZ;
+    AscendC::GlobalTensor<dtypeX> xGm;
+    AscendC::GlobalTensor<dtypeY> yGm;
+    AscendC::GlobalTensor<dtypeZ> zGm;
+    uint32_t blockLength;
+    uint32_t tileNum;
+    uint32_t tileLength;
+};
+
+template <int D_T_X, int D_T_Y, int D_T_Z, int TILE_NUM, int IS_SPLIT>
+__global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, GM_ADDR tiling)
+{
+    GET_TILING_DATA(tiling_data, tiling);
+    if (D_T_X == ADD_TPL_FP32 && D_T_Y == ADD_TPL_FP32 && D_T_Z == ADD_TPL_FP32) {
+        KernelAdd<float, float, float> op;
+        op.Init(x, y, z, tiling_data.totalLength, TILE_NUM);
+        op.Process1();
+    } else if (D_T_X == ADD_TPL_FP16 && D_T_Y == ADD_TPL_FP16 && D_T_Z == ADD_TPL_FP16) {
+        KernelAdd<half, half, half> op;
+        if (IS_SPLIT == 0) {
+            op.Init(x, y, z, tiling_data.totalLength, TILE_NUM);
+            op.Process1();
+        } else if (IS_SPLIT == 1) {
+            op.Init(x, y, z, tiling_data.totalLength, TILE_NUM);
+            op.Process2();
+        }
+    }
+}
\ No newline at end of file
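To make the `Init()` arithmetic in the new kernel above concrete, here is a worked example with assumed sizes; the numbers are illustrative only and not taken from the sample.

```cpp
#include <cstdint>

// Assumed: 16384 total elements, BLOCK_DIM = 8 cores (as set by the host),
// TILE_NUM = 8 and IS_SPLIT = 1 from the tiling key, BUFFER_NUM = 2.
constexpr uint32_t totalLength = 16384;
constexpr uint32_t blockNum = 8;
constexpr uint32_t blockLength = totalLength / blockNum;            // 2048 elements per core
constexpr uint32_t tileNum = 8;
constexpr uint32_t bufferNum = 2;
constexpr uint32_t tileLength = blockLength / tileNum / bufferNum;  // 128 elements per DataCopy
// Process2() then runs tileNum * bufferNum = 16 CopyIn/Compute/CopyOut rounds per core.
static_assert(tileLength == 128, "each tile moves 128 elements");
```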
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h
old mode 100755
new mode 100644
similarity index 45%
rename from operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h
rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h
index 3f6b02c231edc6ab6c59c7fa9030078bca38f5b0..61dcb08ce34df1e6e7d0caca067efb71c4fe1b7e
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h
+++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/AddTemplateCustom/op_kernel/tiling_key_add_custom.h
@@ -13,14 +13,13 @@
 #define ADD_TPL_FP16 10
 #define ADD_TPL_FP32 20
-#define ADD_TPL_BP16 30
 #define ADD_TPL_ND 15
 #define ADD_TPL_NZ 25
 
 /**
 ASCENDC_TPL_ARGS_DECL(args0, ...): declares the operator's template parameters; args0 is the operator's unique identifier (recommended to match the opType), followed by any number of DTYPE, FORMAT, UINT and BOOL parameter declarations
-  ASCENDC_TPL_DTYPE_DECL(args0, ...): declares a DTYPE template parameter; args0 is the parameter name, followed by the enumerated DTYPE values; see the data types exposed by metadef
-  ASCENDC_TPL_FORMAT_DECL(args0, ...): declares a FORMAT template parameter; args0 is the parameter name, followed by the enumerated FORMAT values; see the formats exposed by metadef
+  ASCENDC_TPL_DTYPE_DECL(args0, ...): declares a DTYPE template parameter; args0 is the parameter name, followed by the enumerated DTYPE values
+  ASCENDC_TPL_FORMAT_DECL(args0, ...): declares a FORMAT template parameter; args0 is the parameter name, followed by the enumerated FORMAT values
   ASCENDC_TPL_UINT_DECL(args0, args1, args2, ...): declares a UINT template parameter
     args0: parameter name
     args1: bit width, i.e. how many bits are needed to represent the number of values declared for this UINT parameter
@@ -28,41 +27,43 @@ ASCENDC_TPL_ARGS_DECL(args0, ...)
     ASCENDC_TPL_UI_RANGE: range mode; the first value that follows gives the number of ranges, and each subsequent pair of values gives the start and end of one range; the declared number of ranges must match the number of pairs that follow
     ASCENDC_TPL_UI_LIST: list mode; every legal value is enumerated explicitly; note that a value's position in the list, not its magnitude, determines the tiling-key encoding
     ASCENDC_TPL_UI_MIX: mixed mode; the first n values form a range-mode definition and the last m values form a list-mode definition
-    Example 1: ASCENDC_TPL_UI_RANGE,2,0,2,3,5 declares 2 ranges, {0, 2} and {3, 5}; the legal values of the UINT parameter are therefore {0, 1, 2, 3, 4, 5}, encoded in the declared order
-    Example 2: ASCENDC_TPL_UI_LIST,10,12,13,9,8,7,6 declares one list of values [10, 12, 13, 9, 8, 7, 6]; these are the legal values, encoded in the declared order
-    Example 3: ASCENDC_TPL_UI_MIX,2,0,2,3, 5, 7, 6 declares 2 ranges, {0, 2} and {3, 5}, plus the list values [7, 6]; the legal values are therefore {0, 1, 2, 3, 4, 5, 7, 6}, encoded in the declared order
+    Example 1: ASCENDC_TPL_UI_RANGE,2, 0, 2, 3, 5 declares 2 ranges, {0, 2} and {3, 5}; the legal values of the UINT parameter are therefore {0, 1, 2, 3, 4, 5}, encoded in the declared order
+    Example 2: ASCENDC_TPL_UI_LIST,10, 12, 13, 9, 8 declares one list of values [10, 12, 13, 9, 8]; these are the legal values, encoded in the declared order
+    Example 3: ASCENDC_TPL_UI_MIX,2, 0, 2, 3, 5, 10, 12, 13, 9, 8 declares 2 ranges, {0, 2} and {3, 5}, plus the list values [10, 12, 13, 9, 8]; the legal values are therefore {0, 1, 2, 3, 4, 5, 10, 12, 13, 9, 8}, encoded in the declared order
 
   Usage notes:
   1. The tiling key is encoded from the position of each UINT declaration; when updating an ASCENDC_TPL_UINT_DECL definition, take care not to disturb the positions of the existing parameters!
   2. Once declared, the bit width in an ASCENDC_TPL_UINT_DECL must not be changed afterwards (that would shift the encoding offsets of the other parameters and break tiling-key compatibility), so choose the initial bit width with future extensions of the parameter range in mind
-  ASCENDC_TPL_BOOL_DECL(args0, ...): declares a BOOL template parameter; args0 is the parameter name, followed by one or both of the values true and false
-
+  ASCENDC_TPL_BOOL_DECL(args0, ...): declares a bool template parameter; args0 is the parameter name, followed by one or both of the values true and false
 */
-ASCENDC_TPL_ARGS_DECL(AddTemplateCustom, // unique identifier
-ASCENDC_TPL_DTYPE_DECL(X, ADD_TPL_FP16, ADD_TPL_BP16, ADD_TPL_FP32), // encoded in an 8-bit field
-ASCENDC_TPL_FORMAT_DECL(Y, ADD_TPL_ND, ADD_TPL_NZ), // encoded in a 4-bit field
-ASCENDC_TPL_UINT_DECL(Z, ASCENDC_TPL_8_BW, ASCENDC_TPL_UI_MIX, 2, 0, 2, 3, 5, 7, 6), // encoded in the user-defined ASCENDC_TPL_8_BW bits; the specified width must exceed the number of declared values
-ASCENDC_TPL_BOOL_DECL(S, 0, 1), // encoded in a 1-bit field
+ASCENDC_TPL_ARGS_DECL(AddTemplateCustom, // unique identifier, recommended to match the OpType
+ASCENDC_TPL_DTYPE_DECL(D_T_X, ADD_TPL_FP16, ADD_TPL_FP32), // encoded in an 8-bit field
+ASCENDC_TPL_DTYPE_DECL(D_T_Y, ADD_TPL_FP16, ADD_TPL_FP32),
+ASCENDC_TPL_DTYPE_DECL(D_T_Z, ADD_TPL_FP16, ADD_TPL_FP32),
+ASCENDC_TPL_UINT_DECL(TILE_NUM, ASCENDC_TPL_8_BW, ASCENDC_TPL_UI_MIX, 2, 0, 2, 3, 5, 10, 12, 13, 9, 8), // encoded in the user-defined ASCENDC_TPL_8_BW bits; the specified width must exceed the number of declared values
+ASCENDC_TPL_BOOL_DECL(IS_SPLIT, 0, 1), // encoded in a 1-bit field
 );
 
 /**
-ASCENDC_TPL_ARGS_DSEL(...): declares a combination of the operator's template parameters; several combinations may be given
-  ASCENDC_TPL_DTYPE_SEL(args0, ...): a DTYPE parameter combination; args0 is the parameter name, followed by a subset of the values declared in the matching ASCENDC_TPL_DTYPE_DECL
-  ASCENDC_TPL_FORMAT_SEL(args0, ...): a FORMAT parameter combination; args0 is the parameter name, followed by a subset of the values declared in the matching ASCENDC_TPL_FORMAT_DECL
-  ASCENDC_TPL_UINT_SEL(args0, args1, args2, ...): a UINT parameter combination; args0 is the parameter name, args1 is the representation mode (ASCENDC_TPL_UI_RANGE, ASCENDC_TPL_UI_LIST or ASCENDC_TPL_UI_MIX), and the values that follow obey the ASCENDC_TPL_UINT_DECL rules
-  ASCENDC_TPL_BOOL_SEL(args0, ...): a BOOL parameter combination; args0 is the parameter name, followed by a subset of the values declared in the matching ASCENDC_TPL_BOOL_DECL
-
+ASCENDC_TPL_SEL(...): the overall set of template-parameter combinations of the operator; several combinations may be given
+  ASCENDC_TPL_ARGS_SEL(...): one template-parameter combination; several combinations may be given
+  ASCENDC_TPL_DTYPE_SEL(args0, ...): a DTYPE parameter combination; args0 is the parameter name, followed by a subset of the values declared in the matching ASCENDC_TPL_DTYPE_DECL
+  ASCENDC_TPL_FORMAT_SEL(args0, ...): a FORMAT parameter combination; args0 is the parameter name, followed by a subset of the values declared in the matching ASCENDC_TPL_FORMAT_DECL
+  ASCENDC_TPL_UINT_SEL(args0, args1, args2, ...): a UINT parameter combination; args0 is the parameter name, args1 is the representation mode (ASCENDC_TPL_UI_RANGE, ASCENDC_TPL_UI_LIST or ASCENDC_TPL_UI_MIX), and the values that follow obey the ASCENDC_TPL_UINT_DECL rules
+  ASCENDC_TPL_BOOL_SEL(args0, ...): a bool parameter combination; args0 is the parameter name, followed by a subset of the values declared in the matching ASCENDC_TPL_BOOL_DECL
 */
 ASCENDC_TPL_SEL(
     ASCENDC_TPL_ARGS_SEL(
-        ASCENDC_TPL_DTYPE_SEL(X, ADD_TPL_FP16, ADD_TPL_BP16),
-        ASCENDC_TPL_FORMAT_SEL(Y, ADD_TPL_ND),
-        ASCENDC_TPL_UINT_SEL(Z, ASCENDC_TPL_UI_LIST, 4, 7),
-        ASCENDC_TPL_BOOL_SEL(S, 0, 1),
+        ASCENDC_TPL_DTYPE_SEL(D_T_X, ADD_TPL_FP16),
+        ASCENDC_TPL_DTYPE_SEL(D_T_Y, ADD_TPL_FP16),
+        ASCENDC_TPL_DTYPE_SEL(D_T_Z, ADD_TPL_FP16),
+        ASCENDC_TPL_UINT_SEL(TILE_NUM, ASCENDC_TPL_UI_LIST, 1, 8),
+        ASCENDC_TPL_BOOL_SEL(IS_SPLIT, 0, 1),
     ),
     ASCENDC_TPL_ARGS_SEL(
-        ASCENDC_TPL_DTYPE_SEL(X, ADD_TPL_FP32),
-        ASCENDC_TPL_FORMAT_SEL(Y, ADD_TPL_ND, ADD_TPL_NZ),
-        ASCENDC_TPL_UINT_SEL(Z, ASCENDC_TPL_UI_LIST, 4, 6),
-        ASCENDC_TPL_BOOL_SEL(S, 0, 1),
+        ASCENDC_TPL_DTYPE_SEL(D_T_X, ADD_TPL_FP32),
+        ASCENDC_TPL_DTYPE_SEL(D_T_Y, ADD_TPL_FP32),
+        ASCENDC_TPL_DTYPE_SEL(D_T_Z, ADD_TPL_FP32),
+        ASCENDC_TPL_UINT_SEL(TILE_NUM, ASCENDC_TPL_UI_LIST, 1, 8),
+        ASCENDC_TPL_BOOL_SEL(IS_SPLIT, 0, 1),
     ),
 );
 
 #endif
\ No newline at end of file
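The declarations above encode tiling keys positionally. The sketch below is conceptual only and is NOT the real `GET_TPL_TILING_KEY`, which the ASCENDC_TPL framework generates; the bit widths and packing order here are assumptions chosen purely to illustrate why declaration positions and widths must stay stable.

```cpp
// Conceptual sketch of positional tiling-key packing. Each parameter
// contributes the *position* of its value within its declared list, placed
// into a fixed bit field; field widths (8+8+8+8+1) and order are assumed.
#include <cstdint>
#include <initializer_list>

constexpr uint64_t IndexOf(int value, std::initializer_list<int> declared)
{
    uint64_t idx = 0;
    for (int v : declared) {
        if (v == value) {
            return idx;
        }
        ++idx;
    }
    return idx; // out-of-range values would be rejected by the real framework
}

constexpr uint64_t PackAddKey(int dtX, int dtY, int dtZ, int tileNum, int isSplit)
{
    const uint64_t fX = IndexOf(dtX, {10, 20});  // D_T_X: ADD_TPL_FP16, ADD_TPL_FP32
    const uint64_t fY = IndexOf(dtY, {10, 20});  // D_T_Y
    const uint64_t fZ = IndexOf(dtZ, {10, 20});  // D_T_Z
    const uint64_t fT = IndexOf(tileNum, {0, 1, 2, 3, 4, 5, 10, 12, 13, 9, 8}); // TILE_NUM legal values
    const uint64_t fS = static_cast<uint64_t>(isSplit);                         // IS_SPLIT: 1-bit bool
    return (fX << 25) | (fY << 17) | (fZ << 9) | (fT << 1) | fS;
}
static_assert(PackAddKey(10, 10, 10, 8, 1) != PackAddKey(20, 20, 20, 8, 1), "fp16 and fp32 keys differ");
```

Because positions drive the encoding, inserting a value in the middle of a declared list, reordering declarations, or widening a bit field silently remaps every existing key, which is exactly what the usage notes above warn against.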
diff --git a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/README.md b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/README.md
index 1a42ac120c00f9cb14ba0c8f9b7e1baf5187fb68..ed68001852e918a38d0c3747aed30da555cfe155 100644
--- a/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/README.md
+++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/README.md
@@ -1 +1,145 @@
-To be added
+## Add custom operator sample with template parameters
+This sample implements an Add operator with template parameters in the Ascend C programming language, and provides end-to-end implementations for the different operator invocation methods.
+
+## Directory structure
+```
+├── 6_addtemplate_frameworklaunch   // invokes the Add operator through the framework
+│   ├── AclNNInvocation             // invokes the AddTemplateCustom template-parameter operator via aclnn
+│   ├── AddTemplateCustom           // AddTemplateCustom template-parameter operator project
+│   ├── AddTemplateCustom.json      // prototype-definition json file of the AddTemplateCustom operator
+│   └── install.sh                  // script that calls msOpGen to generate the custom operator project and build it
+```
+
+## Operator description
+The Add operator adds two tensors and returns the sum. The template parameters added in this sample cover the input data types, shape, and so on; they simplify and unify the operator implementation, and developers can carry whatever information they need in them, such as input/output data types or other extension parameters. The corresponding mathematical expression is:
+```
+z = x + y
+```
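For reference, a hypothetical scalar implementation of the expression above (not part of the sample, which verifies results through its own scripts):

```cpp
// Hypothetical element-wise reference of z = x + y; assumes x and y
// have equal length.
#include <cstddef>
#include <vector>

template <typename T>
std::vector<T> AddGolden(const std::vector<T> &x, const std::vector<T> &y)
{
    std::vector<T> z(x.size());
    for (std::size_t i = 0; i < x.size(); ++i) {
        z[i] = x[i] + y[i];
    }
    return z;
}
```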
+## Operator specification
+| Operator type (OpType) | Add | | | |
+| --- | --- | --- | --- | --- |
+| Operator inputs | name | shape | data type | format |
+|  | x | - | float16, float | ND, FRACTAL_NZ |
+|  | y | - | float16, float | ND, FRACTAL_NZ |
+| Operator output | z | - | float16, float | ND, FRACTAL_NZ |
+| Kernel function name | add_custom | | | |
+| Template parameters | template<int D_T_X, int D_T_Y, int D_T_Z, int TILE_NUM, int IS_SPLIT> | | | |
+|  | D_T_X | int | data type (float16 or float) | |
+|  | D_T_Y | int | data type (float16 or float) | |
+|  | D_T_Z | int | data type (float16 or float) | |
+|  | TILE_NUM | int | number of tiles | |
+|  | IS_SPLIT | int | whether the data is split into tiles | |
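A minimal sketch of the dispatch these template parameters drive, mirroring the branch selection in op_kernel/add_custom.cpp; the sketch is self-contained and the enum and function names are invented, not part of the sample:

```cpp
// Branch selection applied to the template parameters by the kernel entry.
// Enum values follow tiling_key_add_custom.h: ADD_TPL_FP16 = 10, ADD_TPL_FP32 = 20.
enum class AddPath { Fp32Once, Fp16Once, Fp16Tiled, Unsupported };

constexpr AddPath SelectPath(int dtX, int dtY, int dtZ, int isSplit)
{
    if (dtX == 20 && dtY == 20 && dtZ == 20) {
        return AddPath::Fp32Once;                    // Process1()
    }
    if (dtX == 10 && dtY == 10 && dtZ == 10) {
        return (isSplit == 0) ? AddPath::Fp16Once    // Process1()
                              : AddPath::Fp16Tiled;  // Process2(), double-buffered
    }
    return AddPath::Unsupported;
}
static_assert(SelectPath(10, 10, 10, 1) == AddPath::Fp16Tiled,
              "a large float16 input takes the tiled, double-buffered path");
```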
+
+## Supported product models
+This sample supports the following product models:
+- Atlas training series products
+- Atlas inference series products (with AI Core)
+- Atlas A2 training series products / Atlas 800I A2 inference products
+- Atlas 200/500 A2 inference products
+
+## Operator project overview
+The template-parameter operator project directory AddTemplateCustom contains the operator implementation files, as shown below:
+```
+├── AddTemplateCustom               // AddTemplateCustom custom template-parameter operator project
+│   ├── op_host                     // host-side implementation files
+│   │   ├── add_custom.cpp          // host-side tiling implementation
+│   │   ├── add_custom_tiling.h     // host-side tiling definition header
+│   └── op_kernel                   // kernel-side implementation files
+│       ├── add_custom.cpp          // kernel-side operator implementation
+│       └── tiling_key_add_custom.h // kernel-side template-parameter definition header
+```
+The CANN package ships the project-generation tool msopgen; the AddTemplateCustom operator project can be generated automatically from AddTemplateCustom.json. For details on custom operator projects, see [Ascend C operator development](https://hiascend.com/document/redirect/CannCommunityOpdevAscendC) > Engineering-style operator development > Creating an operator project.
+
+After the template-parameter operator project has been created, developers mainly need to implement the operator's host and kernel files. To simplify the sample workflow, the necessary operator implementation is already prepared in the AddTemplateCustom directory; the install.sh script creates a CustomOp directory, copies the implementation files into the corresponding locations, and then builds the operator.
+
+Note: the CustomOp directory is generated; every run of install.sh deletes and recreates it, so never write operator code in that directory -- changes would be lost.
+
+For a walkthrough of the operator implementation, see [AddTemplateCustom](./AddTemplateCustom/README.md).
+
+## Building and running the sample operator
+For a custom operator project, building and running consists of the following steps:
+- call the msOpGen tool to generate the custom operator project;
+- implement the operator's host and kernel code;
+- build the project to produce a custom operator package;
+- install the package into the custom operator library;
+- invoke and execute the custom operator.
+
+The detailed steps are as follows.
+### 1. Get the source package
+Before building and running this sample, see [Preparation: get the sample code](../README.md#codeready) to obtain the source package.
+
+### 2. Generate the custom operator project, copy the host/kernel implementation, and build the operator
+  - Switch to the directory containing the msOpGen script install.sh
+    ```bash
+    # if you cloned the master branch via the git command line, switch to the directory
+    cd ${git_clone_path}/samples/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch
+    ```
+
+  - Run the script to generate the custom operator project, copy the host/kernel implementation, and build the operator
+    - Option 1: set environment variables, then run the script
+      Choose the command matching how the CANN development kit was [installed](https://hiascend.com/document/redirect/CannCommunityInstSoftware) on your environment.
+      - default path, CANN package installed as root
+        ```bash
+        export ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest
+        ```
+      - default path, CANN package installed as a non-root user
+        ```bash
+        export ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest
+        ```
+      - CANN package installed at a custom path install_path
+        ```bash
+        export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest
+        ```
+      Run install.sh
+      ```bash
+      bash install.sh -v [SOC_VERSION]
+      ```
+    - Option 2: pass the install path on the command line
+      ```bash
+      bash install.sh -v [SOC_VERSION] -i [ASCEND_INSTALL_PATH]
+      ```
+    Parameters:
+    - SOC_VERSION: the Ascend AI processor model. If you are unsure of the exact [SOC_VERSION], run npu-smi info on the server hosting the processor and prefix the reported "Name" with Ascend; e.g. if "Name" is xxxyy, configure [SOC_VERSION] as Ascendxxxyy. The following product models are supported:
+      - Atlas training series products
+      - Atlas inference series products (with AI Core)
+      - Atlas A2 training series products / Atlas 800I A2 inference products
+      - Atlas 200/500 A2 inference products
+    - ASCEND_INSTALL_PATH: CANN package install path
+
+    On success the script creates a CustomOp directory in the current directory; after the build finishes, the custom operator package custom_opp_<target os>_<target architecture>.run, e.g. "custom_opp_ubuntu_x86_64.run", is generated under CustomOp/build_out.
+
+    Note: to use the dump debugging feature, remove the configuration entries for Atlas training series products and Atlas 200/500 A2 inference products from op_host.
+
+### 3. Deploy the custom operator package
+- Before deploying, make sure the default deployment path environment variable ASCEND_OPP_PATH exists
+  ```bash
+  echo $ASCEND_OPP_PATH
+  # example output: /usr/local/Ascend/ascend-toolkit/latest/opp
+
+  # if it is missing, export the CANN environment variables
+  source [ASCEND_INSTALL_PATH]/bin/setenv.bash
+  # e.g. source /usr/local/Ascend/ascend-toolkit/latest/bin/setenv.bash
+  ```
+  Parameters:
+  - ASCEND_INSTALL_PATH: CANN package install path, normally the same as in the previous step
+
+- In the directory containing the custom operator package, install it
+  ```bash
+  cd CustomOp/build_out
+  ./custom_opp_<target os>_<target architecture>.run
+  ```
+  On success, the files in the package are deployed into the vendors/customize directory under the opp operator library pointed to by ASCEND_OPP_PATH.
+### 4. Invoke and execute the operator project
+- [invoking the AddTemplateCustom operator project via aclnn](./AclNNInvocation/README.md)
+## Change log
+| Date | Change |
+| ---------- | ---------- |
+| 2024/10/25 | added the template-parameter operator sample |
+| 2024/11/18 | adjusted the sample directory layout |
diff --git a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/run.sh b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/install.sh
old mode 100644
new mode 100755
similarity index 30%
rename from operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/run.sh
rename to operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/install.sh
index 066e4eb363234c9e2625bd7a69eb5b88252e47a0..41f6be73c6af52cda124b0740bde4c01be546a0a
--- a/operator/AddTemplateCustomSample/FrameworkLaunch/AclOnlineModel/run.sh
+++ b/operator/ascendc/0_introduction/6_addtemplate_frameworklaunch/install.sh
@@ -1,24 +1,17 @@
 #!/bin/bash
-export ASCEND_SLOG_PRINT_TO_STDOUT=0
-export ASCEND_GLOBAL_LOG_LEVEL=0
-
-CURRENT_DIR=$(
-    cd $(dirname ${BASH_SOURCE:-$0})
-    pwd
-)
-cd $CURRENT_DIR
-
-# export environment variables
-DTYPE="float"
-SHORT=v:,
-LONG=dtype:,
+SHORT=v:,i:,
+LONG=soc-version:,install-path:,
 OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@")
 eval set -- "$OPTS"
+
 while :; do
     case "$1" in
-    # float16, float, int32
-    -v | --dtype)
-        DTYPE="$2"
+    -v | --soc-version)
+        SOC_VERSION="$2"
+        shift 2
+        ;;
+    -i | --install-path)
+        ASCEND_INSTALL_PATH="$2"
         shift 2
         ;;
     --)
@@ -32,6 +25,12 @@ while :; do
     esac
 done
 
+VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4"
+if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then
+    echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]"
+    exit 1
+fi
+
 if [ -n "$ASCEND_INSTALL_PATH" ]; then
     _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH
 elif [ -n "$ASCEND_HOME_PATH" ]; then
@@ -44,59 +43,13 @@ else
     fi
 fi
 source $_ASCEND_INSTALL_PATH/bin/setenv.bash
-export DDK_PATH=$_ASCEND_INSTALL_PATH
-export NPU_HOST_LIB=$_ASCEND_INSTALL_PATH/lib64
-
-function main {
-    # 1. remove leftover generated files and log files
-    rm -rf $HOME/ascend/log/*
-    rm ./input/*.bin
-    rm ./output/*.bin
-
-    # 2. generate the input data and the golden data
-    cd $CURRENT_DIR
-    python3 scripts/gen_data.py
-    if [ $? -ne 0 ]; then
-        echo "ERROR: generate input data failed!"
-        return 1
-    fi
-    echo "INFO: generate input data success!"
-
-    # 3. build the acl executable
-    cd $CURRENT_DIR
-    rm -rf build
-    mkdir -p build
-    cd build
-    cmake ../src
-    if [ $? -ne 0 ]; then
-        echo "ERROR: cmake failed!"
-        return 1
-    fi
-    echo "INFO: cmake success!"
-    make
-    if [ $? -ne 0 ]; then
-        echo "ERROR: make failed!"
-        return 1
-    fi
-    echo "INFO: make success!"
-
-    # 4. run the executable
-    cd $CURRENT_DIR/output
-    echo "INFO: execute op!"
-    ./execute_add_op
-    if [ $? -ne 0 ]; then
-        echo "ERROR: acl executable run failed! please check your project!"
-        return 1
-    fi
-    echo "INFO: acl executable run success!"
-
-    # 5. compare against the golden file
-    cd $CURRENT_DIR
-    python3 scripts/verify_result.py output/output_z.bin output/golden.bin
-    if [ $? -ne 0 ]; then
-        echo "ERROR: verify result failed!"
-        return 1
-    fi
-}
-
-main
+export ASCEND_HOME_PATH=$_ASCEND_INSTALL_PATH
+
+OP_NAME=AddTemplateCustom
+rm -rf CustomOp
+# Generate the op framework
+msopgen gen -i $OP_NAME.json -c ai_core-${SOC_VERSION} -lan cpp -out CustomOp
+# Copy op implementation files to CustomOp
+cp -rf $OP_NAME/* CustomOp
+# Build CustomOp project
+(cd CustomOp && bash build.sh)
\ No newline at end of file