From 5aec59e330e4f28e9489fc36889c1d175e22c4c1 Mon Sep 17 00:00:00 2001
From: zgx950813
Date: Mon, 9 Sep 2024 17:16:03 +0800
Subject: [PATCH] add testcase for simplified aclnn invocation

---
 .../AclNNInvocationNaive/CMakeLists.txt       |  62 +++++++
 .../AclNNInvocationNaive/README.md            |  60 +++++++
 .../AclNNInvocationNaive/main.cpp             | 167 ++++++++++++++++++
 .../AclNNInvocationNaive/run.sh               |  82 +++++++++
 .../FrameworkLaunchOpGen/AddCustom.json       |  40 +++++
 .../AddCustom/op_host/add_custom.cpp          |  76 ++++++++
 .../AddCustom/op_host/add_custom_tiling.h     |  22 +++
 .../AddCustom/op_kernel/add_custom.cpp        |  93 ++++++++++
 .../FrameworkLaunchOpGen/README.md            |   6 +
 .../FrameworkLaunchOpGen/install.sh           |  51 ++++++
 10 files changed, 659 insertions(+)
 create mode 100644 operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/CMakeLists.txt
 create mode 100644 operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/README.md
 create mode 100644 operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/main.cpp
 create mode 100644 operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/run.sh
 create mode 100644 operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom.json
 create mode 100644 operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom/op_host/add_custom.cpp
 create mode 100644 operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom/op_host/add_custom_tiling.h
 create mode 100644 operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom/op_kernel/add_custom.cpp
 create mode 100644 operator/AddCustomSample/FrameworkLaunchOpGen/README.md
 create mode 100644 operator/AddCustomSample/FrameworkLaunchOpGen/install.sh

diff --git a/operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/CMakeLists.txt b/operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/CMakeLists.txt
new file mode 100644
index 000000000..7457b7c3e
--- /dev/null
+++ b/operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/CMakeLists.txt
@@ -0,0 +1,62 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved.
+
+# CMake lowest version requirement
+cmake_minimum_required(VERSION 3.5.1)
+
+# project information
+project(acl_execute_add)
+
+# Compile options
+add_compile_options(-std=c++11)
+
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "./")
+
+set(INC_PATH $ENV{DDK_PATH})
+
+if (NOT DEFINED ENV{DDK_PATH})
+    set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest")
+    message(STATUS "set default INC_PATH: ${INC_PATH}")
+else ()
+    message(STATUS "env INC_PATH: ${INC_PATH}")
+endif()
+
+set(CUST_PKG_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../CustomOp/build_out/op_api")
+
+set(LIB_PATH $ENV{NPU_HOST_LIB})
+
+# Dynamic libraries in the stub directory can only be used for compilation
+if (NOT DEFINED ENV{NPU_HOST_LIB})
+    set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/")
+    set(LIB_PATH1 "/usr/local/Ascend/ascend-toolkit/latest/atc/lib64/stub/")
+    message(STATUS "set default LIB_PATH: ${LIB_PATH}")
+else ()
+    message(STATUS "env LIB_PATH: ${LIB_PATH}")
+endif()
+
+# Header path
+include_directories(
+    ${INC_PATH}/runtime/include
+    ${INC_PATH}/atc/include
+    ${CUST_PKG_PATH}/include
+)
+
+# add host lib path
+link_directories(
+    ${LIB_PATH}
+    ${LIB_PATH1}
+    ${CUST_PKG_PATH}/lib
+)
+
+add_executable(execute_add_op
+    main.cpp
+)
+
+target_link_libraries(execute_add_op
+    ascendcl
+    cust_opapi
+    acl_op_compiler
+    nnopbase
+    stdc++
+)
+
+install(TARGETS execute_add_op DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
diff --git a/operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/README.md b/operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/README.md
new file mode 100644
index 000000000..75c02cebe
--- /dev/null
+++ b/operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/README.md
@@ -0,0 +1,60 @@
+## Overview
+Compared with the AclNNInvocation sample project, this sample simplifies the project configuration.
+## Directory Structure
+```
+├── AclNNInvocationNaive
+│   ├── CMakeLists.txt      // Build configuration file
+│   ├── main.cpp            // Entry point of the single-operator invocation application
+│   └── run.sh              // Script that builds and runs the operator
+```
+## Code Walkthrough
+After the custom operator has been developed and deployed, its function can be verified through single-operator invocation. main.cpp demonstrates the single-operator API execution mode: the operator is executed through C APIs, without providing a single-operator description file or converting an offline model; the single-operator API is called directly.
+
+After the custom operator is built and deployed, its single-operator API is generated automatically and can be called directly from an application. The operator API generally takes the form of a "two-stage interface":
+```cpp
+// Query the workspace size required by the operator
+aclnnStatus aclnnAddCustomGetWorkspaceSize(const aclTensor *x, const aclTensor *y, const aclTensor *out, uint64_t *workspaceSize, aclOpExecutor **executor);
+// Execute the operator
+aclnnStatus aclnnAddCustom(void *workspace, uint64_t workspaceSize, aclOpExecutor *executor, aclrtStream stream);
+```
+aclnnAddCustomGetWorkspaceSize is the first-stage interface; it computes how much workspace memory the API call requires. After obtaining the workspace size, allocate device memory of workspaceSize bytes and then call the second-stage interface aclnnAddCustom to perform the computation. For details, see the "Single-operator API execution" section in [AscendCL Single-Operator Invocation](https://hiascend.com/document/redirect/CannCommunityAscendCInVorkSingleOp).
+## Running the Sample
+### 1. Build the operator project
+Before running this sample, complete the preparation described in [Build the operator project](../README.md#operatorcompile).
+### 2. Run the aclnn invocation sample
+
+  - Enter the sample directory
+    Download the sample code from the command line, taking the master branch as an example.
+    ```bash
+    cd ${git_clone_path}/samples/operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive
+    ```
+  - Modify the sample build configuration
+
+    In CMakeLists.txt, replace "/usr/local/Ascend/ascend-toolkit/latest" with the actual installation path of the CANN package,
+    e.g. /home/HwHiAiUser/Ascend/ascend-toolkit/latest
+
+  - Configure environment variables
+
+    The NPU_HOST_LIB environment variable must be set; taking x86_64 as an example:
+    ```bash
+    export NPU_HOST_LIB=/home/HwHiAiUser/Ascend/ascend-toolkit/latest/x86_64-linux/lib64
+    ```
+  - Run the sample
+
+    The sample generates its test data automatically, then builds and runs the aclnn sample, and finally prints the result.
+    ```bash
+    mkdir -p build
+    cd build
+    cmake .. && make
+    ./execute_add_op
+    ```
+
+    Alternatively, the run.sh script can be used to build and run the sample (pass -r sim together with -v <SocVersion> for a simulator run).
+    ```bash
+    bash run.sh -r npu
+    ```
+
+## Change History
+| Date | Change |
+| ---------- | ------------ |
+| 2024/05/22 | Added this README |
\ No newline at end of file
diff --git a/operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/main.cpp b/operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/main.cpp
new file mode 100644
index 000000000..4d7d91543
--- /dev/null
+++ b/operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/main.cpp
@@ -0,0 +1,167 @@
+/**
+ * @file main.cpp
+ *
+ * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+#include <algorithm>
+#include <cstdint>
+#include <cstdio>
+#include <vector>
+
+#include "acl/acl.h"
+#include "aclnn_add_custom.h"
+
+#define SUCCESS 0
+#define FAILED 1
+
+#define CHECK_RET(cond, return_expr) \
+    do {                             \
+        if (!(cond)) {               \
+            return_expr;             \
+        }                            \
+    } while (0)
+
+#define LOG_PRINT(message, ...)         \
+    do {                                \
+        printf(message, ##__VA_ARGS__); \
+    } while (0)
+
+int64_t GetShapeSize(const std::vector<int64_t> &shape)
+{
+    int64_t shapeSize = 1;
+    for (auto i : shape) {
+        shapeSize *= i;
+    }
+    return shapeSize;
+}
+
+int Init(int32_t deviceId, aclrtStream *stream)
+{
+    // Fixed pattern: ACL initialization
+    auto ret = aclInit(nullptr);
+    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclInit failed. ERROR: %d\n", ret); return FAILED);
+    ret = aclrtSetDevice(deviceId);
+    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret); return FAILED);
+    ret = aclrtCreateStream(stream);
+    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtCreateStream failed. ERROR: %d\n", ret); return FAILED);
+
+    return SUCCESS;
+}
+
+template <typename T>
+int CreateAclTensor(const std::vector<T> &hostData, const std::vector<int64_t> &shape, void **deviceAddr,
+                    aclDataType dataType, aclTensor **tensor)
+{
+    auto size = GetShapeSize(shape) * sizeof(T);
+    // Allocate device memory with aclrtMalloc
+    auto ret = aclrtMalloc(deviceAddr, size, ACL_MEM_MALLOC_HUGE_FIRST);
+    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret); return FAILED);
+
+    // Copy the host data to the device memory with aclrtMemcpy
+    ret = aclrtMemcpy(*deviceAddr, size, hostData.data(), size, ACL_MEMCPY_HOST_TO_DEVICE);
+    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtMemcpy failed. ERROR: %d\n", ret); return FAILED);
+
+    // Create an aclTensor with aclCreateTensor
+    *tensor = aclCreateTensor(shape.data(), shape.size(), dataType, nullptr, 0, aclFormat::ACL_FORMAT_ND, shape.data(),
+                              shape.size(), *deviceAddr);
+    return SUCCESS;
+}
+
+int main(int argc, char **argv)
+{
+    // 1. (Fixed pattern) Initialize the device and stream; see the ACL API reference
+    // Set deviceId according to the actual device in use
+    int32_t deviceId = 0;
+    aclrtStream stream;
+    auto ret = Init(deviceId, &stream);
+    CHECK_RET(ret == 0, LOG_PRINT("Init acl failed. ERROR: %d\n", ret); return FAILED);
+
+    // 2. Construct the inputs and outputs; adapt this to the specific operator API
+    std::vector<int64_t> inputXShape = {8, 2048};
+    std::vector<int64_t> inputYShape = {8, 2048};
+    std::vector<int64_t> outputZShape = {8, 2048};
+    void *inputXDeviceAddr = nullptr;
+    void *inputYDeviceAddr = nullptr;
+    void *outputZDeviceAddr = nullptr;
+    aclTensor *inputX = nullptr;
+    aclTensor *inputY = nullptr;
+    aclTensor *outputZ = nullptr;
+    std::vector<aclFloat16> inputXHostData(inputXShape[0] * inputXShape[1]);
+    std::vector<aclFloat16> inputYHostData(inputYShape[0] * inputYShape[1]);
+    std::vector<aclFloat16> outputZHostData(outputZShape[0] * outputZShape[1]);
+    for (int i = 0; i < inputXShape[0] * inputXShape[1]; ++i) {
+        inputXHostData[i] = aclFloatToFloat16(1.0);
+        inputYHostData[i] = aclFloatToFloat16(2.0);
+        outputZHostData[i] = aclFloatToFloat16(0.0);
+    }
+    // Create the inputX aclTensor
+    ret = CreateAclTensor(inputXHostData, inputXShape, &inputXDeviceAddr, aclDataType::ACL_FLOAT16, &inputX);
+    CHECK_RET(ret == ACL_SUCCESS, return FAILED);
+    // Create the inputY aclTensor
+    ret = CreateAclTensor(inputYHostData, inputYShape, &inputYDeviceAddr, aclDataType::ACL_FLOAT16, &inputY);
+    CHECK_RET(ret == ACL_SUCCESS, return FAILED);
+    // Create the outputZ aclTensor
+    ret = CreateAclTensor(outputZHostData, outputZShape, &outputZDeviceAddr, aclDataType::ACL_FLOAT16, &outputZ);
+    CHECK_RET(ret == ACL_SUCCESS, return FAILED);
+
+    // 3. Call the API of the custom operator library
+    uint64_t workspaceSize = 0;
+    aclOpExecutor *executor;
+    // Query the workspace size and allocate device memory for it
+    ret = aclnnAddCustomGetWorkspaceSize(inputX, inputY, outputZ, &workspaceSize, &executor);
+    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnAddCustomGetWorkspaceSize failed. ERROR: %d\n", ret); return FAILED);
+    void *workspaceAddr = nullptr;
+    if (workspaceSize > 0) {
+        ret = aclrtMalloc(&workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST);
+        CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("allocate workspace failed. ERROR: %d\n", ret); return FAILED);
+    }
+    // Execute the operator
+    ret = aclnnAddCustom(workspaceAddr, workspaceSize, executor, stream);
+    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclnnAddCustom failed. ERROR: %d\n", ret); return FAILED);
+
+    // 4. (Fixed pattern) Synchronize and wait for the task to finish
+    ret = aclrtSynchronizeStream(stream);
+    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("aclrtSynchronizeStream failed. ERROR: %d\n", ret); return FAILED);
+
+    // 5. Copy the result from the device memory back to the host; adapt this to the specific API
+    auto size = GetShapeSize(outputZShape);
+    std::vector<aclFloat16> resultData(size, 0);
+    ret = aclrtMemcpy(resultData.data(), resultData.size() * sizeof(resultData[0]), outputZDeviceAddr,
+                      size * sizeof(aclFloat16), ACL_MEMCPY_DEVICE_TO_HOST);
+    CHECK_RET(ret == ACL_SUCCESS, LOG_PRINT("copy result from device to host failed. ERROR: %d\n", ret); return FAILED);
+
+    // 6. Destroy the aclTensor objects; adapt this to the specific API
+    aclDestroyTensor(inputX);
+    aclDestroyTensor(inputY);
+    aclDestroyTensor(outputZ);
+
+    // 7. Release device resources; adapt this to the specific API
+    aclrtFree(inputXDeviceAddr);
+    aclrtFree(inputYDeviceAddr);
+    aclrtFree(outputZDeviceAddr);
+    if (workspaceSize > 0) {
+        aclrtFree(workspaceAddr);
+    }
+    aclrtDestroyStream(stream);
+    aclrtResetDevice(deviceId);
+    aclFinalize();
+
+    // Print and verify the output
+    std::vector<aclFloat16> goldenData(size, aclFloatToFloat16(3.0));
+
+    LOG_PRINT("result is:\n");
+    for (int64_t i = 0; i < 10; i++) {
+        LOG_PRINT("%.1f ", aclFloat16ToFloat(resultData[i]));
+    }
+    LOG_PRINT("\n");
+    if (std::equal(resultData.begin(), resultData.end(), goldenData.begin())) {
+        LOG_PRINT("test pass\n");
+    } else {
+        LOG_PRINT("test failed\n");
+    }
+    return SUCCESS;
+}
diff --git a/operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/run.sh b/operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/run.sh
new file mode 100644
index 000000000..0cc50f4ee
--- /dev/null
+++ b/operator/AddCustomSample/FrameworkLaunchOpGen/AclNNInvocationNaive/run.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+SHORT=r:,v:,i:,
+LONG=run-mode:,soc-version:,install-path:,
+OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@")
+eval set -- "$OPTS"
+
+while :; do
+    case "$1" in
+    -r | --run-mode)
+        RUN_MODE="$2"
+        shift 2
+        ;;
+    -v | --soc-version)
+        SOC_VERSION="$2"
+        shift 2
+        ;;
+    -i | --install-path)
+        ASCEND_INSTALL_PATH="$2"
+        shift 2
+        ;;
+    --)
+        shift
+        break
+        ;;
+    *)
+        echo "[ERROR] Unexpected option: $1"
+        break
+        ;;
+    esac
+done
+
+RUN_MODE_LIST="sim npu"
+if [[ " $RUN_MODE_LIST " != *" $RUN_MODE "* ]]; then
+    echo "ERROR: RUN_MODE error, this sample only supports sim or npu!"
+    exit -1
+fi
+
+if [ "${RUN_MODE}" = "npu" ] && [ "$SOC_VERSION" ]; then
+    echo "ERROR: cannot specify SOC_VERSION when running on npu!"
+    exit -1
+fi
+
+VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4"
+if [ "${RUN_MODE}" = "sim" ] && [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then
+    echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]"
+    exit -1
+fi
+
+if [ -n "$ASCEND_INSTALL_PATH" ]; then
+    _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH
+elif [ -n "$ASCEND_HOME_PATH" ]; then
+    _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH
+else
+    if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then
+        _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest
+    else
+        _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest
+    fi
+fi
+source ${_ASCEND_INSTALL_PATH}/bin/setenv.bash
+export DDK_PATH=${_ASCEND_INSTALL_PATH}
+export NPU_HOST_LIB=${_ASCEND_INSTALL_PATH}/lib64
+
+export ASCEND_TOOLKIT_HOME=${_ASCEND_INSTALL_PATH}
+export ASCEND_HOME_PATH=${_ASCEND_INSTALL_PATH}
+if [ "${RUN_MODE}" = "sim" ]; then
+    export LD_LIBRARY_PATH=${_ASCEND_INSTALL_PATH}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH
+fi
+
+set -e
+rm -rf build
+mkdir -p build
+cmake -B build
+cmake --build build -j
+(
+    cd build
+    if [ "${RUN_MODE}" = "npu" ]; then
+        msprof op --application=./execute_add_op
+    elif [ "${RUN_MODE}" = "sim" ]; then
+        msprof op simulator --application=./execute_add_op
+    fi
+)
\ No newline at end of file
diff --git a/operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom.json b/operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom.json
new file mode 100644
index 000000000..dce1ed85f
--- /dev/null
+++ b/operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom.json
@@ -0,0 +1,40 @@
+[
+    {
+        "op": "AddCustom",
+        "language": "cpp",
+        "input_desc": [
+            {
+                "name": "x",
+                "param_type": "required",
+                "format": [
+                    "ND"
+                ],
+                "type": [
+                    "float16"
+                ]
+            },
+            {
+                "name": "y",
+                "param_type": "required",
+                "format": [
+                    "ND"
+                ],
+                "type": [
+                    "float16"
+                ]
+            }
+        ],
+        "output_desc": [
+            {
+                "name": "z",
+                "param_type": "required",
+                "format": [
+                    "ND"
+                ],
+                "type": [
+                    "float16"
+                ]
+            }
+        ]
+    }
+]
\ No newline at end of file
diff --git a/operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom/op_host/add_custom.cpp b/operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom/op_host/add_custom.cpp
new file mode 100644
index 000000000..840494b1a
--- /dev/null
+++ b/operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom/op_host/add_custom.cpp
@@ -0,0 +1,76 @@
+/**
+ * @file add_custom.cpp
+ *
+ * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+#include "add_custom_tiling.h"
+#include "register/op_def_registry.h"
+
+namespace optiling {
+const uint32_t BLOCK_DIM = 8;
+const uint32_t TILE_NUM = 8;
+static ge::graphStatus TilingFunc(gert::TilingContext *context)
+{
+    TilingData tiling;
+    uint32_t totalLength = context->GetInputShape(0)->GetOriginShape().GetShapeSize();
+    context->SetBlockDim(BLOCK_DIM);
+    tiling.set_totalLength(totalLength);
+    tiling.set_tileNum(TILE_NUM);
+    tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity());
+    context->GetRawTilingData()->SetDataSize(tiling.GetDataSize());
+    size_t *currentWorkspace = context->GetWorkspaceSizes(1);
+    currentWorkspace[0] = 0;
+    return ge::GRAPH_SUCCESS;
+}
+} // namespace optiling
+
+namespace ge {
+static graphStatus InferShape(gert::InferShapeContext *context)
+{
+    const gert::Shape *x1_shape = context->GetInputShape(0);
+    gert::Shape *y_shape = context->GetOutputShape(0);
+    *y_shape = *x1_shape;
+    return GRAPH_SUCCESS;
+}
+
+static graphStatus InferDataType(gert::InferDataTypeContext *context)
+{
+    const auto inputDataType = context->GetInputDataType(0);
+    context->SetOutputDataType(0, inputDataType);
+    return ge::GRAPH_SUCCESS;
+}
+} // namespace ge
+
+namespace ops {
+class AddCustom : public OpDef {
+public:
+    explicit AddCustom(const char *name) : OpDef(name)
+    {
+        this->Input("x")
+            .ParamType(REQUIRED)
+            .DataType({ge::DT_FLOAT16})
+            .Format({ge::FORMAT_ND});
+        this->Input("y")
+            .ParamType(REQUIRED)
+            .DataType({ge::DT_FLOAT16})
+            .Format({ge::FORMAT_ND});
+        this->Output("z")
+            .ParamType(REQUIRED)
+            .DataType({ge::DT_FLOAT16})
+            .Format({ge::FORMAT_ND});
+
+        this->SetInferShape(ge::InferShape).SetInferDataType(ge::InferDataType);
+        this->AICore()
+            .SetTiling(optiling::TilingFunc)
+            .AddConfig("ascend910")
+            .AddConfig("ascend310p")
+            .AddConfig("ascend310b")
+            .AddConfig("ascend910b");
+    }
+};
+OP_ADD(AddCustom);
+} // namespace ops
diff --git a/operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom/op_host/add_custom_tiling.h b/operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom/op_host/add_custom_tiling.h
new file mode 100644
index 000000000..323f3076f
--- /dev/null
+++ b/operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom/op_host/add_custom_tiling.h
@@ -0,0 +1,22 @@
+/**
+ * @file add_custom_tiling.h
+ *
+ * Copyright (C) 2023-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */ +#ifndef ADD_CUSTOM_TILING_H +#define ADD_CUSTOM_TILING_H +#include "register/tilingdata_base.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(TilingData) +TILING_DATA_FIELD_DEF(uint32_t, totalLength); +TILING_DATA_FIELD_DEF(uint32_t, tileNum); +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(AddCustom, TilingData) +} // namespace optiling +#endif // ADD_CUSTOM_TILING_H diff --git a/operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom/op_kernel/add_custom.cpp b/operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom/op_kernel/add_custom.cpp new file mode 100644 index 000000000..f11dfe861 --- /dev/null +++ b/operator/AddCustomSample/FrameworkLaunchOpGen/AddCustom/op_kernel/add_custom.cpp @@ -0,0 +1,93 @@ +/** + * @file add_custom.cpp + * + * Copyright (C) 2022-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ +#include "kernel_operator.h" +constexpr int32_t BUFFER_NUM = 2; // tensor num for each queue + +class KernelAdd { +public: + __aicore__ inline KernelAdd() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y, GM_ADDR z, uint32_t totalLength, uint32_t tileNum) + { + this->blockLength = totalLength / AscendC::GetBlockNum(); + this->tileNum = tileNum; + this->tileLength = this->blockLength / tileNum / BUFFER_NUM; + + xGm.SetGlobalBuffer((__gm__ DTYPE_X *)x + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + yGm.SetGlobalBuffer((__gm__ DTYPE_Y *)y + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + zGm.SetGlobalBuffer((__gm__ DTYPE_Z *)z + this->blockLength * AscendC::GetBlockIdx(), this->blockLength); + pipe.InitBuffer(inQueueX, BUFFER_NUM, this->tileLength * sizeof(DTYPE_X)); + pipe.InitBuffer(inQueueY, BUFFER_NUM, this->tileLength * sizeof(DTYPE_Y)); + pipe.InitBuffer(outQueueZ, BUFFER_NUM, this->tileLength * sizeof(DTYPE_Z)); + } + __aicore__ inline void Process() + { + int32_t loopCount = this->tileNum * BUFFER_NUM; + for (int32_t i = 0; i < loopCount; i++) { + CopyIn(i); + Compute(i); + CopyOut(i); + } + } + +private: + __aicore__ inline void CopyIn(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.AllocTensor(); + AscendC::LocalTensor yLocal = inQueueY.AllocTensor(); + AscendC::DataCopy(xLocal, xGm[progress * this->tileLength], this->tileLength); + AscendC::DataCopy(yLocal, yGm[progress * this->tileLength], this->tileLength); + inQueueX.EnQue(xLocal); + inQueueY.EnQue(yLocal); + } + __aicore__ inline void Compute(int32_t progress) + { + AscendC::LocalTensor xLocal = inQueueX.DeQue(); + AscendC::LocalTensor yLocal = inQueueY.DeQue(); + AscendC::LocalTensor zLocal = outQueueZ.AllocTensor(); + AscendC::Add(zLocal, xLocal, yLocal, this->tileLength); + outQueueZ.EnQue(zLocal); + inQueueX.FreeTensor(xLocal); + inQueueY.FreeTensor(yLocal); + } + __aicore__ inline void CopyOut(int32_t progress) + { + AscendC::LocalTensor zLocal = outQueueZ.DeQue(); + AscendC::DataCopy(zGm[progress * this->tileLength], zLocal, this->tileLength); + outQueueZ.FreeTensor(zLocal); + } + +private: + AscendC::TPipe pipe; + AscendC::TQue inQueueX, inQueueY; + AscendC::TQue outQueueZ; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; + AscendC::GlobalTensor zGm; + uint32_t blockLength; + uint32_t tileNum; + uint32_t tileLength; +}; + +extern "C" __global__ __aicore__ void add_custom(GM_ADDR x, GM_ADDR y, GM_ADDR z, GM_ADDR workspace, GM_ADDR 
+{
+    GET_TILING_DATA(tiling_data, tiling);
+    KernelAdd op;
+    op.Init(x, y, z, tiling_data.totalLength, tiling_data.tileNum);
+    op.Process();
+}
+
+#ifndef ASCENDC_CPU_DEBUG
+// call of kernel function
+void add_custom_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *x, uint8_t *y, uint8_t *z,
+                   uint8_t *workspace, uint8_t *tiling)
+{
+    add_custom<<<blockDim, l2ctrl, stream>>>(x, y, z, workspace, tiling);
+}
+#endif
\ No newline at end of file
diff --git a/operator/AddCustomSample/FrameworkLaunchOpGen/README.md b/operator/AddCustomSample/FrameworkLaunchOpGen/README.md
new file mode 100644
index 000000000..5b600852d
--- /dev/null
+++ b/operator/AddCustomSample/FrameworkLaunchOpGen/README.md
@@ -0,0 +1,6 @@
+# AddCustom operator sample with an msopgen-generated project
+Supports running the aclnn invocation either on the NPU or on the simulator, for example:
+```bash
+bash install.sh -v Ascend910B1
+(cd AclNNInvocationNaive; bash run.sh -v Ascend910B1 -r sim)
+```
\ No newline at end of file
diff --git a/operator/AddCustomSample/FrameworkLaunchOpGen/install.sh b/operator/AddCustomSample/FrameworkLaunchOpGen/install.sh
new file mode 100644
index 000000000..3225e7101
--- /dev/null
+++ b/operator/AddCustomSample/FrameworkLaunchOpGen/install.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+SHORT=v:,i:,
+LONG=soc-version:,install-path:,
+OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@")
+eval set -- "$OPTS"
+
+while :; do
+    case "$1" in
+    -v | --soc-version)
+        SOC_VERSION="$2"
+        shift 2
+        ;;
+    -i | --install-path)
+        ASCEND_INSTALL_PATH="$2"
+        shift 2
+        ;;
+    --)
+        shift
+        break
+        ;;
+    *)
+        echo "[ERROR] Unexpected option: $1"
+        break
+        ;;
+    esac
+done
+
+VERSION_LIST="Ascend910A Ascend910B Ascend310B1 Ascend310B2 Ascend310B3 Ascend310B4 Ascend310P1 Ascend310P3 Ascend910B1 Ascend910B2 Ascend910B3 Ascend910B4"
+if [[ " $VERSION_LIST " != *" $SOC_VERSION "* ]]; then
+    echo "ERROR: SOC_VERSION should be in [$VERSION_LIST]"
+    exit -1
+fi
+
+if [ -n "$ASCEND_INSTALL_PATH" ]; then
+    _ASCEND_INSTALL_PATH=$ASCEND_INSTALL_PATH
+elif [ -n "$ASCEND_HOME_PATH" ]; then
+    _ASCEND_INSTALL_PATH=$ASCEND_HOME_PATH
+else
+    if [ -d "$HOME/Ascend/ascend-toolkit/latest" ]; then
+        _ASCEND_INSTALL_PATH=$HOME/Ascend/ascend-toolkit/latest
+    else
+        _ASCEND_INSTALL_PATH=/usr/local/Ascend/ascend-toolkit/latest
+    fi
+fi
+source $_ASCEND_INSTALL_PATH/bin/setenv.bash
+export ASCEND_HOME_PATH=$_ASCEND_INSTALL_PATH
+
+rm -rf CustomOp
+msopgen gen -i AddCustom.json -f aclnn -c ai_core-${SOC_VERSION} -lan cpp -out CustomOp
+cp -rf AddCustom/* CustomOp
+(cd CustomOp && bash build.sh)
\ No newline at end of file
--
Gitee
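
main.cpp in this patch accepts the output only if it matches the golden data exactly via std::equal, which is fine here because 1.0 + 2.0 = 3.0 is exactly representable in float16. For operators whose expected values are not exact, a tolerance-based comparison is more robust. The helper below is a minimal sketch and is not part of the patch; it assumes only the aclFloat16 conversion helpers from the ACL headers already used in main.cpp, and the function name VerifyFp16 and the tolerance values are illustrative.

```cpp
// Tolerance-based verification sketch (hypothetical helper, not part of the patch).
#include <cmath>
#include <cstdio>
#include <vector>

#include "acl/acl.h"

// Returns true when every result element is within absTol + relTol * |golden| of the golden value.
inline bool VerifyFp16(const std::vector<aclFloat16> &result, const std::vector<aclFloat16> &golden,
                       float relTol = 1e-3F, float absTol = 1e-3F)
{
    if (result.size() != golden.size()) {
        return false;
    }
    for (size_t i = 0; i < result.size(); ++i) {
        float r = aclFloat16ToFloat(result[i]);
        float g = aclFloat16ToFloat(golden[i]);
        if (std::fabs(r - g) > absTol + relTol * std::fabs(g)) {
            printf("mismatch at index %zu: result=%f golden=%f\n", i, r, g);
            return false;
        }
    }
    return true;
}
```

In main.cpp this would take the place of the std::equal comparison, for example `if (VerifyFp16(resultData, goldenData)) { LOG_PRINT("test pass\n"); }`.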