From aa5681d281cd66c032e32363206b928e95223d5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=89=9B=E5=90=9B=E8=B1=AA?= Date: Wed, 17 Jul 2024 11:03:02 +0800 Subject: [PATCH 01/33] =?UTF-8?q?bugfix=20=E6=95=99=E7=A8=8B=E6=96=87?= =?UTF-8?q?=E6=A1=A3=E7=9B=AE=E5=BD=95=E9=94=99=E8=AF=AF=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/feature_cards/Offset_Recompute.md | 4 +- docs/readthedocs/source_zh_cn/conf.py | 8 +-- .../source_zh_cn/docs/practice/Environment.md | 61 +++++++++---------- docs/readthedocs/source_zh_cn/index.rst | 6 +- 4 files changed, 38 insertions(+), 41 deletions(-) diff --git a/docs/feature_cards/Offset_Recompute.md b/docs/feature_cards/Offset_Recompute.md index 57ca0652..c1c6496d 100644 --- a/docs/feature_cards/Offset_Recompute.md +++ b/docs/feature_cards/Offset_Recompute.md @@ -3,7 +3,7 @@ 在大模型训练调优过程中,设备内存的合理使用和分配是一项重要的环节,除了通过各种并行方式将模型、优化器状态等数据切分到不同设备外,还可以通过调整流水并行的负载偏置与重计算来精细调整内存的使用。 在mindformers/models/utils.py中提供了set_layer_stage_recompute函数,用于灵活配置每一层的stage_id与重计算。 -# 配置流水并行的负载均衡 +## 配置流水并行的负载均衡 流水并行默认网络层数num_layers可以被pp数pipeline_stage整除,每个stage中包含num_layers/pipeline_stage层。 如果网络层数num_layers不能被pp数pipeline_stage整除,或者调整每个stage中包含的层数,那么可以通过offset参数进行配置。 @@ -13,7 +13,7 @@ offset可以传入一个list或tuple,此时,list的元素个数需要等于p 例如,一个网络有48层,pp数为5,offset设为[0,1,1,1,0],那么这5个stage的层数为9,10,10,10,9。 -# 配置重计算与选择重计算 +## 配置重计算与选择重计算 重计算可以显著降低训练时使用的内存,但会额外增加一些计算。 diff --git a/docs/readthedocs/source_zh_cn/conf.py b/docs/readthedocs/source_zh_cn/conf.py index c9123bb4..b3e92667 100644 --- a/docs/readthedocs/source_zh_cn/conf.py +++ b/docs/readthedocs/source_zh_cn/conf.py @@ -34,7 +34,7 @@ from sphinx.util import logging project = 'mindformers' # pylint: disable=W0622 -copyright = '2023, mindformers contributors' +copyright = '2024, mindformers contributors' author = 'mindformers contributors' # The full version, including alpha/beta/rc tags @@ -121,9 +121,9 @@ for file_path in copy_path: # split README into 4 parts with open('README.md', 'r') as f: - title_list = ['Introduction', 'Install', 'Version_Match', - 'Quick_Tour', 'Contribution', 'License'] - title_for_index = ['# 介绍', '# 安装', '# 版本配套', '# 快速开始', '-', '-'] + title_list = ['Introduction', 'Install', 'User_Guide', + 'Contribution', 'License'] + title_for_index = ['# 介绍', '# 安装与版本配套', '# 使用指南', '-', '-'] file_count = 0 fn = None for line in f: diff --git a/docs/readthedocs/source_zh_cn/docs/practice/Environment.md b/docs/readthedocs/source_zh_cn/docs/practice/Environment.md index 12d954b4..07365fae 100644 --- a/docs/readthedocs/source_zh_cn/docs/practice/Environment.md +++ b/docs/readthedocs/source_zh_cn/docs/practice/Environment.md @@ -6,44 +6,41 @@ Mindformers提供了以下环境变量的配置说明,请根据使用场景自 以下配置适用于mindformers框架相关的环境变量 -| 环境变量 | 功能 | 类型 | 取值 | 说明 | -| ------------------- | -------------- | ------- | ------------------------------------------------------------ | -------------------------------------------- | -| SHARED_PATHS | 指定共享盘路径 | String | 路径,支持相对路径与绝对路径,支持同时设置多个路径,如:"/data/mount0,/data/mount1"。 | 设置后,会将指定的路径及其子路径视为共享路径 | -| DEVICE_NUM_PER_NODE | 单机NPU数量 | Integer | 单机实际NPU数量,不设置默认为8卡服务器。 | | -| CPU_AFFINITY | CPU绑核 | String | 1/0, 不设置默认为0 | 设置后,将开启CPU绑核操作,可提升编译时间的稳定性 | +| 环境变量 | 功能 | 类型 | 取值 | 说明 | +|:---------------------|:---------|:---------|:--------------------------------------------------------------|:----------------------------| +| SHARED_PATHS | 指定共享盘路径 | String | 路径,支持相对路径与绝对路径,支持同时设置多个路径,
如:"/data/mount0,/data/mount1"。 | 设置后,会将指定的路径及其子路径视为共享路径 | +| DEVICE_NUM_PER_NODE | 单机NPU数量 | Integer | 单机实际NPU数量,不设置默认为8卡服务器。 | | +| CPU_AFFINITY | CPU绑核 | String | 1/0, 不设置默认为0 | 设置后,将开启CPU绑核操作,可提升编译时间的稳定性 | + ## 调试调优 以下配置适用于网络模型调试调优过程中的内存分析、DUMP功能、日志打印、通信等待等方面。 -| 环境变量 | 功能 | 类型 | 取值 | 说明 | -|:---------------------------:|:--------------------|:-------:|:---------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------| -| LOG_MF_PATH | Mindformers日志保存位置 | String | 路径,支持相对路径与绝对路径 | 设置后,会将Mindformers的日志文件保存到该路径,建议使用绝对路径。 | -| MS_MEMORY_STATISTIC | 内存析构 | Integer | 1:开启
0:关闭
默认值:0 | 若开启内存析构,会在OOM时打印内存池占用情况。 | -| MINDSPORE_DUMP_CONFIG | 指定Dump功能所依赖的配置文件的路径 | String | 文件路径,支持相对路径与绝对路径 | | -| GLOG_v | 控制Mindspore日志的级别 | Integer | 0-DEBUG
1-INFO
2-WARNING
3-ERROR,表示程序执行出现报错,输出错误日志,程序可能不会终止
4-CRITICAL,表示程序执行出现异常,将会终止执行程序
默认值:2 | 指定日志级别后,将会输出大于或等于该级别的日志信息。 | -| ASCEND_GLOBAL_LOG_LEVEL | 控制CANN的日志级别 | Integer | 0-DEBUG
1-INFO
2-WARNING
3-ERROR
默认值:3 | | -| ASCEND_SLOG_PRINT_TO_STDOUT | 设置plog日志是否打屏 | Integer | 1:开启
0:关闭
默认值:0 | | -| ASCEND_GLOBAL_EVENT_ENABLE | 设置事件级别 | Integer | 1:开启Event日志
0:关闭Event日志
默认值:0 | | -| HCCL_EXEC_TIMEOUT | HCCL进程执行同步等待时间 | Integer | 执行同步等待时间(s)
默认值:1800s | 不同设备进程在分布式训练过程中存在卡间执行任务不一致的场景,通过该环境变量可控制设备间执行时的同步等待的时间。 | -| HCCL_CONNECT_TIMEOUT | HCCL建链超时等待时间 | Integer | 建链等待时间(s)
默认值:120s | 用于限制不同设备之间socket建链过程的超时等待时间。 | - -## 910相关配置 - -以下配置仅在910服务器上适用。 - -| 环境变量 | 功能 | 类型 | 取值 | 说明 | -|:---------------------------:|:----------------------|:-------:|:----------------------------------------------------------------------------------|:------------------------------------------------| -| MS_GE_TRAIN | 训练/推理场景选择 | Integer | 1:训练场景
0:推理场景,host侧内存使用会大于训练场景。
默认值:1 | MS_GE_TRAIN=1和=0分别用于训练和推理场景,GE编译流程不同。 | -| MS_ENABLE_GE | 使能GE后端 | Integer | 1:开启
0:关闭
默认值:1 | | -| MS_ENABLE_REF_MODE | REF_MODE编译优化 | Integer | 1:开启
0:关闭
默认值:1 | CANN-7.0以上版本支持此模式,优化内存管理方式,建议开启。 | -| MS_ENABLE_FORMAT_MODE | 整网ND格式 | Integer | 1:开启
0:关闭
默认值:0 | 将整网算子转换为ND格式计算,建议开启。 | -| MS_GE_ATOMIC_CLEAN_POLICY | 清理网络中atomic算子占用的内存的策略 | Integer | 0:集中清理网络中所有atomic算子占用的内存。
1:不集中清理内存,对网络中每一个atomic算子进行单独清零。
默认值:1 | | -| ENABLE_LAZY_INLINE | 开启lazy inline | Integer | 1:开启
0:关闭
默认值:1 | 此特性在mindspore≥2.2.0下适用。通常在pipeline并行时使用以提高编译性能。默认开启,可配置关闭。 | -| ENABLE_LAZY_INLINE_NO_PIPELINE | 在非pipeline并行下开启lazy inline | Integer | 1:开启
0:关闭
默认值:0 | lazy inline特性默认仅在pipeline并行模式下开启。如需在其他并行模式下使能lazy inline,可将该环境变量设置为1。 | -| MS_ASCEND_CHECK_OVERFLOW_MODE | 溢出检测模式 | String | 默认:饱和模式,不设置此参数,当中间过程溢出时会上报,停止loss更新
INFNAN_MODE:NAN模式,忽略过程中的溢出,结果非溢出就会继续训练 | 遇到持续溢出问题时可尝试设置此变量为INFNAN_MODE。 |

| 环境变量                        | 功能                  | 类型      | 取值                                                                                                                     | 说明                                                       |
|:----------------------------|:--------------------|:--------|:---------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------|
| LOG_MF_PATH                 | Mindformers日志保存位置    | String  | 路径,支持相对路径与绝对路径                                                                                                         | 设置后,会将Mindformers的日志文件保存到该路径,建议使用绝对路径。                    |
| MS_MEMORY_STATISTIC         | 内存统计                | Integer | 1:开启<br>
0:关闭
默认值:0 | 若开启内存统计,会在OOM时打印内存池占用情况。                                                                                        |
| MINDSPORE_DUMP_CONFIG       | 指定Dump功能所依赖的配置文件的路径 | String  | 文件路径,支持相对路径与绝对路径                                                                                                       | |
| GLOG_v                      | 控制Mindspore日志的级别      | Integer | 0-DEBUG<br>
1-INFO
2-WARNING
3-ERROR,表示程序执行出现报错,输出错误日志,程序可能不会终止
4-CRITICAL,表示程序执行出现异常,将会终止执行程序
默认值:2 | 指定日志级别后,将会输出大于或等于该级别的日志信息。 | +| ASCEND_GLOBAL_LOG_LEVEL | 控制CANN的日志级别 | Integer | 0-DEBUG
1-INFO
2-WARNING
3-ERROR
默认值:3 | | +| ASCEND_SLOG_PRINT_TO_STDOUT | 设置plog日志是否打屏 | Integer | 1:开启
0:关闭
默认值:0 | | +| ASCEND_GLOBAL_EVENT_ENABLE | 设置事件级别 | Integer | 1:开启Event日志
0:关闭Event日志
默认值:0 | | +| HCCL_EXEC_TIMEOUT | HCCL进程执行同步等待时间 | Integer | 执行同步等待时间(s)
默认值:1800s | 不同设备进程在分布式训练过程中存在卡间执行任务不一致的场景,通过该环境变量可控制设备间执行时的同步等待时间。 |
| HCCL_CONNECT_TIMEOUT        | HCCL建链超时等待时间        | Integer | 建链等待时间(s)<br>
默认值:120s | 用于限制不同设备之间socket建链过程的超时等待时间。 | + +## Ascend服务器相关配置 + +以下配置仅在Ascend服务器上适用。 + +| 环境变量 | 功能 | 类型 | 取值 | 说明 | +|:-------------------------------|:---------------------------|:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------| +| MS_GE_ATOMIC_CLEAN_POLICY | 清理网络中atomic算子占用的内存的策略 | Integer | 0:集中清理网络中所有atomic算子占用的内存。
1:不集中清理内存,对网络中每一个atomic算子进行单独清零。
默认值:1 | | +| ENABLE_LAZY_INLINE | 开启lazy inline | Integer | 1:开启
0:关闭
默认值:1 | 此特性在mindspore≥2.2.0下适用。通常在pipeline并行时使用以提高编译性能。默认开启,可配置关闭。 | +| ENABLE_LAZY_INLINE_NO_PIPELINE | 在非pipeline并行下开启lazy inline | Integer | 1:开启
0:关闭
默认值:0 | lazy inline特性默认仅在pipeline并行模式下开启。如需在其他并行模式下使能lazy inline,可将该环境变量设置为1。 | +| MS_ASCEND_CHECK_OVERFLOW_MODE | 溢出检测模式 | String | 默认:饱和模式,不设置此参数,当中间过程溢出时会上报,停止loss更新
INFNAN_MODE:NAN模式,忽略过程中的溢出,结果非溢出就会继续训练 | 遇到持续溢出问题时可尝试设置此变量为INFNAN_MODE。 | ## Mindspore mindspore相关环境变量请参考以下链接: -[环境变量](https://www.mindspore.cn/docs/zh-CN/r2.2/note/env_var_list.html) +[MindSpore环境变量](https://www.mindspore.cn/docs/zh-CN/r2.2/note/env_var_list.html) diff --git a/docs/readthedocs/source_zh_cn/index.rst b/docs/readthedocs/source_zh_cn/index.rst index f6ecbb31..cff0bb9e 100644 --- a/docs/readthedocs/source_zh_cn/index.rst +++ b/docs/readthedocs/source_zh_cn/index.rst @@ -15,7 +15,6 @@ :maxdepth: 1 :caption: 安装 - Version_Match.md Install.md @@ -35,9 +34,9 @@ .. toctree:: :glob: :maxdepth: 1 - :caption: 快速入门 + :caption: 使用指南 - Quick_Tour.md + User_Guide.md .. toctree:: @@ -56,6 +55,7 @@ docs/feature_cards/LLM_DataLoader.md docs/feature_cards/Training_Algorithms.md + docs/feature_cards/Long_Sequence_Training.md docs/feature_cards/Resume_Training.md docs/feature_cards/Pet_Tuners.md docs/feature_cards/Auto_Parallel.md -- Gitee From 357b56f2bb7549ee75e8cb117b824fc835e9c28d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=89=9B=E5=90=9B=E8=B1=AA?= Date: Thu, 18 Jul 2024 09:43:32 +0800 Subject: [PATCH 02/33] =?UTF-8?q?=E3=80=90r1.2.0=E3=80=91=E7=A7=BB?= =?UTF-8?q?=E9=99=A4=E9=9D=9E=E6=94=AF=E6=8C=81=E6=A8=A1=E5=9E=8B=E5=88=97?= =?UTF-8?q?=E8=A1=A8=E4=B8=AD=E6=A8=A1=E5=9E=8B=E6=96=87=E6=A1=A3=E8=AF=B4?= =?UTF-8?q?=E6=98=8E=EF=BC=8C=E4=BF=9D=E7=95=99=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/model_cards/bert.md | 165 --- docs/model_cards/bloom.md | 876 ------------- docs/model_cards/clip.md | 166 --- docs/model_cards/codegeex2.md | 714 ---------- docs/model_cards/glm.md | 869 ------------- docs/model_cards/llama.md | 1150 ----------------- docs/model_cards/mae.md | 368 ------ docs/model_cards/pangualpha.md | 837 ------------ docs/model_cards/sam.md | 364 ------ docs/model_cards/swin.md | 395 ------ docs/model_cards/t5.md | 127 -- docs/model_cards/vit.md | 391 ------ docs/model_support_list.md | 10 - mindformers/mindformer_book.py | 12 - research/baichuan/baichuan.md | 240 ---- research/baichuan/baichuan_13b.py | 883 ------------- research/baichuan/convert_reversed.py | 94 -- research/baichuan/convert_weight.py | 103 -- research/baichuan/run_baichuan_13b.yaml | 212 --- research/baichuan/run_baichuan_13b_910b.yaml | 212 --- research/baichuan/run_baichuan_13b_base.py | 126 -- research/baichuan/run_baichuan_13b_chat.py | 135 -- research/baichuan/run_baichuan_7b.yaml | 205 --- research/codegeex/code_tokenizer.py | 86 -- research/codegeex/codegeex.md | 142 -- research/codegeex/codegeex.py | 97 -- research/codegeex/convert_weight.py | 212 --- research/codegeex/data_preprocess.py | 146 --- research/codegeex/run_codegeex.py | 117 -- research/codegeex/run_codegeex_910b.yaml | 170 --- research/knowlm/convert_reversed.py | 68 - research/knowlm/convert_weight.py | 83 -- research/knowlm/generate.py | 54 - research/knowlm/knowlm.md | 133 -- research/knowlm/knowlm.yaml | 8 - research/knowlm/run_knowlm_13b.yaml | 205 --- research/rewardmodel/convert_weight_reward.py | 156 --- .../rewardmodel/run_bloom_7.1b_reward.yaml | 188 --- research/skywork/convert_reversed.py | 70 - research/skywork/convert_weight.py | 73 -- research/skywork/run_skywork.py | 148 --- research/skywork/run_skywork_13b.yaml | 210 --- research/skywork/skywork.md | 579 --------- research/skywork/skywork_dataprocess.py | 138 -- research/telechat/convert_reversed.py | 80 -- research/telechat/convert_weight.py | 96 -- .../telechat/convert_weight_ms_to_torch.py | 97 -- 
.../telechat/convert_weight_torch_to_ms.py | 110 -- research/telechat/run_telechat.py | 199 --- research/telechat/run_telechat_12b_910b.yaml | 210 --- .../run_telechat_12b_finetune_910b.yaml | 210 --- research/telechat/run_telechat_7b_910b.yaml | 210 --- .../run_telechat_7b_finetune_910b.yaml | 210 --- research/telechat/run_telechat_predict.py | 160 --- research/telechat/telechat.md | 548 -------- research/telechat/telechat.py | 447 ------- research/telechat/telechat_config.py | 174 --- research/telechat/telechat_layer.py | 253 ---- research/telechat/telechat_predict_utils.py | 73 -- research/telechat/telechat_preprocess.py | 161 --- research/telechat/telechat_tokenizer.py | 263 ---- research/telechat/telechat_transformer.py | 497 ------- research/visualglm/attention.py | 153 --- research/visualglm/context.cfg | 8 - research/visualglm/convert_weight.py | 303 ----- .../visualglm/examples/example_inputs.jsonl | 1 - research/visualglm/examples/titanic.jpg | Bin 29865 -> 0 bytes .../visualglm/finetune/finetune_inputs.jsonl | 1 - research/visualglm/finetune/sea.jpg | Bin 28270 -> 0 bytes research/visualglm/layers.py | 88 -- research/visualglm/qformer.py | 997 -------------- research/visualglm/run_visualglm.py | 208 --- ...visualglm_6b_image_to_text_generation.yaml | 225 ---- research/visualglm/run_visualglm_finetune.py | 139 -- research/visualglm/run_visualglm_lora.yaml | 291 ----- research/visualglm/run_visualglm_pipeline.py | 92 -- research/visualglm/run_visualglm_with_lora.py | 217 ---- research/visualglm/visualglm.md | 373 ------ research/visualglm/visualglm.py | 307 ----- research/visualglm/visualglm_base.py | 114 -- research/visualglm/visualglm_config.py | 153 --- research/visualglm/visualglm_dataloader.py | 122 -- research/visualglm/visualglm_dataset.py | 327 ----- research/visualglm/visualglm_glm.py | 210 --- research/visualglm/visualglm_lr_schedule.py | 71 - research/visualglm/visualglm_processor.py | 199 --- research/visualglm/visualglm_qformer.py | 572 -------- .../visualglm_text_generation_pipeline.py | 257 ---- research/visualglm/visualglm_vit.py | 109 -- research/wizardcoder/convert_reversed.py | 151 --- research/wizardcoder/convert_weight.py | 212 --- .../finetune_wizardcoder_15b_bf16.yaml | 214 --- .../finetune_wizardcoder_15b_fp16.yaml | 214 --- research/wizardcoder/infer_bf16_npu.py | 82 -- .../inference_wizardcoder_pytorch.py | 119 -- research/wizardcoder/mbpp_gen_online.py | 126 -- research/wizardcoder/mbpp_process.py | 72 -- .../predict_wizardcoder_15b_fp16.yaml | 214 --- .../pretrain_wizardcoder_15b_bf16.yaml | 215 --- research/wizardcoder/run_wizardcoder.py | 163 --- research/wizardcoder/run_wizardcoder.yaml | 214 --- research/wizardcoder/wizardcoder.md | 514 -------- research/wizardcoder/wizardcoder.py | 417 ------ research/wizardcoder/wizardcoder_config.py | 107 -- research/wizardcoder/wizardcoder_modules.py | 599 --------- .../wizardcoder/wizardcoder_preprocess.py | 192 --- research/wizardcoder/wizardcoder_tokenizer.py | 261 ---- research/yi/yi.md | 2 +- research/ziya/run_ziya_13b.yaml | 206 --- research/ziya/ziya.md | 73 -- .../st/test_model/test_bert_model/__init__.py | 15 - .../test_bert_model/test_auto_class.py | 95 -- .../test_bert_model/test_pipeline.py | 41 - .../test_bert_model/test_trainer.py | 102 -- .../test_model/test_bloom_model/__init__.py | 15 - .../test_model/test_bloom_model/base_model.py | 55 - .../test_bloom_model/test_auto_class.py | 97 -- .../test_model/test_bloom_model/test_eval.py | 51 - .../test_bloom_model/test_pipeline.py | 41 - 
.../test_bloom_model/test_predict.py | 50 - .../test_model/test_bloom_model/test_train.py | 54 - .../st/test_model/test_clip_model/__init__.py | 15 - .../test_clip_model/test_clip_model.py | 110 -- .../test_clip_model/test_clip_tokenizer.py | 103 -- .../test_codegeex2_model/__init__.py | 15 - .../test_codegeex2_model/base_model.py | 68 - .../test_codegeex2_model/test_auto_class.py | 86 -- .../test_codegeex2_model/test_eval.py | 50 - .../test_codegeex2_model/test_predict.py | 51 - .../test_codegeex2_model/test_train.py | 53 - .../test_glm_lora_model/__init__.py | 15 - .../test_glm_lora_trainer.py | 108 -- .../st/test_model/test_glm_model/__init__.py | 15 - .../test_glm_model/test_auto_class.py | 103 -- .../test_glm_model/test_pipeline.py | 41 - .../test_model/test_glm_model/test_trainer.py | 122 -- .../st/test_model/test_mae_model/__init__.py | 15 - .../test_mae_model/test_auto_class.py | 77 -- .../test_pangualpha_model/__init__.py | 15 - .../test_pangualpha_autoclass.py | 93 -- .../test_pangualpha_pipeline.py | 38 - .../test_pangualpha_trainer.py | 114 -- tests/st/test_model/test_qa_model/__init__.py | 15 - .../test_model/test_qa_model/test_qa_model.py | 86 -- .../st/test_model/test_sam_model/__init__.py | 0 .../test_sam_model/test_auto_class.py | 77 -- .../test_sam_model/test_pipeline.py | 128 -- .../st/test_model/test_swin_model/__init__.py | 15 - .../test_swin_model/test_auto_class.py | 77 -- tests/st/test_model/test_t5_model/__init__.py | 15 - .../test_t5_model/test_t5_generation.py | 59 - .../test_t5_model/test_t5_tokenizer.py | 45 - .../test_t5_model/test_t5_trainer.py | 153 --- .../test_model/test_tokcls_model/__init__.py | 15 - .../test_tokcls_model/test_tokcls_model.py | 82 -- .../test_tokcls_tokenizer.py | 66 - .../test_model/test_txtcls_model/__init__.py | 15 - .../test_txtcls_model/test_txtcls_model.py | 78 -- .../st/test_model/test_vit_model/__init__.py | 0 .../test_vit_model/test_auto_class.py | 77 -- .../test_wizardcoder_model/__init__.py | 15 - .../test_wizardcoder_model/test_trainer.py | 67 - .../test_training_precision.py | 110 -- 163 files changed, 1 insertion(+), 28756 deletions(-) delete mode 100644 docs/model_cards/bert.md delete mode 100644 docs/model_cards/bloom.md delete mode 100644 docs/model_cards/clip.md delete mode 100644 docs/model_cards/codegeex2.md delete mode 100644 docs/model_cards/glm.md delete mode 100644 docs/model_cards/llama.md delete mode 100644 docs/model_cards/mae.md delete mode 100644 docs/model_cards/pangualpha.md delete mode 100644 docs/model_cards/sam.md delete mode 100644 docs/model_cards/swin.md delete mode 100644 docs/model_cards/t5.md delete mode 100644 docs/model_cards/vit.md delete mode 100644 research/baichuan/baichuan.md delete mode 100644 research/baichuan/baichuan_13b.py delete mode 100644 research/baichuan/convert_reversed.py delete mode 100644 research/baichuan/convert_weight.py delete mode 100644 research/baichuan/run_baichuan_13b.yaml delete mode 100644 research/baichuan/run_baichuan_13b_910b.yaml delete mode 100644 research/baichuan/run_baichuan_13b_base.py delete mode 100644 research/baichuan/run_baichuan_13b_chat.py delete mode 100644 research/baichuan/run_baichuan_7b.yaml delete mode 100644 research/codegeex/code_tokenizer.py delete mode 100644 research/codegeex/codegeex.md delete mode 100644 research/codegeex/codegeex.py delete mode 100644 research/codegeex/convert_weight.py delete mode 100644 research/codegeex/data_preprocess.py delete mode 100644 research/codegeex/run_codegeex.py delete mode 100644 
research/codegeex/run_codegeex_910b.yaml delete mode 100644 research/knowlm/convert_reversed.py delete mode 100644 research/knowlm/convert_weight.py delete mode 100644 research/knowlm/generate.py delete mode 100644 research/knowlm/knowlm.md delete mode 100644 research/knowlm/knowlm.yaml delete mode 100644 research/knowlm/run_knowlm_13b.yaml delete mode 100644 research/rewardmodel/convert_weight_reward.py delete mode 100644 research/rewardmodel/run_bloom_7.1b_reward.yaml delete mode 100644 research/skywork/convert_reversed.py delete mode 100644 research/skywork/convert_weight.py delete mode 100644 research/skywork/run_skywork.py delete mode 100644 research/skywork/run_skywork_13b.yaml delete mode 100644 research/skywork/skywork.md delete mode 100644 research/skywork/skywork_dataprocess.py delete mode 100644 research/telechat/convert_reversed.py delete mode 100644 research/telechat/convert_weight.py delete mode 100644 research/telechat/convert_weight_ms_to_torch.py delete mode 100644 research/telechat/convert_weight_torch_to_ms.py delete mode 100644 research/telechat/run_telechat.py delete mode 100644 research/telechat/run_telechat_12b_910b.yaml delete mode 100644 research/telechat/run_telechat_12b_finetune_910b.yaml delete mode 100644 research/telechat/run_telechat_7b_910b.yaml delete mode 100644 research/telechat/run_telechat_7b_finetune_910b.yaml delete mode 100644 research/telechat/run_telechat_predict.py delete mode 100644 research/telechat/telechat.md delete mode 100644 research/telechat/telechat.py delete mode 100644 research/telechat/telechat_config.py delete mode 100644 research/telechat/telechat_layer.py delete mode 100644 research/telechat/telechat_predict_utils.py delete mode 100644 research/telechat/telechat_preprocess.py delete mode 100644 research/telechat/telechat_tokenizer.py delete mode 100644 research/telechat/telechat_transformer.py delete mode 100644 research/visualglm/attention.py delete mode 100644 research/visualglm/context.cfg delete mode 100644 research/visualglm/convert_weight.py delete mode 100644 research/visualglm/examples/example_inputs.jsonl delete mode 100644 research/visualglm/examples/titanic.jpg delete mode 100644 research/visualglm/finetune/finetune_inputs.jsonl delete mode 100644 research/visualglm/finetune/sea.jpg delete mode 100644 research/visualglm/layers.py delete mode 100644 research/visualglm/qformer.py delete mode 100644 research/visualglm/run_visualglm.py delete mode 100644 research/visualglm/run_visualglm_6b_image_to_text_generation.yaml delete mode 100644 research/visualglm/run_visualglm_finetune.py delete mode 100644 research/visualglm/run_visualglm_lora.yaml delete mode 100644 research/visualglm/run_visualglm_pipeline.py delete mode 100644 research/visualglm/run_visualglm_with_lora.py delete mode 100644 research/visualglm/visualglm.md delete mode 100644 research/visualglm/visualglm.py delete mode 100644 research/visualglm/visualglm_base.py delete mode 100644 research/visualglm/visualglm_config.py delete mode 100644 research/visualglm/visualglm_dataloader.py delete mode 100644 research/visualglm/visualglm_dataset.py delete mode 100644 research/visualglm/visualglm_glm.py delete mode 100644 research/visualglm/visualglm_lr_schedule.py delete mode 100644 research/visualglm/visualglm_processor.py delete mode 100644 research/visualglm/visualglm_qformer.py delete mode 100644 research/visualglm/visualglm_text_generation_pipeline.py delete mode 100644 research/visualglm/visualglm_vit.py delete mode 100644 research/wizardcoder/convert_reversed.py 
delete mode 100644 research/wizardcoder/convert_weight.py delete mode 100644 research/wizardcoder/finetune_wizardcoder_15b_bf16.yaml delete mode 100644 research/wizardcoder/finetune_wizardcoder_15b_fp16.yaml delete mode 100644 research/wizardcoder/infer_bf16_npu.py delete mode 100644 research/wizardcoder/inference_wizardcoder_pytorch.py delete mode 100644 research/wizardcoder/mbpp_gen_online.py delete mode 100644 research/wizardcoder/mbpp_process.py delete mode 100644 research/wizardcoder/predict_wizardcoder_15b_fp16.yaml delete mode 100644 research/wizardcoder/pretrain_wizardcoder_15b_bf16.yaml delete mode 100644 research/wizardcoder/run_wizardcoder.py delete mode 100644 research/wizardcoder/run_wizardcoder.yaml delete mode 100644 research/wizardcoder/wizardcoder.md delete mode 100644 research/wizardcoder/wizardcoder.py delete mode 100644 research/wizardcoder/wizardcoder_config.py delete mode 100644 research/wizardcoder/wizardcoder_modules.py delete mode 100644 research/wizardcoder/wizardcoder_preprocess.py delete mode 100644 research/wizardcoder/wizardcoder_tokenizer.py delete mode 100755 research/ziya/run_ziya_13b.yaml delete mode 100644 research/ziya/ziya.md delete mode 100644 tests/st/test_model/test_bert_model/__init__.py delete mode 100644 tests/st/test_model/test_bert_model/test_auto_class.py delete mode 100644 tests/st/test_model/test_bert_model/test_pipeline.py delete mode 100644 tests/st/test_model/test_bert_model/test_trainer.py delete mode 100644 tests/st/test_model/test_bloom_model/__init__.py delete mode 100644 tests/st/test_model/test_bloom_model/base_model.py delete mode 100644 tests/st/test_model/test_bloom_model/test_auto_class.py delete mode 100644 tests/st/test_model/test_bloom_model/test_eval.py delete mode 100644 tests/st/test_model/test_bloom_model/test_pipeline.py delete mode 100644 tests/st/test_model/test_bloom_model/test_predict.py delete mode 100644 tests/st/test_model/test_bloom_model/test_train.py delete mode 100644 tests/st/test_model/test_clip_model/__init__.py delete mode 100644 tests/st/test_model/test_clip_model/test_clip_model.py delete mode 100644 tests/st/test_model/test_clip_model/test_clip_tokenizer.py delete mode 100644 tests/st/test_model/test_codegeex2_model/__init__.py delete mode 100644 tests/st/test_model/test_codegeex2_model/base_model.py delete mode 100644 tests/st/test_model/test_codegeex2_model/test_auto_class.py delete mode 100644 tests/st/test_model/test_codegeex2_model/test_eval.py delete mode 100644 tests/st/test_model/test_codegeex2_model/test_predict.py delete mode 100644 tests/st/test_model/test_codegeex2_model/test_train.py delete mode 100644 tests/st/test_model/test_glm_lora_model/__init__.py delete mode 100644 tests/st/test_model/test_glm_lora_model/test_glm_lora_trainer.py delete mode 100644 tests/st/test_model/test_glm_model/__init__.py delete mode 100644 tests/st/test_model/test_glm_model/test_auto_class.py delete mode 100644 tests/st/test_model/test_glm_model/test_pipeline.py delete mode 100644 tests/st/test_model/test_glm_model/test_trainer.py delete mode 100644 tests/st/test_model/test_mae_model/__init__.py delete mode 100644 tests/st/test_model/test_mae_model/test_auto_class.py delete mode 100644 tests/st/test_model/test_pangualpha_model/__init__.py delete mode 100644 tests/st/test_model/test_pangualpha_model/test_pangualpha_autoclass.py delete mode 100644 tests/st/test_model/test_pangualpha_model/test_pangualpha_pipeline.py delete mode 100644 tests/st/test_model/test_pangualpha_model/test_pangualpha_trainer.py delete mode 
100644 tests/st/test_model/test_qa_model/__init__.py delete mode 100644 tests/st/test_model/test_qa_model/test_qa_model.py delete mode 100644 tests/st/test_model/test_sam_model/__init__.py delete mode 100644 tests/st/test_model/test_sam_model/test_auto_class.py delete mode 100644 tests/st/test_model/test_sam_model/test_pipeline.py delete mode 100644 tests/st/test_model/test_swin_model/__init__.py delete mode 100644 tests/st/test_model/test_swin_model/test_auto_class.py delete mode 100644 tests/st/test_model/test_t5_model/__init__.py delete mode 100644 tests/st/test_model/test_t5_model/test_t5_generation.py delete mode 100644 tests/st/test_model/test_t5_model/test_t5_tokenizer.py delete mode 100644 tests/st/test_model/test_t5_model/test_t5_trainer.py delete mode 100644 tests/st/test_model/test_tokcls_model/__init__.py delete mode 100644 tests/st/test_model/test_tokcls_model/test_tokcls_model.py delete mode 100644 tests/st/test_model/test_tokcls_model/test_tokcls_tokenizer.py delete mode 100644 tests/st/test_model/test_txtcls_model/__init__.py delete mode 100644 tests/st/test_model/test_txtcls_model/test_txtcls_model.py delete mode 100644 tests/st/test_model/test_vit_model/__init__.py delete mode 100644 tests/st/test_model/test_vit_model/test_auto_class.py delete mode 100644 tests/st/test_model/test_wizardcoder_model/__init__.py delete mode 100644 tests/st/test_model/test_wizardcoder_model/test_trainer.py delete mode 100644 tests/st/test_model/test_wizardcoder_model/test_training_precision.py diff --git a/docs/model_cards/bert.md b/docs/model_cards/bert.md deleted file mode 100644 index 2061b752..00000000 --- a/docs/model_cards/bert.md +++ /dev/null @@ -1,165 +0,0 @@ -# BERT - -## 模型描述 - -BERT:全名`Bidirectional Encoder Representations from Transformers`模型是谷歌在2018年基于Wiki数据集训练的Transformer模型。 - -[论文](https://arxiv.org/abs/1810.04805)J Devlin,et al., Pre-training of Deep Bidirectional Transformers for Language Understanding, 2019 - -## 预训练数据集下载 - -1. 从[zhwiki](https://dumps.wikimedia.org/zhwiki/)或[enwiki](https://dumps.wikimedia.org/enwiki/)中下载数据集。 -2. 
使用[WikiExtractor](https://github.com/attardi/wikiextractor)提取和整理数据集中的文本,执行命令如下: - -```shell -pip install wikiextractor -python -m wikiextractor.WikiExtractor -o -b -``` - -### 数据处理 - -#### TFRecord类型BERT预训练数据 - -用户可以参考[BERT](https://github.com/google-research/bert#pre-training-with-bert)代码仓中的create_pretraining_data.py文件, -进行`TFRecord`格式文件的生成, -如果出现下述报错 - -```bash -AttributeError: module 'tokenization' has no attribute 'FullTokenizer' -``` - -请安装`bert-tensorflow`。注意,用户需要同时下载对应的`vocab.txt`文件。 - -## 快速使用 - -### 脚本启动 - -> 需开发者提前clone工程。 - -- 请参考[使用脚本启动](../../README.md#方式一使用已有脚本启动) - -示例命令如下,将会执行一个12层的BERT模型训练 - -```shell -python run_mindformer.py --config configs/bert/run_bert_base_uncased.yaml --run_mode train \ - --device_target Ascend \ - --train_dataset_dir /your_path/wiki_data -``` - -### 调用API启动 - -> 需开发者提前pip安装。具体接口说明请参考[API接口](https://gitee.com/mindspore/transformer/wikis/API/) - -- Model调用接口 - -```python -from mindformers import BertForPreTraining, BertConfig - -BertForPreTraining.show_support_list() -# 输出: -# - support list of BertForPreTraining is: -# - ['bert_base_uncased'] -# - ------------------------------------- - -# 模型标志加载模型 -model = BertForPreTraining.from_pretrained("bert_base_uncased") - -#模型配置加载模型 -config = BertConfig.from_pretrained("bert_base_uncased") -# {'model_config': {'use_one_hot_embeddings': False, 'num_labels': 1, 'dropout_prob': 0.1, -# 'batch_size': 128, seq_length: 128, vocab_size: 30522, embedding_size: 768, num_layers: 12, -# num_heads: 12, expand_ratio: 4, hidden_act: "gelu", post_layernorm_residual: True, -# hidden_dropout_prob: 0.1, attention_probs_dropout_prob: 0.1, max_position_embeddings: 512, -# type_vocab_size: 2, initializer_range: 0.02, use_relative_positions: False, -# use_past: False, checkpoint_name_or_path: "bert_base_uncased"}} -model = BertForPreTraining(config) -``` - -- Trainer接口开启训练/评估/推理: - -```python -import mindspore; mindspore.set_context(mode=0, device_id=0) -from mindformers.trainer import Trainer - -# 初始化预训练任务 -trainer = Trainer(task='fill_mask', - model='bert_base_uncased', - train_dataset='/your_path/wiki_data') -trainer.train() # 开启预训练 -``` - -### 多卡训练 - -- 单机8卡数据并行训练BERT-base模型 - -```bash -RANK_SIZE=$1 -HOSTFILE=$2 - -mpirun --allow-run-as-root -n $RANK_SIZE --hostfile $HOSTFILE \ - --output-filename run_distributed_train_bert \ - -x NCCL_IB_HCA -x PATH -x LD_LIBRARY_PATH -x PYTHONPATH -x NCCL_SOCKET_IFNAME -n $RANK_SIZE \ -python run_mindformer.py --config ./configs/bert/run_bert_base_uncased.yaml --use_parallel True --run_mode train > distribute_train_gpu_log.txt 2>&1 & -``` - -其中各个参数的含义: - -- RANK_SIZE:总共使用的卡的数量,采用单机8卡训练时,设为8 - -- HOSTFILE:一个文本文件,格式如下 - -```text -10.1.2.3 slots=8 -``` - -表示节点ip为10.1.2.3的服务器拥有8张设备卡。用户应该将自己的实际IP替换掉10.1.2.3。 - -日志会重定向到`distribute_train_gpu_log.txt`中。可以通过`tail -f distribute_train_gpu_log.txt`的 -命令及时刷新日志。注意此时8张卡的日志都会输出到上述的文件中,造成重复输出。用户在如下的位置查看每卡的输出 - -```bash -tail -f run_distributed_train_bert/1/rank.0/stdout -``` - -要完成上述训练只需输入命令 - -```bash -bash scripts/examples/masked_language_modeling/bert_pretrain_distributed_gpu.sh RANK_SIZE hostfile -``` - -即可。 - -#### 计算Loss - -```python -import mindspore; mindspore.set_context(mode=0, device_id=0) -from mindformers import BertForPreTraining, BertTokenizer -from mindspore import Tensor -import mindspore.common.dtype as mstype -model = BertForPreTraining.from_pretrained('bert_base_uncased') -tokenizer = BertTokenizer.from_pretrained('bert_base_uncased') -data = tokenizer("Paris is the [MASK] of France.", - max_length=128, padding="max_length") -input_ids 
= Tensor([data['input_ids']], mstype.int32) -attention_mask = Tensor([data['attention_mask']], mstype.int32) -token_type_ids = Tensor([data['token_type_ids']], mstype.int32) -masked_lm_positions = Tensor([[4]], mstype.int32) -next_sentence_labels = Tensor([[1]], mstype.int32) -masked_lm_weights = Tensor([[1]], mstype.int32) -masked_lm_ids = Tensor([[3007]], mstype.int32) -output = model(input_ids, attention_mask, token_type_ids, next_sentence_labels, masked_lm_positions, masked_lm_ids, masked_lm_weights) -print(output) -#[0.6706] -``` - -## 模型权重 - -本仓库中的`bert_base_uncased`来自于HuggingFace的[`bert_base_uncased`](https://huggingface.co/bert-base-uncased), 基于下述的步骤获取: - -1. 从上述的链接中下载`bert_base_uncased`的HuggingFace权重,文件名为`pytorch_model.bin` - -2. 执行转换脚本,得到转换后的输出文件`mindspore_t5.ckpt` - -```shell -python mindformers/models/bert/convert_weight.py --layers 12 --torch_path pytorch_model.bin --mindspore_path ./mindspore_bert.ckpt -``` diff --git a/docs/model_cards/bloom.md b/docs/model_cards/bloom.md deleted file mode 100644 index 025272ee..00000000 --- a/docs/model_cards/bloom.md +++ /dev/null @@ -1,876 +0,0 @@ -# Bloom - -## 模型描述 - -Bloom (BigScience Large Open-science Open-access Multilingual) 是一个开源的开放权限的自回归大语言模型(LLM),用于对用自然语言表达的多种下游任务进行文本生成。Bloom系列模型涵盖从560M到176B的多种规模,其中176B千亿级参数的大模型的预训练基于工业级的计算机集群,在46种语言和13种编程语言的文本生成中达到比拟人类写作的SOTA效果。对于训练数据集中没有显式包括的下游任务,Bloom也可以通过指令的方式,给出令人满意的zero-shot回答。 - -[BLOOM: A 176B-Parameter Open-Access Multilingual Language Model](https://arxiv.org/abs/2211.05100) - -## 模型性能 - -| config | task | Datasets | metric | score | [train performance](#预训练) | [predict performance](#基于pipeline的推理) | -|:-------------------:|:---------------:|:--------:|:------:|:-----:|:-------------------------:|:-------------------------------------:| -| run_bloom_560m.yaml | text_generation | - | - | - | - | - | -| run_bloom_7.1b.yaml | text_generation | Alpaca | - | - | 1063tokens/s/p - Atlas 800 | 21.33tokens/s(use_past True) - Atlas 800 | -| run_bloom_65b.yaml | text_generation | - | - | - | - | - | -| run_bloom_176b.yaml | text_generation | - | - | - | - | - | - -## 仓库介绍 - -`Bloom` 基于 `mindformers` 实现,主要涉及的文件有: - -1. 模型具体实现:`mindformers/models/bloom` - - ``` - bloom - ├── __init__.py - ├── convert_weight.py # 权重转换脚本 - ├── bloom.py # 模型实现 - ├── bloom_config.py # 模型配置项 - ├── layers.py # bloom 层定义 - ├── bloom_tokenizer.py # tokenizer - ``` - -2. 
模型配置:`configs/bloom` - - ``` - bloom - ├── run_bloom_560m.yaml # 560m 用于推理 - ├── run_bloom_7.1b.yaml # 7.1b 用于8卡(Atlas 800)训练 - ├── run_bloom_7.1b_910b.yaml # 7.1b 用于8卡(Atlas 800T A2)训练 - └── run_bloom_7.1b_910b_fa.yaml # 7.1b 用于8卡(Atlas 800T A2)训练,并使用Flash Attention - ``` - - 其中Bloom_7.1b可在单机单卡上推理,在单机8卡上训练。 - -## 前期准备 - -### [mindformers安装](../../README.md#二mindformers安装) - -### 生成RANK_TABLE_FILE(**多卡运行必须环节**) - -运行mindformers/tools/hccl_tools.py生成RANK_TABLE_FILE的json文件 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" -``` - -**注:若使用ModelArts的notebook环境,可从 `/user/config/jobstart_hccl.json` 路径下直接获取rank table,无需手动生成** - -RANK_TABLE_FILE 单机8卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "1", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - {"device_id": "0","device_ip": "192.1.27.6","rank_id": "0"}, - {"device_id": "1","device_ip": "192.2.27.6","rank_id": "1"}, - {"device_id": "2","device_ip": "192.3.27.6","rank_id": "2"}, - {"device_id": "3","device_ip": "192.4.27.6","rank_id": "3"}, - {"device_id": "4","device_ip": "192.1.27.7","rank_id": "4"}, - {"device_id": "5","device_ip": "192.2.27.7","rank_id": "5"}, - {"device_id": "6","device_ip": "192.3.27.7","rank_id": "6"}, - {"device_id": "7","device_ip": "192.4.27.7","rank_id": "7"}], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 多机RANK_TABLE_FILE合并(**多机多卡必备环节**) - -- step 1. 首先根据上章节内容,在每个机器上生成各自的`RANK_TABLE_FILE`文件,然后将不同机器上生成的`RANK_TABLE_FILE`文件全部拷贝到同一台机器上。 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" --server_ip xx.xx.xx.xx -``` - -**注:需要根据机器的ip地址指定 --server_ip,避免由于不同机器server_ip不同,导致多节点间通信失败。** - -- step 2. 运行mindformers/tools/merge_hccl.py将不同机器上生成的`RANK_TABLE_FILE`文件合并 - -```bash -# 运行如下命令,合并每个机器上的RANK_TABLE_FILE的json文件。 -python ./mindformers/tools/merge_hccl.py hccl*.json -``` - -- step 3. 
将合并后的`RANK_TABLE_FILE`文件拷贝到所有机器中,保证不同机器上的`RANK_TABLE_FILE`相同。 - -RANK_TABLE_FILE 双机16卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "2", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.0", "rank_id": "0" - }, - { - "device_id": "1", "device_ip": "192.168.1.0", "rank_id": "1" - }, - { - "device_id": "2", "device_ip": "192.168.2.0", "rank_id": "2" - }, - { - "device_id": "3", "device_ip": "192.168.3.0", "rank_id": "3" - }, - { - "device_id": "4", "device_ip": "192.168.0.1", "rank_id": "4" - }, - { - "device_id": "5", "device_ip": "192.168.1.1", "rank_id": "5" - }, - { - "device_id": "6", "device_ip": "192.168.2.1", "rank_id": "6" - }, - { - "device_id": "7", "device_ip": "192.168.3.1", "rank_id": "7" - } - ], - "host_nic_ip": "reserve" - }, - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.1", "rank_id": "8" - }, - { - "device_id": "1", "device_ip": "192.168.1.1", "rank_id": "9" - }, - { - "device_id": "2", "device_ip": "192.168.2.1", "rank_id": "10" - }, - { - "device_id": "3", "device_ip": "192.168.3.1", "rank_id": "11" - }, - { - "device_id": "4", "device_ip": "192.168.0.2", "rank_id": "12" - }, - { - "device_id": "5", "device_ip": "192.168.1.2", "rank_id": "13" - }, - { - "device_id": "6", "device_ip": "192.168.2.2", "rank_id": "14" - }, - { - "device_id": "7", "device_ip": "192.168.3.2", "rank_id": "15" - } - ], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 模型权重下载与转换 - -作为参考,这里描述CheckPoint在HuggingFace或者官方开源github仓库和MindSpore间的转换,在不同分布式策略间的转换。 - -如果不需要加载权重,或者使用from_pretrained功能自动下载,则可以跳过此章节。 - -Mindformers可以直接通过高级接口from_pretrained下载转换好的560M和7.1B两种规模的ckpt,无需手动转换。如需手动下载,下面提供手动下载链接。 - -| | huggingface | mindspore ckpt | mindspore tokenizer | -|-|-|-|-| -|bloom_560m| [bloom_560m](https://huggingface.co/bigscience/bloomz-560m) | [bloom_560m.ckpt](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/MindFormers/bloom/bloom_560m.ckpt) | [tokenizer.json]() | -|bloom_7.1b| [bloom_7.1b](https://huggingface.co/bigscience/bloomz-7b1-mt) | [bloom_7.1b.ckpt](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/MindFormers/bloom/bloom_7.1b.ckpt) | 同上 | - -higgingface到mindformers的CheckPoint转换由以下命令完成。 - -```bash -cd mindformers/models/bloom -python convert_weight.py --n_head=xx --hidden_size=xx --torch_path=path_to_hf_bin_file_or_folder --mindspore_path=output_path -``` - -其中`--n_head=xx --hidden_size=xx`根据模型定义,bloom_560m的分别为16/1024;bloom_7.1b的分别为32/4096. - -### [模型权重切分与合并](../feature_cards/Transform_Ckpt.md) - -从hugging face或官方github仓库转换而来的权重通常是单卡权重,基于该权重进行多卡微调,评测,推理,涉及ckpt从单机策略到分布式策略的切换。 - -通常训练采用分布式训练,基于该权重进行评测,推理多采用单卡,涉及ckpt从分布式策略到单机策略的切换。 - -以上涉及到ckpt的单卡,多卡转换,详细教程请参考特性文档模型[权重切分与合并](../feature_cards/Transform_Ckpt.md) - -## 基于API的快速使用 - -### 基于AutoClass的使用 - -可以使用AutoClass接口,通过模型名称获取相应的model/preprocess/tokenizer等实例,并自动下载并加载权重 - -`from_pretrained()` 接口会自动从云上下载预训练的模型,存储路径:`mindformers/checkpoint_download/model_name` - -```python -import mindspore as ms -from mindformers import AutoModel, AutoTokenizer - -# 指定图模式,指定使用训练卡id -ms.set_context(mode=0, device_id=0) - -tokenizer = AutoTokenizer.from_pretrained("bloom_560m") -model = AutoModel.from_pretrained("bloom_560m") - -inputs = tokenizer("what color is the sky?") - -outputs = model.generate(inputs["input_ids"], max_length=100) -response = tokenizer.decode(outputs, skip_special_tokens=True)[0] -print(response) -# output -# what color is the sky? 
blue -``` - -### 基于Pipeline的快速推理 - -```python -from mindformers import AutoModel, AutoTokenizer, TextGenerationPipeline -import mindspore as ms - -# 指定图模式,指定使用训练卡id -ms.set_context(mode=0, device_id=0) - -model = AutoModel.from_pretrained("bloom_560m") -tokenizer = AutoTokenizer.from_pretrained("bloom_560m") -bloom_ppl = TextGenerationPipeline(model, tokenizer, max_length=256) - -result = bloom_ppl([ - "what color is the sky?", - "Translate to English: Je t’aime." - ]) -print(result) - -# expect print result -# {'text_generation_text': ['what color is the sky? blue']}, {'text_generation_text': ['Translate to English: Je t’aime. I love you.']}] - -``` - -## 预训练 - -### 数据集准备-预训练 - -这里以Alpaca为例,数据大概21MB,用于调试。 -首先去官方下载[alpaca_data.json文件](https://github.com/tatsu-lab/stanford_alpaca/blob/main/alpaca_data.json) -然后调用`mindformers/tools/dataset_preprocess/bloom/make_mindrecord.py`脚本将json转换成mindrecord文件。 - -```bash -python mindformers/tools/dataset_preprocess/bloom/make_mindrecord.py --input_dataset_file=XXX/alpaca_data.json --output_path=XXX --N=51200 -``` - -其中`--N=51200`表示将json中的52002条数据中的前51200转换成mindrecord(推荐),`--N=-1`将转换全部json中的数据. 在执行此脚本时,对于每个prompt如下操作将被执行: - -- 将问题和回答按照模板制作成prompt text; -- 使用BloomTokenizer将prompt从text转成token ids; -- 添加eos_token_id直到seq_length。 - -执行文本后,`--output_path`目录下将生成mindrecord文件。 - -### 脚本启动 - -#### 多卡训练 - -多卡运行需要RANK_FILE_TABLE,请参考前期准备-[生成RANK_TABLE_FILE](#生成rank_table_file多卡运行必须环节) - -- 单机多卡 - -```bash -cd scripts -bash run_distribute.sh RANK_TABLE_FILE ../configs/bloom/run_bloom_7.1b.yaml [0,8] train 8 -``` - -多机多卡运行需要合并不同机器的RANK_FILE_TABLE,参考前期准备-[多机RANK_TABLE_FILE合并](#多机rank_table_file合并多机多卡必备环节) - -- 多机多卡 - -在每台机器上启动`bash run_distribute.sh`。 - -```bash -server_count=12 -device_num=8*$server_count -# launch ranks in the 0th server -cd scripts -bash run_distribute.sh $RANK_TABLE_FILE ../configs/bloom/run_bloom_65b.yaml [0,8] train $device_num - -# launch ranks in the 1-11 server via ssh -for idx in {1..11} -do - let rank_start=8*$idx - let rank_end=$rank_start+8 - ssh ${IP_LIST[$idx]} "cd scripts; bash run_distribute.sh $RANK_TABLE_FILE ../configs/bloom/run_bloom_65b.yaml [$rank_start,$rank_end] train $device_num" -done -``` - -其中 - -- `RANK_TABLE_FILE`为上一步汇总并分发的总rank table文件; -- `IP_LIST`为12台服务器的IP地址。如192.168.0.[0-11] - -```bash -IP_LIST=("192.168.0.0", "192.168.0.1", ..., "192.168.0.11") -``` - -## 微调 - -### 数据集准备 - -参考[数据集准备-预训练](#数据集准备-预训练) - -### 全参微调 - -当前模型已支持使用**Flash Attention算法**进行全参微调,请使用`configs/bloom/run_bloom_7.1b_910b_fa.yaml`替换下述说明中的配置文件以使能Flash Attention。关于Flash Attention,请参考 [Flash Attention使用文档](../feature_cards/Training_Algorithms.md#flash-attention) - -通过yaml配置文件中的`load_checkpoint:`字段来控制是否加载CKPT - -#### 多卡微调 - -多卡运行需要RANK_FILE_TABLE,请参考前期准备-[生成RANK_TABLE_FILE](#生成rank_table_file多卡运行必须环节) - -- 单机多卡 - -```bash -cd scripts -bash run_distribute.sh RANK_TABLE_FILE../configs/bloom/run_bloom_7.1b.yaml [0,8] finetune 8 -``` - -多机多卡运行需要合并不同机器的RANK_FILE_TABLE,参考前期准备-[多机RANK_TABLE_FILE合并](#多机rank_table_file合并多机多卡必备环节) - -- 多机多卡 - -在每台机器上启动`bash run_distribute.sh`。 - -```bash -server_count=12 -device_num=8*$server_count -# launch ranks in the 0th server -cd scripts -bash run_distribute.sh $RANK_TABLE_FILE ../configs/bloom/run_bloom_65b.yaml [0,8] finetune $device_num - -# launch ranks in the 1-11 server via ssh -for idx in {1..11} -do - let rank_start=8*$idx - let rank_end=$rank_start+8 - ssh ${IP_LIST[$idx]} "cd scripts; bash run_distribute.sh $RANK_TABLE_FILE ../configs/bloom/run_bloom_65b.yaml [$rank_start,$rank_end] finetune $device_num" -done -``` 
- -其中 - -- `RANK_TABLE_FILE`为上一步汇总并分发的总rank table文件; -- `IP_LIST`为12台服务器的IP地址。如192.168.0.[0-11] - -```bash -IP_LIST=("192.168.0.0", "192.168.0.1", ..., "192.168.0.11") -``` - -### 微调后对话效果 - -在`mindformers/scripts`路径下执行以下脚本`combine_ckpt.py`.这个脚本会 - -- 对strategy进行合并 -- 清理微调ckpt文件中的优化器状态 -- 合并微调ckpt文件用于单机推理 - -```python -# combine_ckpt.py -import os -import mindspore as ms - -CKPT_SUFFIX = "300_8" # 300(sink number) * 8 (sink size) = 2400 step -CLEANED_CKPT_DIR = "../output/checkpoint_cleaned" -COMBINED_CKPT_DIR = "../output/checkpoint_combined" -COMBINED_STGY = "../output/strategy/ckpt_strategy.ckpt" - - -# combine straegies -ms.merge_pipeline_strategys("../output/strategy", COMBINED_STGY) - - -# clean ckpt by removing optimizer states -for rank_id in range(8): - input_file_name = f"../output/checkpoint/rank_{rank_id}/mindformers_rank_{rank_id}-{CKPT_SUFFIX}.ckpt" - params = ms.load_checkpoint(input_file_name) - new_params = [{"name": key, "data": val} for key, val in params.items() if not ("accu_grads" in key or "adam_" in key) ] - - save_path = os.path.join(CLEANED_CKPT_DIR, f"rank_{rank_id}") - os.makedirs(save_path, exist_ok=True) - ms.save_checkpoint(new_params, f"{save_path}/cleaned.ckpt") - print(f"saved {save_path}") - - -# combine ckpt -ms.transform_checkpoints(CLEANED_CKPT_DIR, COMBINED_CKPT_DIR, ckpt_prefix = "combined_", src_strategy_file = COMBINED_STGY) -``` - -然后执行以下脚本进行新的对话。 -> 以下脚本针对Alpaca数据集的prompt模板。如果使用其他数据集微调,请更换对应模板。 - -```python -import numpy as np -import mindspore as ms -from mindformers import AutoTokenizer -from mindformers.models.bloom import BloomConfig, BloomLMHeadModel - -ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend", device_id=0) - -alpaca_prompt = ( - "Below is an instruction that describes a task. " - "Write a response that appropriately completes the request.\n\n" - "### Instruction:\n{instruction}\n\n### Response:\n") - -# 7B -CKPT_FILE = "xxx/mindformers/output/checkpoint_combined/rank_0/combined_0.ckpt" -SEQ_LENGTH = 1024 -config = BloomConfig( - param_init_type="float16", - embedding_init_type="float32", - checkpoint_name_or_path=CKPT_FILE, - max_decode_length=SEQ_LENGTH, - seq_length=SEQ_LENGTH, - hidden_size=4096, - num_layers=30, - num_heads=32, - hidden_dropout_rate=0.0, - attention_dropout_rate=0.0, - batch_size = 1, - use_past = True -) - - -def chat(): - tokenizer = AutoTokenizer.from_pretrained("bloom_560m") - model = BloomLMHeadModel(config) - model.set_train(False) - - question_list = [ - "why the earth is unique?", - "why the sky is blue?", - "write a job application for a data scientist and explain your related work experience." 
- ] - - while True: - if question_list: - question = question_list.pop(0) - else: - question = input("please input your question: ") - question = alpaca_prompt.format_map({"instruction":question}) - inputs = tokenizer.encode(question) - inputs = np.array([inputs]).astype(np.int32) # add batch dim - outputs = model.generate(inputs, max_length=None, do_sample=False, eos_token_id=2) - outputs = outputs[0] # remove batch dim - print(tokenizer.decode(outputs)) - -if __name__ == "__main__": - chat() - -``` - -预期的对话效果大致为下表所示 -| | Before | After | -|:--------------------------------------------------------------------------------------:|:-----------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| -| why the sky is blue? | light from the sun is scattered | The sky is blue because of the presence of water droplets in the atmosphere. These droplets reflect light back to the sky, causing the sky to appear blue. | -| what would be the best way to travel from San Fransisco to New York? | take a flight | The best way to travel from San Francisco to New York is by taking the flight. The flight is the fastest and most convenient way to travel from San Francisco to New York. | -| write a job application for a data scientist and explain your related work experience. | | Dear Employer, I am writing to apply for the position of Data Scientist. I have over 5 years of experience in data science and machine learning, and I am excited to join your team. I have experience in supervised and unsupervised machine learning algorithms, data visualization, and data cleaning. I am also proficient in Python, R, and SQL. I am looking forward to discussing my qualifications further and hearing from you soon. Sincerely, [Your Name] | -| why the earth is unique? | it is the only planet with a liquid surface | The Earth is unique because it is the only planet with a liquid surface, a magnetic field, and a protective atmosphere. It is also the only planet with a life-supporting atmosphere and a diverse and abundant life-supporting ecosystem. 
| - -## 推理 - -### 基于pipeline的推理 - -以下为基于pipeline接口的自定义推理脚本,支持多卡多batch推理。 - -```python -# predict_custom.py 文件 -import os -import argparse -import numpy as np - -import mindspore as ms -from mindspore.train import Model -from mindspore import load_checkpoint, load_param_into_net -from mindformers import AutoConfig, AutoTokenizer, AutoModel, pipeline -from mindformers import init_context, ContextConfig, ParallelContextConfig -from mindformers.trainer.utils import get_last_checkpoint -from mindformers.tools.utils import str2bool, get_real_rank - - -def context_init(use_parallel=False, device_id=0): - """init context for mindspore.""" - context_config = ContextConfig(mode=0, device_target="Ascend", device_id=device_id) - parallel_config = None - if use_parallel: - parallel_config = ParallelContextConfig(parallel_mode='SEMI_AUTO_PARALLEL', - gradients_mean=False, - full_batch=True) - init_context(use_parallel=use_parallel, - context_config=context_config, - parallel_config=parallel_config) - - -def main(use_parallel=False, - device_id=0, - checkpoint_path="", - use_past=True): - """main function.""" - # 初始化单卡/多卡环境 - context_init(use_parallel, device_id) - - # 多batch输入 - inputs = ["what color is the sky?", - "Translate to English: Je t’aime."] - - # set model config - model_config = AutoConfig.from_pretrained("bloom_7.1b") - model_config.use_past = use_past - if checkpoint_path and not use_parallel: - model_config.checkpoint_name_or_path = checkpoint_path - print(f"config is: {model_config}") - - # build tokenizer - tokenizer = AutoTokenizer.from_pretrained("bloom_7.1b") - # build model from config - network = AutoModel.from_config(model_config) - - # if use parallel, load distributed checkpoints - if use_parallel: - # find the sharded ckpt path for this rank - ckpt_path = os.path.join(checkpoint_path, "rank_{}".format(get_real_rank())) - ckpt_path = get_last_checkpoint(ckpt_path) - print("ckpt path: %s", str(ckpt_path)) - - # shard bloom and load sharded ckpt - model = Model(network) - model.infer_predict_layout(ms.Tensor(np.ones(shape=(1, model_config.seq_length)), ms.int32)) - checkpoint_dict = load_checkpoint(ckpt_path) - not_load_network_params = load_param_into_net(model, checkpoint_dict) - print("Network parameters are not loaded: %s", str(not_load_network_params)) - - text_generation_pipeline = pipeline(task="text_generation", model=network, tokenizer=tokenizer) - outputs = text_generation_pipeline(inputs) - for output in outputs: - print(output) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--use_parallel', default=False, type=str2bool, - help='whether use parallel.') - parser.add_argument('--device_id', default=0, type=int, - help='set device id.') - parser.add_argument('--checkpoint_path', default='', type=str, - help='set checkpoint path.') - parser.add_argument('--use_past', default=True, type=str2bool, - help='whether use past.') - args = parser.parse_args() - - main(args.use_parallel, - args.device_id, - args.checkpoint_path, - args.use_past) -``` - -以下为多卡运行自定义多batch推理的脚本 - -```bash -# >>> `run_predict.sh`文件 -CHECKPOINT_PATH=$2 -export RANK_TABLE_FILE=$1 - -# define variable -export RANK_SIZE=8 -export START_RANK=0 # this server start rank -export END_RANK=8 # this server end rank - -# run -for((i=${START_RANK}; i<${END_RANK}; i++)) -do - export RANK_ID=$i - export DEVICE_ID=$((i-START_RANK)) - echo "Start distribute running for rank $RANK_ID, device $DEVICE_ID" - python3 ./predict_custom.py --use_parallel True --checkpoint_path 
CHECKPOINT_PATH &> minformers_$RANK_ID.log & -done -``` - -#### 单卡pipeline推理 - -```bash -python predict_custom.py -``` - -#### 多卡pipeline推理 - -```bash -bash run_predict.sh RANK_TABLE_FILE path/to/bloom_7.1b_shard_checkpoint_dir -``` - -#### 单卡与多卡pipeline推理预期输出为 - -- what color is the sky? _**blue**_ -- Translate to English: Je t’aime. _**I love you.**_ - -### 基于generate的推理 - -#### 单卡generate推理 - -```python -import mindspore as ms -from mindformers import AutoTokenizer -from mindformers.models.bloom import BloomConfig, BloomLMHeadModel - -ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend", device_id=0) - -# ############################## -# # bloom_560m config -# CKPT_FILE="bloom_560m" -# SEQ_LENGTH = 256 -# config = BloomConfig( -# param_init_type="float16", -# embedding_init_type="float16", -# checkpoint_name_or_path=CKPT_FILE, -# max_decode_length=SEQ_LENGTH, -# seq_length=SEQ_LENGTH, -# hidden_size=1024, -# num_layers=24, -# num_heads=16, -# hidden_dropout_rate=0.0, -# attention_dropout_rate=0.0, -# batch_size = 1, -# use_past = True -# -# ) -# ############################## - -# 7B -CKPT_FILE = "bloom_7.1b" -# CKPT_FILE also takes absolute path to ckpt file, e.g. -# "/home/xxx/mindformers/checkpoint_download/bloom/bloom_7.1b.ckpt" -SEQ_LENGTH = 256 -config = BloomConfig( - param_init_type="float16", - embedding_init_type="float16", - checkpoint_name_or_path=CKPT_FILE, - max_decode_length=SEQ_LENGTH, - seq_length=SEQ_LENGTH, - hidden_size=4096, - num_layers=30, - num_heads=32, - hidden_dropout_rate=0.0, - attention_dropout_rate=0.0, - batch_size=1, - use_past=True -) - - -def chat(): - tokenizer = AutoTokenizer.from_pretrained("bloom_560m") - model = BloomLMHeadModel(config) - model.set_train(False) - - question_list = [ - "what color is the sky?", - "Translate to English: Je t’aime.", - ] - - for question in question_list: - inputs = tokenizer.encode(question) - inputs = [inputs] # add batch dim - outputs = model.generate(inputs, max_length=100, do_sample=False) - outputs = outputs[0] # remove batch dim - print(tokenizer.decode(outputs, skip_special_tokens=True)) - - -if __name__ == "__main__": - chat() -``` - -- Bloom_560m的预期输出为: - - - what color is the sky? _**blue**_ - - Translate to English: Je t’aime. _**I love you.**_ - -- Bloom_7.1B的预期输出为: - - - what color is the sky? _**blue**_ - - Translate to English: Je t’aime. _**I love you.**_ - -#### 多卡generate推理 - -这里我们以1机器8卡推理bloom_7.1B为例。涉及两个文件`chat.py`和`run_chat.py`。 - -```text -/SOME/PATH/ - ├── chat.py # 负责定义一个并行进程 - └── run_chat.py # 负责多次执行chat.py并拉起分布式 -``` - -加载ckpt有两种方式, 由`run_chat.py` 中的`DISTRIBUTED_CKPT_PATH`变量来控制。这里默认使用`DISTRIBUTED_CKPT_PATH=""`代表的`Load then Shard`的方式加载ckpt. 
- -| 分布式加载ckpt的方法 | Load then Shard | Shard then Load | -|-----------------------:|:-----------------|:-----------------| -| DISTRIBUTED_CKPT_PATH= | "" | "/path/to/distributed/ckpt/" | -|说明| 先加载全量ckpt,然后切分模型。|先切分模型,然后加载分布式ckpt。| -|| 不用预先按照策略切分ckpt,推理时可以灵活调整策略。|需要确定推理的分布式策略,并按照策略预先切分ckpt。 | -|| 对host内存的占用较高。| 对host内存的占用较低。| -|适用| 适用于较小模型,如`560m`,`7.1b`。|适用于较大模型,如`65b`, `176b`。 | - -```python -# >>> `chat.py`文件 - -import os -import numpy as np - -import mindspore as ms -from mindspore.train import Model -from mindspore import load_checkpoint, load_param_into_net -from mindspore.parallel import set_algo_parameters -from mindspore.parallel._cost_model_context import _set_multi_subgraphs - -from mindformers import BloomLMHeadModel, BloomConfig, AutoTokenizer -from mindformers import init_context -from mindformers.modules import TransformerOpParallelConfig -from mindformers.trainer.utils import get_last_checkpoint -from mindformers.tools import logger - -SEQ_LENGTH = 256 -DISTRIBUTED_CKPT_PATH = os.getenv("DISTRIBUTED_CKPT_PATH", "") - - -# set context -context_config = {"device_target": "Ascend", "mode": 0, "max_device_memory": "31GB"} -parallel_context_config = {"parallel_mode": 1, "gradients_mean": False, "full_batch": True} -rank_id, device_num = init_context(use_parallel=True, context_config=context_config, parallel_config=parallel_context_config) -set_algo_parameters(elementwise_op_strategy_follow=True, fully_use_devices=True) -_set_multi_subgraphs() - - -# config blooom 7.1b -config = BloomConfig( - embedding_init_type="float32" if DISTRIBUTED_CKPT_PATH else "float16", - checkpoint_name_or_path="" if DISTRIBUTED_CKPT_PATH else "bloom_7.1b", - seq_length=SEQ_LENGTH, - hidden_size=4096, - num_layers=30, - num_heads=32, - hidden_dropout_rate=0.0, - attention_dropout_rate=0.0, - top_k=1, top_p=1, do_sample=True, - parallel_config=TransformerOpParallelConfig( - data_parallel=1, - model_parallel=8, - pipeline_stage=1 - ) - ) - -def chat(): - # init bloom - tokenizer = AutoTokenizer.from_pretrained("bloom_560m") - bloom = BloomLMHeadModel(config) - bloom.set_train(False) - - if DISTRIBUTED_CKPT_PATH: - # find the sharded ckpt path for this rank - ckpt_path = os.path.join(DISTRIBUTED_CKPT_PATH, "rank_{}".format(rank_id)) - ckpt_path = get_last_checkpoint(ckpt_path) - logger.info("ckpt path: %s", str(ckpt_path)) - - # shard bloom and load sharded ckpt - m = Model(bloom) - m.infer_predict_layout(ms.Tensor(np.ones(shape=(1, SEQ_LENGTH)), ms.int32)) - checkpoint_dict = load_checkpoint(ckpt_path) - not_load_network_params = load_param_into_net(bloom, checkpoint_dict) - logger.info("Network parameters are not loaded: %s", str(not_load_network_params)) - - question_list = [ - "what color is the sky?", - "Translate to English: Je t’aime.", - ] - - for question in question_list: - inputs = tokenizer.encode(question) - inputs = [inputs] # add batch dim - outputs = bloom.generate(inputs, max_length=100, do_sample=False) - outputs = outputs[0] # remove batch dim - print(tokenizer.decode(outputs, skip_special_tokens=True)) - - -if __name__ == "__main__": - chat() - -``` - -```bash -# >>> `run_chat.sh`文件 - -# define variable -export RANK_SIZE=8 -export RANK_TABLE_FILE="../hccl_8p.json" # <<< change to yours - -# distributed ckpt path to load after sharding model. -# use "" if load full ckpt before sharding model. 
-export DISTRIBUTED_CKPT_PATH="" - -export START_RANK=0 # this server start rank -export END_RANK=8 # this server end rank - -# run -for((i=${START_RANK}; i<${END_RANK}; i++)) -do - export RANK_ID=$i - export DEVICE_ID=$((i-START_RANK)) - echo "Start distribute running for rank $RANK_ID, device $DEVICE_ID" - python3 ./chat.py &> minformers_$RANK_ID.log & -done - -``` - -使用一下命令拉起分布式推理: - -```bash -bash run_chat.sh -``` - -日志可以通过`tail -f mindformers_0.log`查看。预期结果与单机单卡`bloom_7.1b`推理相同: - -- what color is the sky? _**blue**_ - -- Translate to English: Je t’aime. _**I love you.**_ - -## 附录 - -### 附录A BELLE - -[BELLE](https://github.com/LianjiaTech/BELLE)(Be Everyone's Large Language model Engine)是一个旨在促进中文对话大模型开源社区发展的组织。BELLE-7B是基于Bloomz-7B-mt,使用中文问答数据集微调出来开源的中文对话模型。根据微调所使用的中文数据大小分为0.2M, 0.6M, 1M, 2M四个权重。 -微调的模板为 -> Human: {input} \n\nAssistant:{output} - -原始的开源BELLE的数据集和权重可以通过以下链接获得 - -| | 文件 | 链接 | -|----------| --- |--------------------------------------------------------| -| 2M SFT数据集 | train_2M_CN.json | https://huggingface.co/BelleGroup/BELLE-7B-2M | -| 2M 模型权重 | pytorch_model.bin | https://huggingface.co/datasets/BelleGroup/train_2M_CN | - -数据集和权重的转换参考2.1章和2.2章,命令如下: - -```bash -# 数据集转换 -python mindformers/tools/dataset_preprocess/bloom/make_mindrecord.py --input_dataset_file=XXX/train_2M_CN.json --output_path=XXX - -# 权重转换 -cd mindformers/models/bloom -python convert_weight.py --n_head=32 --hidden_size=4096 --torch_path=xxx/pytorch_model.bin --mindspore_path=output_path - -``` - -数据集和权重的转换到mindspore后,可以按照Bloom的方式进行推理和微调。 diff --git a/docs/model_cards/clip.md b/docs/model_cards/clip.md deleted file mode 100644 index e1cbc6eb..00000000 --- a/docs/model_cards/clip.md +++ /dev/null @@ -1,166 +0,0 @@ -# CLIP - -## 模型描述 - -CLIP (Contrastive Lanuguage-Image Pre-Training):是一种基于图文对进行训练的transformer模型,在预训练完成以后,任意给定一张图片,它可以在不用微调的情况下,完成对图片的零样本分类。 - -[论文](https://arxiv.org/abs/2103.00020) Alec Radford, Jong Wook Kim, et al., Learning Transferable Visual Models From Natural Language Supervision, 2021. 
- -注:CLIP训练代码未开源,故MindFormers提供训练pretrain、finetune功能,但不不保证精度,目前仅对zero shot图片分类精度做了对齐。 - -## 数据集准备 - -### 预训练使用数据集:Flickr8k([链接](https://pan.baidu.com/s/1LRlQUL1MRipPL4MLOdExzg),密码: s4be) - -- 数据集大小:2.2G,共8000张彩色图像,每张图像都与五个不同的标题配对,这些标题提供了对图片中物体和事件的内容描述 - - 训练集:6000张图像 - - 验证集:1000张图像 - - 测试集:1000张图像 -- 数据格式:RGB - - ```bash -数据集目录格式 -└─Flickr8k - ├─Flickr8k_Dataset - | └─Flickr8k_Dataset - └─Flickr8k_text - ├─Flickr8k.devImages.txt - ├─Flickr8k.testImages.txt - ├─Flickr8k.trainImages.txt - └─Flickr8k.token.txt - ``` - -### 零样本下游任务使用的数据集:[Cifar100](http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz) - -- 数据集大小:161M,共60000张图片,100个类别 - - 训练集:50000张图片 - - 测试集:10000张图片 -- 数据格式:二进制文件 - - ```bash -数据集目录格式 -└─cifar-100-python - ├─meta - ├─test - └─train - ``` - -## 快速使用 - -### 脚本启动 - -> 需开发者提前clone工程。 - -- 请参考[使用脚本启动](../../README.md#方式一使用已有脚本启动) - -- 脚本运行测试 - -当前clip多卡精度有异常,仅支持单卡,后续版本会修复 - -```shell -# pretrain -python run_mindformer.py --config ./configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml --run_mode train --train_dataset_dir [DATASET_PATH] - -# evaluate -python run_mindformer.py --config ./configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml --run_mode eval --eval_dataset_dir [DATASET_PATH] - -# predict -python run_mindformer.py --config ./configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml --run_mode predict --predict_data [PATH_TO_IMAGE] -``` - -### 调用API启动 - -> 需开发者提前pip安装。具体接口说明请参考[API接口](https://gitee.com/mindspore/transformer/wikis/API/) - -- Model调用接口 - -```python -import mindspore; mindspore.set_context(mode=0, device_id=0) -from mindformers import CLIPModel, CLIPConfig - -CLIPModel.show_support_list() -# 输出: -# - support list of CLIPModel is: -# - ['clip_vit_b_32', 'clip_vit_B_16', 'clip_vit_l_14', 'clip_vit_l_14@336'] -# - ------------------------------------- - -# 模型标志加载模型 -model = CLIPModel.from_pretrained("clip_vit_b_32") - -#模型配置加载模型 -config = CLIPConfig.from_pretrained("clip_vit_b_32") -# {'text_config': {'hidden_size': 512, 'vocab_size': 49408, 'max_position_embeddings': 77, -# 'num_hidden_layers': 12}, 'vision_config': {'hidden_size': 768, 'image_size': 224, 'patch_size': 32, -# 'num_hidden_layers': 12}, 'projection_dim': 512, 'ratio': 64, 'checkpoint_name_or_path': 'clip_vit_b_32', -# 'dtype': 'float16'} -model = CLIPModel(config) -``` - -- Trainer接口开启训练/评估/推理: - -```python -import mindspore; mindspore.set_context(mode=0, device_id=0) -from mindformers.trainer import Trainer -from mindformers.tools.image_tools import load_image -# 初始化预训练任务 -trainer = Trainer(task='contrastive_language_image_pretrain', - model='clip_vit_b_32', - train_dataset='./Flickr8k') -trainer.train() # 开启预训练 - -#初始化零样本图像分类下游任务 -trainer = Trainer(task='zero_shot_image_classification', - model='clip_vit_b_32', - eval_dataset='./cifar-100-python') -img = load_image("https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/clip/sunflower.png") - -# 方式1: 使用训练好的权重进行评估和推理 -trainer.evaluate(eval_checkpoint=True) -predict_result = trainer.predict(predict_checkpoint=True, input_data=img, top_k=3) -print(predict_result) - -# 方式2: 从obs下载训练好的权重并进行评估和推理 -trainer.evaluate() #下载权重进行评估 -predict_result = trainer.predict(input_data=img, top_k=3) #下载权重进行推理 -print(predict_result) -``` - -- pipeline接口开启快速推理 - -```python -import mindspore; mindspore.set_context(mode=0, device_id=0) -from mindformers import pipeline -from mindformers.tools.image_tools import load_image - -classifier = pipeline("zero_shot_image_classification", - 
model="clip_vit_b_32", - candidate_labels=["sunflower", "tree", "dog", "cat", "toy"]) -img = load_image("https://ascend-repo-modelzoo.obs.cn-east-2." - "myhuaweicloud.com/XFormer_for_mindspore/clip/sunflower.png") -classifier(img) -# 输出 -# [[{'score': 0.99995565, 'label': 'sunflower'}, {'score': 2.5318595e-05, 'label': 'toy'}, -# {'score': 9.903885e-06, 'label': 'dog'}, {'score': 6.75336e-06, 'label': 'tree'}, -# {'score': 2.396818e-06, 'label': 'cat'}]] -``` - -## 模型性能 - -| model | task_type | model_Type | datasets | Top1-accuracy | log | example | -|:-----:|:------------------------------:|:-----------------------------------------------------------------------------:|:--------:|:---------------------------------------:|:---:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| -| clip | pretrained | clip_vit_b_32
<br>clip_vit_b_16<br>clip_vit_l_14<br>clip_vit_l_14@336 | flickr8k | \ | \ | pretrain [link](https://gitee.com/mindspore/mindformers/blob/dev/scripts/examples/contrastive_language_image_pretrain/clip_vit_b_32_pretrain_on_flickr8k.sh) |
-| clip | zero_shot_image_classification | clip_vit_b_32<br>clip_vit_b_16<br>clip_vit_l_14<br>clip_vit_l_14@336 | cifar100 | 57.24%<br>61.41%<br>69.67%<br>68.19% | \ | eval [link](https://gitee.com/mindspore/mindformers/blob/dev/scripts/examples/zero_shot_image_classification/clip_vit_b_32_eval_on_cifar100.sh)<br>
predict [link](https://gitee.com/mindspore/mindformers/blob/dev/scripts/examples/zero_shot_image_classification/clip_vit_b_32_predict_on_cifar100.sh) | - -## 模型权重 - -本仓库中的`clip_vit_b_32`来自于openai/clip的[`ViT-B/32`](https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt), 基于下述的步骤获取: - -1. 从上述的链接中下载`ViT-B/32`的模型权重 - -2. 执行转换脚本,得到转换后的输出文件`clip_vit_b_32.ckpt` - -其余参数获取方式相同 - -```shell -python mindformers/models/clip/convert_weight.py --torch_path "PATH OF ViT-B/32.pt" --mindspore_path "SAVE PATH OF clip_vit_b_32.ckpt" -``` \ No newline at end of file diff --git a/docs/model_cards/codegeex2.md b/docs/model_cards/codegeex2.md deleted file mode 100644 index ce2433dc..00000000 --- a/docs/model_cards/codegeex2.md +++ /dev/null @@ -1,714 +0,0 @@ -# CodeGeeX2-6B - -## 模型描述 - -CodeGeeX**2**-6B 是多语言代码生成模型 CodeGeeX的第二代版本。不同于一代CodeGeeX,CodeGeeX2是基于ChatGLM2结构加入代码预训练实现,得益于ChatGLM2的更优性能,CodeGeeX2在多项指标上取得性能提升。 - -## 模型性能 - -- 基于Atlas 800T A2 - -| config | task | Datasets | metric | score | [train performance](#预训练) | [predict performance](#基于pipeline的推理) | -|:---------------------------------------------------------------------------:| :-------------: |:--------:| :----: | :--------: |:-------------------------:|:-------------------------------------:| -| [codegeex2_6b](../../configs/codegeex2/run_codegeex2_6b_finetune.yaml) | text_generation | CodeAlpaca | - | - | 1421 tokens/s/p | 20.17 tokens/s/p (use past True) | -| [codegeex2_6b](../../configs/codegeex2/run_codegeex2_6b_finetune_2048.yaml) | text_generation | CodeAlpaca | - | - | 2167.2 tokens/s/p | 20.31 tokens/s/p (use past True) | - -## 仓库介绍 - -`codegeex2-6B` 基于 `mindformers` 实现,主要涉及的文件有: - -1. 模型具体实现:`mindformers/models/glm2` - - ```bash - glm2 - ├── __init__.py - ├── glm2.py # 模型实现 - ├── glm2_config.py # 模型配置项 - ├── glm2_modules.py # 模组实现 - ├── glm2_tokenizer.py # tokenizer - └── glm2_transformer.py # transformer层实现 - ``` - -2. 模型配置:`configs/codegeex2` - - ```bash - codegeex2 - ├── run_codegeex2_6b_fintune.yaml # 全量微调启动配置 - └── run_codegeex2_6b.yaml # 推理配置 - ``` - -## 前期准备 - -### [mindformers安装](../../README.md#二mindformers安装) - -### 生成RANK_TABLE_FILE(多卡运行必须环节) - -运行mindformers/tools/hccl_tools.py生成RANK_TABLE_FILE的json文件 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" -``` - -**注:若使用ModelArts的notebook环境,可从 `/user/config/jobstart_hccl.json` 路径下直接获取rank table,无需手动生成** - -RANK_TABLE_FILE 单机8卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "1", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - {"device_id": "0","device_ip": "192.1.27.6","rank_id": "0"}, - {"device_id": "1","device_ip": "192.2.27.6","rank_id": "1"}, - {"device_id": "2","device_ip": "192.3.27.6","rank_id": "2"}, - {"device_id": "3","device_ip": "192.4.27.6","rank_id": "3"}, - {"device_id": "4","device_ip": "192.1.27.7","rank_id": "4"}, - {"device_id": "5","device_ip": "192.2.27.7","rank_id": "5"}, - {"device_id": "6","device_ip": "192.3.27.7","rank_id": "6"}, - {"device_id": "7","device_ip": "192.4.27.7","rank_id": "7"}], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 多机RANK_TABLE_FILE合并(多机多卡必备环节) - -- step 1. 
首先根据上章节内容,在每个机器上生成各自的`RANK_TABLE_FILE`文件,然后将不同机器上生成的`RANK_TABLE_FILE`文件全部拷贝到同一台机器上。 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" --server_ip xx.xx.xx.xx -``` - -**注:需要根据机器的ip地址指定 --server_ip,避免由于不同机器server_ip不同,导致多节点间通信失败。** - -- step 2. 运行mindformers/tools/merge_hccl.py将不同机器上生成的`RANK_TABLE_FILE`文件合并 - -```bash -# 运行如下命令,合并每个机器上的RANK_TABLE_FILE的json文件。 -python ./mindformers/tools/merge_hccl.py hccl*.json -``` - -- step 3. 将合并后的`RANK_TABLE_FILE`文件拷贝到所有机器中,保证不同机器上的`RANK_TABLE_FILE`相同。 - -RANK_TABLE_FILE 双机16卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "2", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.0", "rank_id": "0" - }, - { - "device_id": "1", "device_ip": "192.168.1.0", "rank_id": "1" - }, - { - "device_id": "2", "device_ip": "192.168.2.0", "rank_id": "2" - }, - { - "device_id": "3", "device_ip": "192.168.3.0", "rank_id": "3" - }, - { - "device_id": "4", "device_ip": "192.168.0.1", "rank_id": "4" - }, - { - "device_id": "5", "device_ip": "192.168.1.1", "rank_id": "5" - }, - { - "device_id": "6", "device_ip": "192.168.2.1", "rank_id": "6" - }, - { - "device_id": "7", "device_ip": "192.168.3.1", "rank_id": "7" - } - ], - "host_nic_ip": "reserve" - }, - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.1", "rank_id": "8" - }, - { - "device_id": "1", "device_ip": "192.168.1.1", "rank_id": "9" - }, - { - "device_id": "2", "device_ip": "192.168.2.1", "rank_id": "10" - }, - { - "device_id": "3", "device_ip": "192.168.3.1", "rank_id": "11" - }, - { - "device_id": "4", "device_ip": "192.168.0.2", "rank_id": "12" - }, - { - "device_id": "5", "device_ip": "192.168.1.2", "rank_id": "13" - }, - { - "device_id": "6", "device_ip": "192.168.2.2", "rank_id": "14" - }, - { - "device_id": "7", "device_ip": "192.168.3.2", "rank_id": "15" - } - ], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 模型权重下载与转换 - -本仓库中的`codegeex2`来自于HuggingFace的 [CodeGeeX2-6B](https://huggingface.co/THUDM/codegeex2-6b),基于下述的步骤获取: - -1. 克隆codegeex2-6b代码仓,下载分布式的模型文件。 - - ```shell - git lfs install - git clone https://huggingface.co/THUDM/codegeex2-6b - ``` - -2. 执行 python 脚本,合并模型权重,模型转换权重需要依赖transformer版本为4.30.2,可参阅[CodeGeeX2-6B](https://huggingface.co/THUDM/codegeex2-6b)。 - - ```python - from transformers import AutoTokenizer, AutoModel - import torch - - tokenizer = AutoTokenizer.from_pretrained("THUDM/codegeex2-6b", trust_remote_code=True) - model = AutoModel.from_pretrained("THUDM/codegeex2-6b", trust_remote_code=True) - - with open("pt_model_arch.txt", "w") as fp: - print(model, file=fp, flush=True) - with open("pt_ckpt.txt", "w") as fp: - for name, param in model.named_parameters(): - fp.write(f"{name} {param.shape} {param.dtype}\n") - torch.save(model.state_dict(), "codegeex2_6b.pth") - ``` - -3. 
执行转换脚本,得到转换后的输出文件`codegeex2_6b.ckpt`。 - - ```python - import mindspore as ms - import torch as pt - from tqdm import tqdm - - pt_ckpt_path = "/path/to/codegeex2_6b.pth" - pt_param = pt.load(pt_ckpt_path) - - type_map = {"torch.bfloat16": "ms.float32", - "torch.float32": "ms.float32"} - ms_param = [] - with open("./check_pt_ckpt.txt", "w") as fp: - for k, v in tqdm(pt_param.items()): - if v.dtype is pt.bfloat16: - v = v.to(dtype = pt.float32) - if "word_embeddings.weight" in k: - k = k.replace("word_embeddings.weight", "embedding_table") - fp.write(f"{k} {v.shape} {v.dtype}\n") - ms_param.append({"name": k, "data": ms.Tensor(v.numpy())}) - - ms.save_checkpoint(ms_param, "/path/to/codegeex2_6b.ckpt") - ``` - -4. 也可获取MindFormers提供的已转换权重 - - 可通过from_pretrained接口下载,也可直接从下面的链接获取 - - [codegeex2_6b权重](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/MindFormers/codegeex2/codegeex2_6b.ckpt) - - [tokenizer文件](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/MindFormers/codegeex2/tokenizer.model) - -### [模型权重切分与合并](../feature_cards/Transform_Ckpt.md) - -从hugging face或官方github仓库转换而来的权重通常是单卡权重,基于该权重进行多卡微调,评测,推理,涉及ckpt从单机策略到分布式策略的切换。 - -通常训练采用分布式训练,基于该权重进行评测,推理多采用单卡,涉及ckpt从分布式策略到单机策略的切换。 - -以上涉及到ckpt的单卡,多卡转换,详细教程请参考特性文档模型[权重切分与合并](../feature_cards/Transform_Ckpt.md) - -## 基于API的快速使用 - -### AutoClass推理 - -可以使用AutoClass接口,通过模型名称获取相应的模型/tokenizer实例,并自动下载并加载权重 - -`from_pretrained()` 接口会自动从云上下载预训练的模型,存储路径:`mindformers/checkpoint_download/codegeex2` - -首次运行pipeline推理时需要进行模型编译,需等待一段时间 - -```python -import mindspore; mindspore.set_context(mode=0, device_id=0) -from mindformers import AutoTokenizer, AutoModel - - -tokenizer = AutoTokenizer.from_pretrained("codegeex2_6b") -model = AutoModel.from_pretrained("codegeex2_6b") - -prompt = "#language: Python\n# write a bubble sort function\n" -inputs = tokenizer.encode(prompt) -outputs = model.generate(inputs, max_length=256, top_k=1) -response = tokenizer.decode(outputs[0]) -print(response) -``` - -**注:快速使用仅限单卡,该示例支持6B模型。** - -### 基于Trainer的快速训练,微调,评测,推理 - -> 注:下面仅显示接口使用方式,模型启动训练需求多卡分布式训练,训练脚本需配合分布式脚本启动 - -```python -import mindspore -from mindformers.trainer import Trainer -from mindformers import AutoTokenizer - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) - -# 初始化预训练任务 -tokenizer = AutoTokenizer.from_pretrained("codegeex2_6b") -trainer = Trainer(task='text_generation', - model='codegeex2_6b', - train_dataset='path/to/train_dataset', - eval_dataset='path/to/eval_dataset', - tokenizer=tokenizer) - -# 开启预训练 -# trainer.train() - -# 开启全量微调 -# trainer.finetune() - -# 开启推理 -predict_result = trainer.predict(input_data="#language: Python\n# write a bubble sort function\n") -# output result is: [{'text_generation_text': ['#language: Python\n# write a bubble sort function\n\ndef bubble_sort(list):\n for i in range(len(list) - 1):\n for j in range(len(list) - 1):\n if list[j] > list[j + 1]:\n list[j], list[j + 1] = list[j + 1], list[j]\n return list\n\n\n print(bubble_sort([5, 2, 1, 8, 4]))']}] -``` - -**注:使用前请参照微调部分更改数据集设置,多卡请参考[使用高阶接口开发教程](https://mindformers.readthedocs.io/zh_CN/latest/docs/practice/Develop_With_Api.html)。** - -### 基于Pipeline的快速推理 - -```python -import mindspore -from mindformers.pipeline import pipeline - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) - -pipeline_task = pipeline("text_generation", model='codegeex2_6b', max_length=500) -pipeline_result = pipeline_task("#language: Python\n# write a bubble sort function\n", top_k=1) -print(pipeline_result) -# output result is: [{'text_generation_text': 
['#language: Python\n# write a bubble sort function\n\ndef bubble_sort(list):\n for i in range(len(list) - 1):\n for j in range(len(list) - 1):\n if list[j] > list[j + 1]:\n list[j], list[j + 1] = list[j + 1], list[j]\n return list\n\n\n print(bubble_sort([5, 2, 1, 8, 4]))']}] -``` - -**注:快速使用仅限单卡,该示例支持6B模型。** -**注:多卡请参考[基于pipeline的推理](#基于pipeline的推理)。** - -## 微调 - -### 数据集准备 - -数据处理方法同GLM2相同,其组织方式如下: - -```json -{ - "PROMPT": "Create an array of 10 elements using java", - "ANSWER": "int[] array = new int[10];" -} -``` - -从 [CodeAlpaca](https://github.com/sahil280114/codealpaca/blob/master/data/code_alpaca_20k.json) 或者 [Hugging Face](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k) 下载数据集,并且处理其目录结构为 - -```shell -CodeAlpaca - ├── train.json - └── dev.json -``` - -处理脚本可以参考:`mindformers/tools/dataset_preprocess/codegeex2/codealpaca_preprocess.py` - -将任务配置文件 `configs/codegeex2/run_codegeex2_6b_finetune_2048.yaml` 中的 `==== dataset config ====` 部分替换成: - -```yaml -train_dataset: &train_dataset - data_loader: - type: ADGenDataLoader - dataset_dir: "/path/to/CodeAlpaca/train.json" - shuffle: True - phase: "train" - version: 2 - origin_columns: ["PROMPT", "ANSWER"] - tokenizer: - type: ChatGLM2Tokenizer - vocab_file: "/path/to/tokenizer.model" - input_columns: ["input_ids", "labels"] - max_source_length: 1023 - max_target_length: 1024 - ignore_pad_token_for_loss: True - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 1 - repeat: 1 - numa_enable: False - prefetch_size: 1 - seed: 0 - -train_dataset_task: - type: KeyWordGenDataset - dataset_config: *train_dataset - -eval_dataset: &eval_dataset - data_loader: - type: ADGenDataLoader - dataset_dir: "/path/to/CodeAlpaca/dev.json" - shuffle: False - phase: "eval" - version: 2 - origin_columns: ["PROMPT", "ANSWER"] - tokenizer: - type: ChatGLM2Tokenizer - vocab_file: "/path/to/tokenizer.model" - max_source_length: 256 - max_target_length: 256 - ignore_pad_token_for_loss: True - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 1 - repeat: 1 - numa_enable: False - prefetch_size: 1 - seed: 0 - -eval_dataset_task: - type: KeyWordGenDataset - dataset_config: *eval_dataset -``` - -### 全参微调 - -当前模型已支持使用**Flash Attention算法**进行全参微调,请参考 [Flash Attention使用文档](../feature_cards/Training_Algorithms.md#flash-attention) - -#### 单卡微调 - -**注:在Atlas 800T A2上无法单卡全参微调codegeex2模型。** - -#### 单机多卡全参微调 - -全参微调使用 `configs/codegeex2/run_codegeex2_6b_finetune_2048.yaml` 配置文件,配置文件中定义了微调所需的各配置项 - -修改数据集/模型权重配置路径: - -- 数据集:修改 `mindformers/configs/codegeex2/run_codegeex2_6b_finetune_2048.yaml` 脚本中`train_dataset` 的 `dataset_dir` 为前文生成的数据集路径。 -- 加载预训练模型权重:修改 `mindformers/configs/codegeex2/run_codegeex2_6b_finetune_2048.yaml` 脚本中的 `load_checkpoint` 为预训练模型权重路径。 - -启动全参微调脚本: - -```shell -cd scripts -# Usage Help: bash run_distribute.sh [RANK_TABLE_FILE] [CONFIG_PATH] [DEVICE_RANGE] [RUN_STATUS] -bash run_distribute.sh /path/to/hccl_8p_01234567_127.0.1.1.json ../configs/codegeex2/run_codegeex2_6b_finetune_2048.yaml '[0,8]' finetune -# 将此处rank_table_file替换为实际路径 -``` - -参数说明 - -```text -RANK_TABLE_FILE: 由mindformers/tools/hccl_tools.py生成的分布式json文件 -CONFIG_PATH: 为configs文件夹下面的codegeex2/run_codegeex2_6b_finetune_2048.yaml配置文件 -DEVICE_RANGE: 为单机分布式卡的范围,如 '[0,8]' 为8卡分布式,不包含8本身 -RUN_STATUS: 为任务运行状态,支持关键字 train\finetune\eval\predict -``` - -训练的log日志路径:mindformers/output/log - -checkpoint存储路径:mindformers/output/checkpoint - -#### 多机多卡启动 - -- step 1. 
首先参考单机多卡启动方式,在每台机器上运行`mindformers/tools/hccl_tools.py`生成`RANK_TABLE_FILE`的json文件。 - -```shell -# 在每个机器上运行如下命令,生成各自的RANK_TABLE_FILE的json文件。 -python ./mindformers/tools/hccl_tools.py --device_num [0,8] -``` - -- step 2. 合并每台机器上生成的`RANK_TABLE_FILE`。 - -将不同机器上生成的`RANK_TABLE_FILE`文件拷贝到一起,执行`merge_hccl.py`脚本进行合并,包括server_list合并,`server_count`设为机器数,`rank_id`顺序增加。 - -```shell -# 运行如下命令,合并每个机器上的RANK_TABLE_FILE文件。 -python ./mindformers/tools/merge_hccl.py hccl*.json -``` - -- step 3. 将合并后的`RANK_TABLE_FILE`文件拷贝到所有机器中,保证不同机器上的`RANK_TABLE_FILE`相同。 - -- step 4. 根据服务器节点数等信息,修改相应的配置。 - -```shell -# 以codegeex2-6b模型两机训练为例,默认配置2机16卡,如果节点数有变,需要修改相应的配置。 -# 配置文件路径:../configs/codegeex2/run_codegeex2_6b_finetune_2048.yaml -parallel_config: - data_parallel: 2 - model_parallel: 4 - pipeline_stage: 2 - optimizer_shard: True - micro_batch_num: 16 - vocab_emb_dp: True - gradient_aggregation_group: 4 -``` - -- step 5. 执行运行脚本。 - -在多机上同时拉起任务,每台机器拉起方式参考单机多卡启动方式。需注意,多机多卡的拉起方式,相对于单机多卡,多了一个总卡数`[RANK_SIZE]`的入参。 - -```shell -# 第一台机器 -bash run_distribute.sh {RANK_TABLE_FILE path of the first device} ../configs/codegeex2/run_codegeex2_6b_finetune_2048.yaml [0,8] finetune 16 -# 第二台机器 -bash run_distribute.sh {RANK_TABLE_FILE path of the second device} ../configs/codegeex2/run_codegeex2_6b_finetune_2048.yaml [8,16] finetune 16 -``` - -## 推理 - -### 基于pipeline的推理 - -以下为基于pipeline接口的自定义推理脚本,支持多batch推理。 - -```python -# predict_custom.py 文件 -import os -import argparse -import numpy as np - -import mindspore as ms -from mindspore.train import Model -from mindspore import load_checkpoint, load_param_into_net - -from mindformers import AutoConfig, AutoTokenizer, AutoModel, pipeline -from mindformers import init_context, ContextConfig, ParallelContextConfig -from mindformers.trainer.utils import get_last_checkpoint -from mindformers.tools.utils import str2bool, get_real_rank - - -def context_init(use_parallel=False, device_id=0): - """init context for mindspore.""" - context_config = ContextConfig(mode=0, device_target="Ascend", device_id=device_id) - parallel_config = None - if use_parallel: - parallel_config = ParallelContextConfig(parallel_mode='SEMI_AUTO_PARALLEL', - gradients_mean=False, - full_batch=True) - init_context(use_parallel=use_parallel, - context_config=context_config, - parallel_config=parallel_config) - - -def main(use_parallel=False, - device_id=0, - checkpoint_path="", - use_past=True): - """main function.""" - # 初始化单卡/多卡环境 - context_init(use_parallel, device_id) - - # 多batch输入 - inputs = ["#language: Python\n# write a bubble sort function\n", - "#language: Python\n# write a quick sort function\n", - "#language: Python\n# write a heap sort function\n"] - - # set model config - model_config = AutoConfig.from_pretrained("codegeex2_6b") - model_config.use_past = use_past - if checkpoint_path and not use_parallel: - model_config.checkpoint_name_or_path = checkpoint_path - print(f"config is: {model_config}") - - # build tokenizer - tokenizer = AutoTokenizer.from_pretrained("codegeex2_6b") - # build model from config - network = AutoModel.from_config(model_config) - - # if use parallel, load distributed checkpoints - if use_parallel: - # find the sharded ckpt path for this rank - ckpt_path = os.path.join(checkpoint_path, "rank_{}".format(get_real_rank())) - ckpt_path = get_last_checkpoint(ckpt_path) - print("ckpt path: %s", str(ckpt_path)) - - # shard codegeex2 and load sharded ckpt - model = Model(network) - model.infer_predict_layout(ms.Tensor(np.ones(shape=(model_config.batch_size, model_config.seq_length)), ms.int32)) - 
checkpoint_dict = load_checkpoint(ckpt_path) - not_load_network_params = load_param_into_net(model, checkpoint_dict) - print("Network parameters are not loaded: %s", str(not_load_network_params)) - - text_generation_pipeline = pipeline(task="text_generation", model=network, tokenizer=tokenizer) - outputs = text_generation_pipeline(inputs) - for output in outputs: - print(output) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--use_parallel', default=False, type=str2bool, - help='whether use parallel.') - parser.add_argument('--device_id', default=0, type=int, - help='set device id.') - parser.add_argument('--checkpoint_path', default='', type=str, - help='set checkpoint path.') - parser.add_argument('--use_past', default=True, type=str2bool, - help='whether use past.') - args = parser.parse_args() - - main(args.use_parallel, - args.device_id, - args.checkpoint_path, - args.use_past) -``` - -#### 单卡pipeline推理 - -```bash -python predict_custom.py -``` - -### 基于generate的推理 - -以下为基于model.generate接口的自定义推理脚本,支持多batch推理。 - -```python -# predict_custom.py 文件 -import os -import argparse -import numpy as np - -import mindspore as ms -from mindspore.train import Model -from mindspore import load_checkpoint, load_param_into_net - -from mindformers import AutoConfig, AutoTokenizer, AutoModel -from mindformers import init_context, ContextConfig, ParallelContextConfig -from mindformers.trainer.utils import get_last_checkpoint -from mindformers.tools.utils import str2bool, get_real_rank - - -def context_init(use_parallel=False, device_id=0): - """init context for mindspore.""" - context_config = ContextConfig(mode=0, device_target="Ascend", device_id=device_id) - parallel_config = None - if use_parallel: - parallel_config = ParallelContextConfig(parallel_mode='SEMI_AUTO_PARALLEL', - gradients_mean=False, - full_batch=True) - init_context(use_parallel=use_parallel, - context_config=context_config, - parallel_config=parallel_config) - - -def main(use_parallel=False, - device_id=0, - checkpoint_path="", - use_past=True): - """main function.""" - # 初始化单卡/多卡环境 - context_init(use_parallel, device_id) - - # 多batch输入 - inputs = ["#language: Python\n# write a bubble sort function\n", - "#language: Python\n# write a quick sort function\n", - "#language: Python\n# write a heap sort function\n"] - - # set model config - model_config = AutoConfig.from_pretrained("codegeex2_6b") - model_config.batch_size = len(inputs) - model_config.use_past = use_past - if checkpoint_path and not use_parallel: - model_config.checkpoint_name_or_path = checkpoint_path - print(f"config is: {model_config}") - - # build tokenizer - tokenizer = AutoTokenizer.from_pretrained("codegeex2_6b") - # build model from config - model = AutoModel.from_config(model_config) - - # if use parallel, load distributed checkpoints - if use_parallel: - # find the sharded ckpt path for this rank - ckpt_path = os.path.join(checkpoint_path, "rank_{}".format(get_real_rank())) - ckpt_path = get_last_checkpoint(ckpt_path) - print("ckpt path: %s", str(ckpt_path)) - - # shard codegeex2 and load sharded ckpt - model = Model(model) - model.infer_predict_layout(ms.Tensor(np.ones(shape=(model_config.batch_size, model_config.seq_length)), ms.int32)) - checkpoint_dict = load_checkpoint(ckpt_path) - not_load_network_params = load_param_into_net(model, checkpoint_dict) - print("Network parameters are not loaded: %s", str(not_load_network_params)) - - inputs_ids = tokenizer(inputs, max_length=model_config.seq_length, 
padding="max_length")["input_ids"] - outputs = model.generate(inputs_ids, max_length=model_config.max_decode_length) - for output in outputs: - print(tokenizer.decode(output)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--use_parallel', default=False, type=str2bool, - help='whether use parallel.') - parser.add_argument('--device_id', default=0, type=int, - help='set device id.') - parser.add_argument('--checkpoint_path', default='', type=str, - help='set checkpoint path.') - parser.add_argument('--use_past', default=True, type=str2bool, - help='whether use past.') - args = parser.parse_args() - - main(args.use_parallel, - args.device_id, - args.checkpoint_path, - args.use_past) -``` - -#### 单卡generate推理 - -```bash -python predict_custom.py -``` - -### 脚本启动 - -#### 单卡推理 - -```bash -python run_mindformer.py --config configs/codegeex2/run_codegeex2_6b.yaml --run_mode predict --predict_data #language: Python\n# write a bubble sort function\n --use_parallel False -# output result is: [{'text_generation_text': ['#language: Python\n# write a bubble sort function\n\ndef bubble_sort(list):\n for i in range(len(list) - 1):\n for j in range(len(list) - 1):\n if list[j] > list[j + 1]:\n list[j], list[j + 1] = list[j + 1], list[j]\n return list\n\n\n print(bubble_sort([5, 2, 1, 8, 4]))']}] -``` - -**注**:要提高推理速度,可在对应模型配置文件中进行如下配置,设置增量推理`use_past`为True。 - -```yaml -# model config -use_past: True # 开启增量推理 -use_moe: False -checkpoint_name_or_path: "codegeex2_6b" -max_decode_length: 1024 -top_k: 1 -top_p: 1 -do_sample: True -``` diff --git a/docs/model_cards/glm.md b/docs/model_cards/glm.md deleted file mode 100644 index 8f167e2d..00000000 --- a/docs/model_cards/glm.md +++ /dev/null @@ -1,869 +0,0 @@ -# ChatGLM - -## 模型描述 - -ChatGLM-6B 是一个开源的、支持中英双语的对话语言模型,基于 [General Language Model (GLM)](https://github.com/THUDM/GLM) 架构,具有 62 亿参数。ChatGLM-6B 使用了和 ChatGPT 相似的技术,针对中文问答和对话进行了优化。经过约 1T 标识符的中英双语训练,辅以监督微调、反馈自助、人类反馈强化学习等技术的加持,62 亿参数的 ChatGLM-6B 已经能生成相当符合人类偏好的回答,更多信息请参考清华的[博客](https://chatglm.cn/blog)。在此仓中,提供ChatGLM6B的推理和微调能力。 - -## 仓库介绍 - -`chatGLM6B` 基于 `mindformers` 实现,主要涉及的文件有: - -1. 模型具体实现:`mindformers/models/glm` - - ```bash - glm - ├── __init__.py - ├── attention.py # 自注意力 - ├── chatglm_6b_tokenizer.py # tokenizer - ├── glm_config.py # 模型配置项 - ├── glm.py # 模型实现 - └── layers.py # glm 层定义 - ``` - -2. 
模型配置:`configs/glm` - - ```bash - glm - ├── run_glm_6b_fintune.yaml # 全量微调启动配置 - ├── run_glm_6b_lora.yaml # lora低参微调启动配置 - ├── run_glm_6b_infer.yaml # 推理启动配置 - └── run_glm_6b_lora_infer.yaml # lora模型推理启动配置 - ``` - -## 环境要求 - -- 硬件:Atlas 800 -- MindSpore:2.0.0rc1 / 1.10.1 -- MindFormers版本:dev - -推理可在单机单卡上完成部署 - -全量微调训练需要最少单机8卡,Lora微调训练最少需要1卡 - -## ChatGLM6B推理 - -> 需开发者提前pip安装。具体接口说明请参[API接口](https://gitee.com/mindspore/transformer/wikis/API/) - -### AutoClass推理 - -可以使用AutoClass接口,通过模型名称获取相应的模型/tokenizer实例,并自动下载并加载权重 - -`from_pretrained()` 接口会自动从云上下载预训练的模型,存储路径:`mindformers/checkpoint_download/glm` - -首次运行pipeline推理时需要进行模型编译,需等待一段时间 - -```python ->>> import mindspore; mindspore.set_context(mode=0, device_id=0) ->>> from mindformers import AutoModel, AutoTokenizer, TextGenerationPipeline ->>> model = AutoModel.from_pretrained("glm_6b_chat") ->>> tokenizer = AutoTokenizer.from_pretrained("glm_6b") ->>> pipeline = TextGenerationPipeline(model, tokenizer, max_length=2048) ->>> pipeline("你好") -[{'text_generation_text': ['你好 你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。']}] -``` - -> 注:`AutoModel.from_pretrained()` 接口当前支持 `glm_6b` 和 `glm_6b_chat` 两类模型,前者为通用模型,后者具备推理加速特性,仅用于推理,两者共享权重,在推理场景下建议使用后者,以获得更快的推理体验 - -### pipeline推理 - -也可以不实例化构造模型,直接通过指定任务模型与模型名的方式进行pipeline的构造 - -pipeline中,也可以使用 `glm_6b_chat` 模型加速推理 - -```python ->>> import mindspore; mindspore.set_context(mode=0, device_id=0) ->>> from mindformers import pipeline ->>> task_pipeline = pipeline(task='text_generation', model='glm_6b_chat', max_length=2048) ->>> task_pipeline('你好') -[{'text_generation_text': ['你好 你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。']}] -``` - -### 基于API接口的推理 - -可使用如下`chat_glm.py`脚本: - -```python -import time -import mindspore as ms -import numpy as np -from mindformers.models.glm import GLMConfig, GLMChatModel -from mindformers.models.glm.chatglm_6b_tokenizer import ChatGLMTokenizer -from mindformers.models.glm.glm_processor import process_response - -config = GLMConfig( - position_encoding_2d=True, - use_past=True, - is_sample_acceleration=True, -) - -def chat_glm(): - ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend", device_id=7) - model = GLMChatModel(config) - ms.load_checkpoint("./checkpoint_download/glm/glm_6b.ckpt", model) - tokenizer = ChatGLMTokenizer('./checkpoint_download/glm/ice_text.model') - - prompts = ["你好", "请介绍一下华为", "用python写一个快排"] - history = [] - for query in prompts: - input_ids = tokenizer(query)['input_ids'] - - start_time = time.time() - outputs = model.generate(input_ids, max_length=config.max_decode_length, do_sample=False) - end_time = time.time() - print(f'generate speed: {outputs[0].shape[0]/(end_time-start_time):.2f} tokens/s') - - response = tokenizer.decode(outputs) - response = process_response(response[0]) - print(response) - - -if __name__ == "__main__": - chat_glm() -``` - -## 微调 - -下面以 [ADGEN](https://aclanthology.org/D19-1321.pdf) (广告生成) 数据集为例介绍代码的使用方法 - -### 数据处理(在线加载与离线生成二选一,优先推荐在线加载方式) - -ADGEN 数据集任务为根据输入(content)生成一段广告词(summary)。数据集可选离线生成 `Mindrecord` 或者实时生成两种方式,两种方式选其一即可。 - -```json -{ - "content": "类型#上衣*版型#宽松*版型#显瘦*图案#线条*衣样式#衬衫*衣袖型#泡泡袖*衣款式#抽绳", - "summary": "这件衬衫的款式非常的宽松,利落的线条可以很好的隐藏身材上的小缺点,穿在身上有着很好的显瘦效果。领口装饰了一个可爱的抽绳,漂亮的绳结展现出了十足的个性,配合时尚的泡泡袖型,尽显女性甜美可爱的气息。" -} -``` - -从 [Google Drive](https://drive.google.com/file/d/13_vf0xRTQsyneRKdD1bZIr93vBGOczrk/view?usp=sharing) 或者 [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/b3f119a008264b1cabd1/?dl=1) 下载处理好的 ADGEN 数据集,将解压后的 `AdvertiseGen` 任意目录下 - -#### 1. 
在线加载 - -将任务配置文件 `configs/glm/run_glm_6b_*.yaml` 中的 `==== dataset config ====` 部分中的 `dataset_dir` 指向 `*.json` 文件,`vocab_file` 指向词表文件,**跳过** “2. 离线生成” 步骤。 - -#### 2. 离线生成 - -将任务配置文件 `configs/glm/run_glm_6b_*.yaml` 中的 `==== dataset config ====` 部分替换成: - -```yaml -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: True - input_columns: ["input_ids", "labels", "position_ids", "attention_mask"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 1 - repeat: 1 - numa_enable: False - prefetch_size: 1 - seed: 0 - -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset - -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: True - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 1 - repeat: 1 - numa_enable: False - prefetch_size: 1 - seed: 0 - -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset -``` - -使用 `mindformers/tools/dataset_preprocess/glm/adgen_dataset.py` 脚本将数据集处理成mindrecord格式。 - -执行命令生成训练数据集: - -```bash -python adgen_dataset.py \ - --input_file /path/to/AdvertiseGen/train.json \ - --vocab_file /path/to/ice_text.model\ - --output_file /path/to/AdvertiseGen/train_0604_128.mindrecord \ - --max_source_length 64 \ - --max_target_length 64 \ - --mode train -``` - -执行命令生成评估数据集: - -```bash -python adgen_dataset.py \ - --input_file /path/to/AdvertiseGen/dev.json \ - --vocab_file /path/to/ice_text.model \ - --output_file /path/to/AdvertiseGen/eval_0604_256.mindrecord \ - --max_source_length 256 \ - --max_target_length 256 \ - --mode eval -``` - -### 生成HCCL文件 - -运行mindformers/tools/hccl_tools.py生成RANK_TABLE_FILE的json文件; - -```shell -# step1:机器上运行如下命令,生成各自的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" -``` - -> 注:若使用ModelArts的notebook环境,可从 `/user/config/jobstart_hccl.json` 路径下直接获取rank table,无需手动生成 - -RANK_TABLE_FILE 单机8卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "1", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - {"device_id": "0","device_ip": "192.1.27.6","rank_id": "0"}, - {"device_id": "1","device_ip": "192.2.27.6","rank_id": "1"}, - {"device_id": "2","device_ip": "192.3.27.6","rank_id": "2"}, - {"device_id": "3","device_ip": "192.4.27.6","rank_id": "3"}, - {"device_id": "4","device_ip": "192.1.27.7","rank_id": "4"}, - {"device_id": "5","device_ip": "192.2.27.7","rank_id": "5"}, - {"device_id": "6","device_ip": "192.3.27.7","rank_id": "6"}, - {"device_id": "7","device_ip": "192.4.27.7","rank_id": "7"}], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 全参微调 - -#### run_mindformers脚本启动全参微调 - -全参微调使用 `configs/glm/run_glm_6b_finetune.yaml` 配置文件,配置文件中定义了微调所需的各配置项 - -修改数据集/模型权重配置路径: - -- 数据集:修改 `mindformers/configs/glm/run_glm_6b_finetune.yaml` 脚本中`train_dataset` 的 `dataset_dir` 为前文生成的数据集路径。 -- 加载预训练模型权重:修改 `mindformers/configs/glm/run_glm_6b_finetune.yaml` 脚本中的 `load_checkpoint` 为预训练模型权重路径。 - -启动全参微调脚本: - -```shell -cd scripts -# Usage Help: bash run_distribute.sh [RANK_TABLE_FILE] [CONFIG_PATH] [DEVICE_RANGE] [RUN_STATUS] -bash run_distribute.sh /path/to/hccl_8p_01234567_127.0.1.1.json ../configs/glm/run_glm_6b_finetune.yaml '[0,8]' finetune -# 将此处rank_table_file替换为实际路径 -``` - -参数说明 - -```text -RANK_TABLE_FILE: 由mindformers/tools/hccl_tools.py生成的分布式json文件 -CONFIG_PATH: 为configs文件夹下面的glm/run_glm_6b.yaml配置文件 -DEVICE_RANGE: 
为单机分布式卡的范围,如 '[0,8]' 为8卡分布式,不包含8本身 -RUN_STATUS: 为任务运行状态,支持关键字 train\finetune\eval\predict -``` - -> 注:由于GLM6B的模型较大,无法在单卡上运行,此处仅提供分布式启动脚本 - -训练的log日志路径:mindformers/output/log - -checkpoint存储路径:mindformers/output/checkpoint - -#### Trainer高阶接口启动全参微调 - -下面提供一个使用高阶接口进行GLM模型开发的样例脚本 `task.py`,用户可参照以下步骤熟悉如何使用高阶接口进行GLM模型的训练开发 - -```python -import argparse - -from mindformers import Trainer, TrainingArguments -from mindformers import init_context, ContextConfig, ParallelContextConfig - -def context_init(use_parallel=False, optimizer_parallel=False): - """init context for mindspore.""" - context_config = ContextConfig(mode=0, device_target="Ascend", device_id=0) - parallel_config = None - if use_parallel: - parallel_config = ParallelContextConfig(parallel_mode='SEMI_AUTO_PARALLEL', - gradients_mean=False, - enable_parallel_optimizer=optimizer_parallel, - full_batch=True) - rank_id, device_num = init_context(use_parallel=use_parallel, - context_config=context_config, - parallel_config=parallel_config) - -def main(use_parallel=False, - run_mode='train', - task='text_generation', - model_type='glm_6b', - checkpoint_path='./glm_6b.ckpt', - train_dataset='./train', - eval_dataset='./eval', - predict_data='你好', - batch_size=4, - dp=1, mp=1, pp=1, micro_size=1, op=False): - if use_parallel.lower() == "true": - use_parallel = True - else: - use_parallel = False - # 环境初始化 - context_init(use_parallel, op) - # 训练超参数定义 - training_args = TrainingArguments(num_train_epochs=1, batch_size=batch_size, learning_rate=5e-5, warmup_steps=100, sink_mode=True, sink_size=4) - # 定义任务,预先准备好相应数据集 - task = Trainer(task=task, model=model_type, args=training_args, train_dataset=train_dataset, eval_dataset=eval_dataset) - task.set_parallel_config(data_parallel=dp, - model_parallel=mp, - pipeline_stage=pp, - micro_batch_num=micro_size) - if run_mode == 'train': - # 训练 - task.train() - elif run_mode == 'finetune': - # 微调 - task.finetune(checkpoint_path) - elif run_mode == 'eval': - # 评估 - task.evaluate(checkpoint_path) - elif run_mode == 'predict': - # 推理,仅支持单卡推理 - assert use_parallel == False, "only support predict under stand_alone mode." - result = task.predict(input_data=predict_data) - print(result) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--run_mode', default='train', required=True, help='set run mode for model.') - parser.add_argument('--use_parallel', default=False, help='open parallel for model.') - parser.add_argument('--task', default='text_generation', required=True, help='set task type.') - parser.add_argument('--model_type', default='glm_6b', required=True, help='set model type.') - parser.add_argument('--checkpoint_path', default=None, help='set checkpoint path.') - parser.add_argument('--train_dataset', default=None, help='set train dataset.') - parser.add_argument('--eval_dataset', default=None, help='set eval dataset.') - parser.add_argument('--batch_size', default=4, help='batch size of dataset.') - parser.add_argument('--data_parallel', default=1, type=int,help='set data parallel number. Default: None') - parser.add_argument('--model_parallel', default=1, type=int, help='set model parallel number. Default: None') - parser.add_argument('--pipeline_parallel', default=1, type=int, help='set pipeline parallel number. Default: None') - parser.add_argument('--micro_size', default=1, type=int, help='set micro batch number. Default: None') - parser.add_argument('--optimizer_parallel', default=False, type=bool, help='whether use optimizer parallel. 
Default: None') - args = parser.parse_args() - print(args) - main(run_mode=args.run_mode, - task=args.task, - use_parallel=args.use_parallel, - model_type=args.model_type, - checkpoint_path=args.checkpoint_path, - train_dataset=args.train_dataset, - eval_dataset=args.eval_dataset, - batch_size=int(args.batch_size), - dp=args.data_parallel, - mp=args.model_parallel, - pp=args.pipeline_parallel, - micro_size=args.micro_size, - op=args.optimizer_parallel) -``` - -因GLM模型过大,**无法在单卡上启动训练**,因此需要**通过分布式脚本拉起多卡训练任务** - -在此提供 `run_distribute_single_node.sh` 单机多卡标准启动脚本,用户可用其拉起分布式训练 - -```bash -#!/bin/bash -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -if [ $# != 4 ] -then - echo "Usage Help: bash run_distribute_single_node.sh [EXECUTE_ORDER] [RANK_TABLE_PATH] [DEVICE_RANGE] [RANK_SIZE] For Multiple Devices In Single Machine" - exit 1 -fi - -check_real_path(){ - if [ "${1:0:1}" == "/" ]; then - echo "$1" - else - echo "$(realpath -m $PWD/$1)" - fi -} - -EXECUTE_ORDER=$1 -RANK_TABLE_PATH=$(check_real_path $2) -DEVICE_RANGE=$3 - -DEVICE_RANGE_LEN=${#DEVICE_RANGE} -DEVICE_RANGE=${DEVICE_RANGE:1:DEVICE_RANGE_LEN-2} -PREFIX=${DEVICE_RANGE%%","*} -INDEX=${#PREFIX} -START_DEVICE=${DEVICE_RANGE:0:INDEX} -END_DEVICE=${DEVICE_RANGE:INDEX+1:DEVICE_RANGE_LEN-INDEX} - -if [ ! -f $RANK_TABLE_PATH ] -then - echo "error: RANK_TABLE_FILE=$RANK_TABLE_PATH is not a file" -exit 1 -fi - - -if [[ ! $START_DEVICE =~ ^[0-9]+$ ]]; then - echo "error: start_device=$START_DEVICE is not a number" -exit 1 -fi - -if [[ ! 
$END_DEVICE =~ ^[0-9]+$ ]]; then - echo "error: end_device=$END_DEVICE is not a number" -exit 1 -fi - -ulimit -u unlimited - -export RANK_SIZE=$4 -export RANK_TABLE_FILE=$RANK_TABLE_PATH - -shopt -s extglob - -for((i=${START_DEVICE}; i<${END_DEVICE}; i++)) -do - export DEVICE_ID=${i} - export RANK_ID=$((i-START_DEVICE)) - mkdir -p ./output/log/rank_$RANK_ID - echo "start training for rank $RANK_ID, device $DEVICE_ID" - $EXECUTE_ORDER &> ./output/log/rank_$RANK_ID/mindformer.log & -done - -shopt -u extglob -``` - -全参微调分布式拉起命令(8卡): - -```bash -bash run_distribute_single_node.sh "python task.py --task text_generation --model_type glm_6b --checkpoint_path ./glm_6b.ckpt --train_dataset ./train --run_mode finetune --use_parallel True --data_parallel 1 --model_parallel 8" /path/to/hccl_8p_xxx.json '[0,8]' 8 -``` - -参数含义: - -- `"python task.py --task text_generation --model_type glm_6b --checkpoint_path ./glm_6b.ckpt --train_dataset ./train --run_mode finetune --use_parallel True --data_parallel 1 --model_parallel 8"`: 需执行的命令,此处完整输入task.py的启动命令 - -python task.py 各项参数含义: - -- `task`: 需运行的训练任务,此处为 `text_generation` 文本生成任务 -- `model_type`: 模型类型,此处选择 `glm_6b` 模型 -- `checkpoint_path`: 权重路径,此处替换为实际需加载的权重路径 -- `train_dataset`: 训练数据集路径,替换为实际路径 -- `run_mode`: 启动模式,train——训练,finetune——微调,eval——评估,predict——推理,此处选择 `finetune` -- `use_parallel`: 是否使用多卡并行训练,此处为 `True` -- `data_parallel`: 数据并行数,此处为1表示不开启 -- `model_parallel`: 模型并行数,此处为8表示8卡并行 - -bash 脚本其余参数: - -- `/path/to/hccl_4p_xxx.json`: rank table file路径,替换为之前准备的rank table file的实际路径 -- `'[0,8]'`: 占用的卡范围,0包含,8不包含,表示使用 `0~7` 8张卡并行训练 -- `8`: rank size,一共使用了多少张卡,此处为8 - -训练的log日志路径:mindformers/output/log - -checkpoint存储路径:mindformers/output/checkpoint - -### LoRA低参微调 - -全参微调能够在微调数据集上取得良好效果,但存在遗忘预训练知识的现象 -因此推荐使用低参微调算法,冻结原模型权重,仅在小规模参数量上进行训练,在微调数据集上取得良好效果的同时,缓解模型遗忘现象 - -#### run_mindformers脚本启动LoRA低参微调 - -使用LoRA算法进行低参微调时,使用 `configs/glm/run_glm_6b_lora.yaml` 配置文件,该配置文件包含了lora低参微调算法所需的配置项 - -修改数据集/模型权重配置路径: - -- 数据集:修改 `mindformers/configs/glm/run_glm_6b_lora.yaml` 脚本中`train_dataset` 的 `dataset_dir` 为前文生成的数据集路径。 -- 加载预训练模型权重:修改 `mindformers/configs/glm/run_glm_6b_lora.yaml` 脚本中的 `load_checkpoint` 为预训练模型权重路径。 - -#### 启动LoRA低参微调脚本(1卡): - -执行命令: - -```shell -cd scripts -# Usage Help: bash run_standalone.sh [CONFIG_PATH] [DEVICE_ID] [RUN_STATUS] -bash run_standalone.sh ../configs/glm/run_glm_6b_lora.yaml 0 finetune -``` - -训练的log日志路径:mindformers/scripts/mf_standalone/ - -checkpoint存储路径:mindformers/scripts/mf_standalone/output/checkpoint - -#### 启动LoRA低参微调脚本(4卡): - -> 注:如果需要进行多卡训练,则需要对`glm/run_glm_6b_lora.yaml`配置文件对应参数进行修改,以4卡为例,需要重新生成4卡的HCCL文件: - -```shell -data_parallel: 4 -``` - -```shell -cd scripts -# Usage Help: bash run_distribute.sh [RANK_TABLE_FILE] [CONFIG_PATH] [DEVICE_RANGE] [RUN_STATUS] -bash run_distribute.sh /path/to/hccl_4_0123_xxx.json ../configs/glm/run_glm_6b_lora.yaml '[0,4]' finetune -# 将此处rank_table_file替换为实际路径 -``` - -参数说明: - -对比全参微调启动方式,仅将 `CONFIG_PATH` 项修改为configs文件夹下面的 `glm/run_glm_6b_lora.yaml` 配置文件,表示使用该接口进行 - -训练的log日志路径:mindformers/output/log - -checkpoint存储路径:mindformers/output/checkpoint - -#### Trainer高阶接口启动LoRA低参微调 - -可复用全参微调部分所提供的 `task.py` 和 `run_distribute_single_node.sh` 脚本 - -4卡分布式启动命令: - -```bash -bash run_distribute_single_node.sh "python task.py --task text_generation --model_type glm_6b_lora --checkpoint_path ./glm_6b.ckpt --train_dataset ./train --run_mode finetune --use_parallel True --data_parallel 4 --model_parallel 1" /path/to/hccl_4p_xxx.json '[0,4]' 4 -``` - -参数说明:对比全参微调启动,仅改动以下几点: - -- `model_type`: 指定模型类型为 
`glm_6b_lora`,表示使用低参微调算法 -- `data_parallel`: 4卡启动,数据并行改为4 -- `/path/to/hccl_4p_xxx.json`: 使用4卡的rank_table_file -- `'[0,4]' 4`: 使用0~3共4卡 - -训练的log日志路径:mindformers/output/log - -checkpoint存储路径:mindformers/output/checkpoint - -1卡启动命令: - -```shell -python task.py --task text_generation --model_type glm_6b_lora --checkpoint_path ./glm_6b.ckpt --train_dataset ./train --run_mode finetune --use_parallel False --data_parallel 1 --model_parallel 1 -``` - -### 多机多卡微调训练 - -多机多卡启动 -首先在每台机器上运行mindformers/tools/hccl_tools.py生成RANK_TABLE_FILE的json文件; - -将不同机器上生成的RANK_TABLE_FILE文件中的server_list合并,server_count设为机器数,rank_id顺序增加,并保证不同机器上的RANK_TABLE_FILE相同; - -在多机上同时拉起任务,拉起方式为 - -cd scripts -bash run_distribute.sh RANK_TABLE_FILE CONFIG_PATH DEVICE_RANGE RUN_MODE RANK_SIZE - -#### 参数说明 - -- RANK_TABLE_FILE: 由mindformers/tools/hccl_tools.py生成的分布式json文件 -- CONFIG_PATH: 为configs文件夹下面的gpt2/run_gpt2*.yaml配置文件 -- DEVICE_RANGE: 为单机分布式卡的范围, 如[0,8]为8卡分布式,不包含8本身 -- RUN_MODE: 为任务运行状态,支持关键字 train 预训练、predict(文本生成预测) -- RANK_SIZE: 总运行卡数 - -#### 4机32卡参考RANK_TABLE_FILE样例 - -```text -{ - "version": "1.0", - "server_count": "4", - "server_list": [ - { - "server_id": "10.155.111.140", - "device": [ - {"device_id": "0","device_ip": "192.1.27.6","rank_id": "0"}, - {"device_id": "1","device_ip": "192.2.27.6","rank_id": "1"}, - {"device_id": "2","device_ip": "192.3.27.6","rank_id": "2"}, - {"device_id": "3","device_ip": "192.4.27.6","rank_id": "3"}, - {"device_id": "4","device_ip": "192.1.27.7","rank_id": "4"}, - {"device_id": "5","device_ip": "192.2.27.7","rank_id": "5"}, - {"device_id": "6","device_ip": "192.3.27.7","rank_id": "6"}, - {"device_id": "7","device_ip": "192.4.27.7","rank_id": "7"}], - "host_nic_ip": "reserve" - }, - { - "server_id": "10.155.111.141", - "device": [ - {"device_id": "0","device_ip": "192.1.27.8","rank_id": "8"}, - {"device_id": "1","device_ip": "192.2.27.8","rank_id": "9"}, - {"device_id": "2","device_ip": "192.3.27.8","rank_id": "10"}, - {"device_id": "3","device_ip": "192.4.27.8","rank_id": "11"}, - {"device_id": "4","device_ip": "192.1.27.9","rank_id": "12"}, - {"device_id": "5","device_ip": "192.2.27.9","rank_id": "13"}, - {"device_id": "6","device_ip": "192.3.27.9","rank_id": "14"}, - {"device_id": "7","device_ip": "192.4.27.9","rank_id": "15"}], - "host_nic_ip": "reserve" - }, - { - "server_id": "10.155.111.142", - "device": [ - {"device_id": "0","device_ip": "192.1.27.10","rank_id": "16"}, - {"device_id": "1","device_ip": "192.2.27.10","rank_id": "17"}, - {"device_id": "2","device_ip": "192.3.27.10","rank_id": "18"}, - {"device_id": "3","device_ip": "192.4.27.10","rank_id": "19"}, - {"device_id": "4","device_ip": "192.1.27.11","rank_id": "20"}, - {"device_id": "5","device_ip": "192.2.27.11","rank_id": "21"}, - {"device_id": "6","device_ip": "192.3.27.11","rank_id": "22"}, - {"device_id": "7","device_ip": "192.4.27.11","rank_id": "23"}], - "host_nic_ip": "reserve" - }, - { - "server_id": "10.155.111.143", - "device": [ - {"device_id": "0","device_ip": "192.1.27.12","rank_id": "24"}, - {"device_id": "1","device_ip": "192.2.27.12","rank_id": "25"}, - {"device_id": "2","device_ip": "192.3.27.12","rank_id": "26"}, - {"device_id": "3","device_ip": "192.4.27.12","rank_id": "27"}, - {"device_id": "4","device_ip": "192.1.27.13","rank_id": "28"}, - {"device_id": "5","device_ip": "192.2.27.13","rank_id": "29"}, - {"device_id": "6","device_ip": "192.3.27.13","rank_id": "30"}, - {"device_id": "7","device_ip": "192.4.27.13","rank_id": "31"}], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - 
-#### 任务拉起命令示例 - -```shell -# 第一台机器 -bash run_distribute.sh {RANK_TABLE_FILE path of the first device} ../configs/glm/run_glm_6b_lora.yaml [0,8] train 32 -# 第二台机器 -bash run_distribute.sh {RANK_TABLE_FILE path of the first device} ../configs/glm/run_glm_6b_lora.yaml [8,16] train 32 -# 第三台机器 -bash run_distribute.sh {RANK_TABLE_FILE path of the first device} ../configs/glm/run_glm_6b_lora.yaml [16,24] train 32 -# 第四台机器 -bash run_distribute.sh {RANK_TABLE_FILE path of the first device} ../configs/glm/run_glm_6b_lora.yaml [24,32] train 32 -``` - -### 微调后推理 - -#### 推理样例脚本 - -下面提供一个模型推理样例脚本 `infer.py` - -```python -import time -import mindspore as ms -import numpy as np -import argparse -from mindformers.models.glm import GLMConfig, GLMChatModel -from mindformers.models.glm.chatglm_6b_tokenizer import ChatGLMTokenizer -from mindformers.models.glm.glm_processor import process_response -from mindformers.pet.pet_config import LoraConfig -from mindformers.pet import get_pet_model - -parser = argparse.ArgumentParser() -parser.add_argument('--seq_length', default=1024, type=int, help='Which device to run service.') -parser.add_argument('--device_id', default=0, type=int, help='Which device to run service.') -parser.add_argument('--checkpoint_path', type=str, default='/path/chatglm6b.ckpt', help='Checkpoint file to load on.') -parser.add_argument('--vocab_path', type=str, default='/path/ice_text.model', help='Vocab file to load on.') -parser.add_argument('--is_lora', type=str, default='false',help='Whether is lora model.') - -args = parser.parse_args() - -if args.is_lora.lower() == "true": - is_lora = True -else: - is_lora = False - -config = GLMConfig( - position_encoding_2d=True, - use_past=True, - is_sample_acceleration=True, -) - -pet_config = LoraConfig( - lora_rank=8, - lora_alpha=32, - lora_dropout=0.1, - target_modules = '.*query_key_value*' -) - - -def chat_glm(): - ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend", device_id=args.device_id) - model = GLMChatModel(config) - if is_lora: - config.pet_config = pet_config - model = get_pet_model(model, pet_config) - ms.load_checkpoint(args.checkpoint_path, model) - tokenizer = ChatGLMTokenizer(args.vocab_path) - - inputs = ["你好", - "请介绍一下华为", - "用Python写一个快排", - "类型#上衣*材质#牛仔布*颜色#白色*风格#简约*图案#刺绣*衣样式#外套*衣款式#破洞"] - - for query in inputs: - input_ids = tokenizer(query)['input_ids'] - - start_time = time.time() - outputs = model.generate(input_ids, max_length=config.max_decode_length, do_sample=False) - end_time = time.time() - print(f'generate speed: {outputs[0].shape[0]/(end_time-start_time):.2f} tokens/s') - - response = tokenizer.decode(outputs) - response = process_response(response[0]) - print(response) - - -if __name__ == "__main__": - chat_glm() -``` - -#### 运行命令 - -```shell -python infer.py --seq_length 1024 --device_id 0 --checkpoint_path /path/chatglm6b.ckpt --vocab_path /path/ice_text.model --is_lora True -``` - -参数说明: - -- `seq_length`: 用于指定推理输入长度 -- `device_id`: 指定推理在那张设备运行 -- `checkpoint_path`: 指定训练出来的模型文件路径用于推理 -- `vocab_path`: 模型词表 -- `is_lora`: 用于区分是否是lora模型,设置为true表示为lora微调训练模型 - -## 评估 - -### 模型权重文件合一 - -微调所得到的权重文件为根据模型切分策略切分后的权重,我们需要手动将切分权重合一,以用于评估和推理 - -1. 获取模型切分策略文件: - 在执行全参微调脚本时,模型完成编译后,将会在运行路径下,生成名为 `ckpt_strategy.ckpt` 的切分策略文件,该文件将用于第二步模型合成 - -2. 
MindSpore提供了根据切分策略转换模型权重切分的接口,[mindspore.transform_checkpoints](https://www.mindspore.cn/docs/zh-CN/r2.0/api_python/mindspore/mindspore.transform_checkpoints.html),执行以下python脚本,将8份模型文件合成一份 - - ```python - from mindspore import transform_checkpoints - transform_checkpoints( - src_checkpoints_dir="./output/checkpoint/", # 原切分权重文件夹 - dst_checkpoints_dir="./target_checkpoint/", # 目标路径 - ckpt_prefix="glm-6b", # .ckpt文件前缀名 - src_strategy_file="ckpt_stragery.ckpt", # 步骤1中的切分策略文件路径 - dst_strategy_file=None # None表示不切分,权重合一 - ) - ``` - -> 注:`transform_checkpoints` 接口当前仅mindspore 2.0以上版本支持,如当前硬件环境只支持2.0以下版本,可以新建conda环境安装mindspore 2.0的cpu版本以执行该脚本 - -### 使用全参微调权重 - -#### run_mindformers启动eval - -使用全参微调权重时,启动如下shell脚本,执行单卡评估 - -配置文件选择 `configs/glm/run_glm_6b_infer.yaml` glm模型推理配置,此配置下评估速度更快 - -```bash -python run_mindformer.py --config configs/glm/run_glm_6b_infer.yaml --run_mode eval --load_checkpoint /path/to/glm_6b.ckpt --eval_dataset_dir /path/to/data/AdvertiseGen/ --device_id 0 -``` - -> 注:使用离线生成数据方式时,将 `eval_dataset_dir` 一项指向`.mindrecord`文件,如 `/path/to/data/AdvertiseGen/adgen_dev.mindrecord`。 - -各项参数: - -- `config`: 指定用于评估的配置文件名称,此处为`configs/glm/run_glm_6b_infer.yaml` -- `run_mode`: 指定执行模式,此为`eval`,表示为评估模式 -- `load_checkpoint`: 指定要加载的checkpoint路径,此处为`/path/to/glm_6b.ckpt`,替换为需加载的权重的真实路径 -- `eval_dataset_dir`: 评估数据集的路径 -- `device_id`: 指定要使用的设备编号(从0开始) - -评估完成后会打印评估指标 `bleu-4`、`rouge-1`、`rouge-2`、`rouge-l` - -> 注:由于默认评估指标的获取方式为生成完整文本后与预期文本做比较,评估速度将受限于模型大小与文本生成速度,评估流程可能较为缓慢 - -#### Trainer高阶接口启动eval - -仍然可复用 `task.py` 脚本,启动命令: - -```bash -python task.py --task text_generation --model_type glm_6b_chat --checkpoint_path /path/to/glm_6b.ckpt --eval_dataset /path/to/data/AdvertiseGen/ --run_mode eval --batch_size 1 -``` - -> 1. 当前评估时,batch_size需为1,否则评估速度下降严重 -> 2. 使用离线生成数据方式时,将 `eval_dataset` 一项指向`.mindrecord`文件,如 `/path/to/data/AdvertiseGen/adgen_dev.mindrecord`。 - -### 使用LoRA低参微调权重 - -#### run_mindformers启动lora eval - -使用LoRA低参微调权重时,启动如下shell脚本,执行单卡评估 - -配置文件选择 `configs/glm/run_glm_6b_lora_infer.yaml` glm_lora模型推理配置,此配置可用于lora模型,并且评估速度更快 - -```bash -python run_mindformer.py --config configs/glm/run_glm_6b_lora_infer.yaml --run_mode eval --load_checkpoint /path/to/glm_6b_lora.ckpt --eval_dataset_dir /path/to/data/AdvertiseGen/ --device_id 0 -``` - -各项参数同上,路径需替换为实际路径 - -> 使用离线生成数据方式时,将 `eval_dataset_dir` 一项指向`.mindrecord`文件,如 `/path/to/data/AdvertiseGen/adgen_dev.mindrecord`。 - -#### Trainer高阶接口启动lora eval - -仍然可复用 `task.py` 脚本,启动命令: - -```bash -python task.py --task text_generation --model_type glm_6b_lora_chat --checkpoint_path /path/to/glm_6b_lora.ckpt --eval_dataset /path/to/data/AdvertiseGen/ --run_mode eval --batch_size 1 -``` - -> 1. 当前评估时,batch_size需为1,否则评估速度下降严重 -> 2. 使用离线生成数据方式时,将 `eval_dataset_dir` 一项指向`.mindrecord`文件,如 `/path/to/data/AdvertiseGen/adgen_dev.mindrecord`。 - -## 模型权重转化 - -本仓库中的`glm`来自于HuggingFace的[chatglm-6b](https://huggingface.co/THUDM/chatglm-6b),基于下述的步骤获取: - -1. 克隆chatglm-6b代码仓,下载分布式的模型文件。 - - ```shell - git lfs install - git clone https://huggingface.co/THUDM/chatglm-6b - ``` - -2. 
执行转换脚本,得到转换后的输出文件`ms_glm_6b.ckpt`。 - - ```shell - python mindformers/models/glm/convert_weight.py --pt_ckpt_path "replace your ptroch pth path" --ms_ckpt_path ./ms_glm_6b.ckpt - ``` - - ```shell - # 参数说明 - pt_ckpt_path: huggingface权重保存目录下的任意权重bin文件,根据该文件路径读取目录下全部权重 - ms_ckpt_path: 权重保存文件名,可以指定自定义保存路径 - ``` - diff --git a/docs/model_cards/llama.md b/docs/model_cards/llama.md deleted file mode 100644 index 1779aede..00000000 --- a/docs/model_cards/llama.md +++ /dev/null @@ -1,1150 +0,0 @@ -# LLaMA - -## 模型描述 - -LLaMA是由Meta于2023年发布。LLaMA模型是类GPT模型,是一个生成式的语言模型,主要是用于预测下一个单词。LLaMA目前按照参数量,目前有四个版本:LLaMA-7B(7B)、LLaMA-13B(13B)、LLaMA-33B(33B)以及LLaMA-65B(65B),目前在本仓库中,支持了7B,13B和65B三个规格的模型,权重文件来源于OpenLLaMA。 - -[LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971) - -``` text -@article{touvron2023llama, - title={LLaMA: Open and Efficient Foundation Language Models}, - author={Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and Lachaux, Marie-Anne and Lacroix, Timoth{\'e}e and Rozi{\`e}re, Baptiste and Goyal, Naman and Hambro, Eric and Azhar, Faisal and Rodriguez, Aurelien and Joulin, Armand and Grave, Edouard and Lample, Guillaume}, - journal={arXiv preprint arXiv:2302.13971}, - year={2023} -} -``` - -## 模型性能 - -- 基于Atlas 800 - -**llama_7b**: - -| config | task | Datasets | metric | phase | performance | -| ----------------------------------------------------------- | --------------- | --------- | ------ | --------------------- | ----------------------------- | -| [llama_7b](../../configs/llama/run_llama_7b.yaml) | text_generation | WikiText2 | - | [pretrain](#预训练) | 1229 tokens/s/p | -| [llama_7b](../../configs/llama/run_llama_7b.yaml) | text_generation | alpaca | - | [finetune](#全参微调) | 1229 tokens/s/p | -| [llama_7b_lora](../../configs/llama/run_llama_7b_lora.yaml) | text_generation | alpaca | - | [finetune](#lora微调) | 1843 tokens/s/p | -| [llama_7b](../../configs/llama/run_llama_7b.yaml) | text_generation | WikiText2 | PPL | [eval](#评测) | 8.28 | -| [llama_7b](../../configs/llama/run_llama_7b.yaml) | text_generation | SQuAD 1.1 | Em/F1 | [eval](#评测) | 26.85/48.51 | -| [llama_7b](../../configs/llama/run_llama_7b.yaml) | text_generation | - | - | [predict](#推理) | 22.4 tokens/s (use_past=True) | - -llama_13b / llama_65b 待补充 - -## 仓库介绍 - -`LLaMA` 基于 `mindformers` 实现,主要涉及的文件有: - -1. 模型具体实现:`mindformers/models/llama` - - ```bash - llama - ├── __init__.py - ├── convert_weight.py # 权重转换脚本 - ├── llama.py # 模型实现 - ├── llama_config.py # 模型配置项 - ├── llama_layer.py # llama网络层定义 - ├── llama_processor.py # llama预处理 - ├── llama_tokenizer.py # tokenizer - └── llama_transformer.py # transformer层实现 - ``` - -2. 模型配置:`configs/llama` - - ```bash - llama - ├── run_llama_7b.yaml # 7b模型全量微调启动配置 - ├── run_llama_7b_910b.yaml # 7b模型全量微调启动配置(Atlas 800T A2) - ├── run_llama_7b_lora.yaml # 7b lora低参微调启动配置 - ├── run_llama_13b.yaml # 13b全量微调启动配置 - ├── run_llama_13b_910b.yaml # 13b全量微调启动配置(Atlas 800T A2) - ├── run_llama_65b.yaml # 65b全量微调启动配置 - └── run_llama_65b_910b.yaml # 65b全量微调启动配置(Atlas 800T A2) - ``` - -3. 
数据预处理脚本: - - ```bash - mindformers/tools/dataset_preprocess/llama/ - ├── alpaca_converter.py # 基于fschat的alpaca数据集格式转换脚本 - ├── llama_preprocess.py # llama模型的mindrecord数据处理脚本 - └── squad_data_process.py # squad数据集格式转换脚本 - ``` - -## 前期准备 - -### 环境要求 - -- 硬件:Atlas 800/Atlas 800T A2 -- MindSpore:2.2.0 -- MindFormers版本:r1.0 - -> 注:推理可在单机单卡上完成部署;全量微调至少需要单机8卡,Lora微调至少需要单卡。 - -### [mindformers安装](../../README.md#二mindformers安装) - -### 生成RANK_TABLE_FILE(多卡运行必须环节) - -运行mindformers/tools/hccl_tools.py生成RANK_TABLE_FILE的json文件 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" -``` - -**注:若使用ModelArts的notebook环境,可从 `/user/config/jobstart_hccl.json` 路径下直接获取rank table,无需手动生成** - -RANK_TABLE_FILE 单机8卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "1", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - {"device_id": "0","device_ip": "192.1.27.6","rank_id": "0"}, - {"device_id": "1","device_ip": "192.2.27.6","rank_id": "1"}, - {"device_id": "2","device_ip": "192.3.27.6","rank_id": "2"}, - {"device_id": "3","device_ip": "192.4.27.6","rank_id": "3"}, - {"device_id": "4","device_ip": "192.1.27.7","rank_id": "4"}, - {"device_id": "5","device_ip": "192.2.27.7","rank_id": "5"}, - {"device_id": "6","device_ip": "192.3.27.7","rank_id": "6"}, - {"device_id": "7","device_ip": "192.4.27.7","rank_id": "7"}], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 多机RANK_TABLE_FILE合并(多机多卡必备环节) - -- step 1. 首先根据上章节内容,在每个机器上生成各自的`RANK_TABLE_FILE`文件,然后将不同机器上生成的`RANK_TABLE_FILE`文件全部拷贝到同一台机器上。 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" --server_ip xx.xx.xx.xx -``` - -**注:需要根据机器的ip地址指定 --server_ip,避免由于不同机器server_ip不同,导致多节点间通信失败。** - -- step 2. 运行mindformers/tools/merge_hccl.py将不同机器上生成的`RANK_TABLE_FILE`文件合并 - -```bash -# 运行如下命令,合并每个机器上的RANK_TABLE_FILE的json文件。 -python ./mindformers/tools/merge_hccl.py hccl*.json -``` - -- step 3. 
将合并后的`RANK_TABLE_FILE`文件拷贝到所有机器中,保证不同机器上的`RANK_TABLE_FILE`相同。 - -RANK_TABLE_FILE 双机16卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "2", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.0", "rank_id": "0" - }, - { - "device_id": "1", "device_ip": "192.168.1.0", "rank_id": "1" - }, - { - "device_id": "2", "device_ip": "192.168.2.0", "rank_id": "2" - }, - { - "device_id": "3", "device_ip": "192.168.3.0", "rank_id": "3" - }, - { - "device_id": "4", "device_ip": "192.168.0.1", "rank_id": "4" - }, - { - "device_id": "5", "device_ip": "192.168.1.1", "rank_id": "5" - }, - { - "device_id": "6", "device_ip": "192.168.2.1", "rank_id": "6" - }, - { - "device_id": "7", "device_ip": "192.168.3.1", "rank_id": "7" - } - ], - "host_nic_ip": "reserve" - }, - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.1", "rank_id": "8" - }, - { - "device_id": "1", "device_ip": "192.168.1.1", "rank_id": "9" - }, - { - "device_id": "2", "device_ip": "192.168.2.1", "rank_id": "10" - }, - { - "device_id": "3", "device_ip": "192.168.3.1", "rank_id": "11" - }, - { - "device_id": "4", "device_ip": "192.168.0.2", "rank_id": "12" - }, - { - "device_id": "5", "device_ip": "192.168.1.2", "rank_id": "13" - }, - { - "device_id": "6", "device_ip": "192.168.2.2", "rank_id": "14" - }, - { - "device_id": "7", "device_ip": "192.168.3.2", "rank_id": "15" - } - ], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 模型权重下载与转换 - -开发者可以下载获取官方权重后,通过下面提供的**权重转换脚本**,将官方权重转换为MindSpore权重;或直接使用MindFormers提供的**已转换权重** - -1. 使用官方权重进行转换 - 从huggingface下载英文预训练权重(权重来源于OpenLLaMA): - - - [llama-7b](https://huggingface.co/openlm-research/open_llama_7b) - - - [llama-13b](https://huggingface.co/openlm-research/open_llama_13b) - - > 注:65B权重OpenLLaMA未提供,如有需要,请开发者自行解决。 - - 下载完成后,运行如下转换脚本,将huggingface的权重转换为完整的ckpt权重。 - - ```shell - python mindformers/models/llama/convert_weight.py \ - --torch_ckpt_path TORCH_CKPT_PATH \ - --mindspore_ckpt_path {path}/MS_CKPT_NAME - ``` - - ```text - # 参数说明 - torch_ckpt_path: huggingface权重保存目录下的任意权重bin文件,根据该文件路径读取目录下全部权重 - mindspore_ckpt_path: 权重保存文件名,可以指定自定义保存路径 - ``` - -2. 获取MindFormers提供的已转换权重 - 可通过from_pretrained接口下载,也可直接从下面的链接获取 - - [llama_7b权重](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/llama/open_llama_7b.ckpt) - - [llama_13b权重](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/llama/open_llama_13b.ckpt) - - [tokenizer文件](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/llama/tokenizer.model) - -### [分布式训练/微调权重合并](../feature_cards/Transform_Ckpt.md) - -分布式训练/微调后所得到的权重文件为根据策略切分后的权重,需要手动将切分权重合一,以用于评估和推理。 - -涉及到ckpt的单卡,多卡转换,详细教程请参考特性文档模型[权重切分与合并](../feature_cards/Transform_Ckpt.md) - -- step 1. 获取模型切分策略文件: - -在执行微调脚本时,模型完成编译后,将会在`output/strategy`路径下生成各卡的切分策略文件,用于权重合并。 - -> 注:lora微调时需要确认配置文件`parallel context config`中`only_trainable_params`设为`False`,以获取所有参数完整策略。 - -- step 2. 
运行`mindformers/tools/transform_ckpt.py`脚本进行多卡权重合并: - -```shell -python transform_ckpt.py \ ---src_ckpt_strategy {path}/output/strategy/ \ ---src_ckpt_dir {path}/output/checkpoint/ \ ---dst_ckpt_dir {path}/target_checkpoint/ \ ---prefix llama_7b -``` - -```text -# 参数说明 -src_ckpt_strategy: 步骤1中的切分策略文件路径 -src_ckpt_dir: 原切分权重文件夹 -dst_ckpt_dir: 目标路径 -prefix: ckpt文件前缀名 -``` - -> 注:`transform_checkpoints` 接口当前仅mindspore 2.0以上版本支持,如当前硬件环境只支持2.0以下版本,可以新建conda环境安装mindspore 2.0的cpu版本以执行该脚本 - -## 基于API的快速使用 - -### 基于AutoClass的快速使用 - -可以使用AutoClass接口,通过模型名称获取相应的model/preprocess/tokenizer等实例,并自动下载并加载权重 - -`from_pretrained()` 接口会自动从云上下载预训练的模型,存储路径:`mindformers/checkpoint_download/llama` - -```python -import mindspore -from mindformers import AutoConfig, AutoModel, AutoTokenizer - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) - -tokenizer = AutoTokenizer.from_pretrained('llama_7b') - -# model的实例化有以下两种方式,选择其中一种进行实例化即可 -# 1. 直接根据默认配置实例化 -model = AutoModel.from_pretrained('llama_7b') -# 2. 自定义修改配置后实例化 -config = AutoConfig.from_pretrained('llama_7b') -config.use_past = True # 此处修改默认配置,开启增量推理能够加速推理性能 -# config.xxx = xxx # 根据需求自定义修改其余模型配置 -model = AutoModel.from_config(config) # 从自定义配置项中实例化模型 - -inputs = tokenizer("I love Beijing, because")["input_ids"] -# 首次调用model.generate()进行推理将包含图编译时间,推理性能显示不准确,多次重复调用以获取准确的推理性能 -outputs = model.generate(inputs, max_new_tokens=20, do_sample=True, top_k=3) -response = tokenizer.decode(outputs) -print(response) -# ['I love Beijing, because it’s a city that has everything: the old and the new, the modern and the ancient'] -``` - -### 基于Trainer的快速训练,微调,评测,推理 - -> 注:下面仅显示接口使用方式,模型启动训练需求多卡分布式训练,训练脚本需配合分布式脚本启动 - -```python -import mindspore -from mindformers.trainer import Trainer - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) - -# 初始化预训练任务 -trainer = Trainer(task='text_generation', - model='llama_7b', - train_dataset='path/to/train_dataset', - eval_dataset='path/to/eval_dataset') - -# 开启预训练 -# 请参照多卡训练,llama不支持单卡启动训练 -# trainer.train() - -# 开启全量微调 -# 请参照多卡微调,llama不支持单卡启动全量微调 -# trainer.finetune() - -# 开启评测 -trainer.evaluate() - -# 开启推理 -predict_result = trainer.predict(input_data="I love Beijing, because") -# [{'text_generation_text': ['I love Beijing, because it’s a city that has everything: the old and the new, the modern and the ancient']}] -``` - -### 基于Pipeline的快速推理 - -```python -import mindspore -from mindformers.pipeline import pipeline - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) - -pipeline_task = pipeline("text_generation", model='llama_7b', max_length=20) -pipeline_result = pipeline_task("I love Beijing, because", do_sample=True, top_k=3) -print(pipeline_result) -# [{'text_generation_text': ['I love Beijing, because it’s a city that has everything: the old and the new, the modern and the ancient']}] -``` - -## 预训练 - -### 数据集准备-预训练 - -以Wikitext2数据集为例: - -- 数据集下载:[WikiText2数据集](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/MindFormers/dataset/wikitext-2/wikitext-2-v1.zip) - -- 分词模型下载:例如下载huggingface的[tokenizer.model](https://huggingface.co/openlm-research/open_llama_7b/blob/main/tokenizer.model) - -- 使用以下预处理脚本生成mindrecord训练数据 - -```bash -# 使用tools/dataset_preprocess/llama/llama_preprocess.py进行数据预处理+Mindrecord数据生成 -python llama_preprocess.py \ ---dataset_type wiki \ ---input_glob /{path}/wiki.train.tokens \ ---model_file /{path}/tokenizer.model \ ---seq_length 2048 \ ---output_file /{path}/wiki2048.mindrecord -``` - -### 脚本启动(LLaMA-7B为例) - 
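正式拉起训练任务前,可先用如下示意代码确认上一步生成的mindrecord数据能否正常读取(其中文件路径与列名均为示例,请替换为实际生成结果):

```python
import mindspore.dataset as ds

# 读取预处理生成的mindrecord文件(路径为示例)
dataset = ds.MindDataset("/{path}/wiki2048.mindrecord", columns_list=["input_ids"])
print("样本数:", dataset.get_dataset_size())
for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
    # 打印首个样本的形状,确认序列长度与配置一致
    print("input_ids shape:", item["input_ids"].shape)
    break
```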
-多卡运行需要RANK_FILE_TABLE,请参考前期准备-[生成RANK_TABLE_FILE](#生成rank_table_file多卡运行必须环节) - -#### 多卡训练 - -##### 单机多卡 - -- step 1. 修改模型对应的配置文件。 - -在模型对应的配置文件`configs/llama/run_llama_{7/13/65}b.yaml`中,用户可自行修改模型、训练相关参数,并通过`train_dataset`的`dataset_dir`参数,指定训练数据集的路径。 - -配置文件中各参数含义详见[Config配置说明文档](https://gitee.com/mindspore/mindformers/blob/master/configs/README.md)。 - -- step2. 设置环境变量,变量配置如下: - -```bash -export MS_ASCEND_CHECK_OVERFLOW_MODE="INFNAN_MODE" # 推荐开启INFNAN模式 -``` - -- step3:进入`scripts`文件夹,启动运行脚本,进行8卡分布式运行。 - -```shell -cd scripts -bash run_distribute.sh hccl_xxxx.json ../configs/llama/run_llama_7b.yaml [0,8] train -``` - -```text -# 脚本启动格式: -bash run_distribute.sh [RANK_TABLE_FILE] [CONFIG_PATH] [DEVICE_RANGE] [RUN_MODE] - -# 参数说明 -RANK_TABLE_FILE: 由mindformers/tools/hccl_tools.py生成的分布式json文件 -CONFIG_PATH: 为configs文件夹下面的llama/run_llama_7b.yaml配置文件 -DEVICE_RANGE: 为单机分布式卡的范围,如[0,8]为8卡分布式,不包含8本身 -RUN_MODE: 为任务运行状态,支持关键字 train\finetune\eval\predict -``` - -##### 多机多卡 - -- step 1. 多机多卡运行需要合并不同机器的RANK_FILE_TABLE,参考前期准备-[多机RANK_TABLE_FILE合并](#多机rank_table_file合并多机多卡必备环节) - -> **注:需要保证执行的节点和RANK_TABLE_FIEL的节点顺序保持一致,即rank_id匹配。** - -- step 2. 根据服务器节点数等信息,修改相应的配置。 - -```yaml -# 以llama-13b模型两机训练为例,默认配置2机16卡,如果节点数有变,需要修改相应的配置。 -# 配置文件路径:../configs/llama/run_llama_13b.yaml -parallel_config: - data_parallel: 2 - model_parallel: 4 - pipeline_stage: 2 - micro_batch_num: 16 - vocab_emb_dp: True - gradient_aggregation_group: 4 -``` - -- step 3. 执行运行脚本。 - -在多机上同时拉起任务,每台机器拉起方式参考单机多卡启动方式。需注意,多机多卡的拉起方式,相对于单机多卡,多了一个总卡数`[RANK_SIZE]`的入参。 - -```shell -# 第一台机器 -bash run_distribute.sh {RANK_TABLE_FILE path of the first device} ../configs/llama/run_llama_7b.yaml [0,8] train 16 -# 第二台机器 -bash run_distribute.sh {RANK_TABLE_FILE path of the second device} ../configs/llama/run_llama_7b.yaml [8,16] train 16 -``` - -## 微调 - -### 数据集准备-微调 - -目前提供alpaca数据集的预处理脚本用于全参微调/lora微调任务。 - -数据集下载链接如下: - -- [alpaca_data](https://github.com/tatsu-lab/stanford_alpaca/blob/main/alpaca_data.json) - -alpaca数据集原始格式样例: - -```text -# alpaca examples: - { - "instruction": "Describe a time when you had to make a difficult decision.", - "input": "", - "output": "I had to make a difficult decision when I was working as a project manager at a construction company. I was in charge of a project that needed to be completed by a certain date in order to meet the client\u2019s expectations. However, due to unexpected delays, we were not able to meet the deadline and so I had to make a difficult decision. I decided to extend the deadline, but I had to stretch the team\u2019s resources even further and increase the budget. Although it was a risky decision, I ultimately decided to go ahead with it to ensure that the project was completed on time and that the client\u2019s expectations were met. The project was eventually successfully completed and this was seen as a testament to my leadership and decision-making abilities." - }, - { - "instruction": "Identify the odd one out.", - "input": "Twitter, Instagram, Telegram", - "output": "Telegram" - }, -``` - -- step 1. 执行`alpaca_converter.py`,使用fastchat工具添加prompts模板,将原始数据集转换为多轮对话格式。 - -``` bash -# 脚本路径:tools/dataset_preprocess/llama/alpaca_converter.py -# 执行转换脚本 -python alpaca_converter.py \ ---data_path /{path}/alpaca_data.json \ ---output_path /{path}/alpaca-data-conversation.json -``` - -```text -# 参数说明 -data_path: 存放alpaca数据的路径 -output_path: 输出转换后对话格式的数据路径 -``` - -转换后格式样例: - -```text -{ - "id": "1", - "conversations": [ - { - "from": "human", - "value": "Below is an instruction that describes a task. 
Write a response that appropriately completes the request.\n\n### Instruction:\nGive three tips for staying healthy.\n\n### Response:" - }, - { - "from": "gpt", - "value": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule." - } - ] - }, -``` - -- step 2. 执行`llama_preprocess.py`,进行数据预处理、Mindrecord数据生成,将带有prompt模板的数据转换为mindrecord格式。 - -> **注:由于此工具依赖fschat工具包解析prompt模板,请提前安装fschat >= 0.2.13 python = 3.9** - -```bash -# 脚本路径:tools/dataset_preprocess/llama/llama_preprocess.py -python llama_preprocess.py \ ---dataset_type qa \ ---input_glob /{path}/alpaca-data-conversation.json \ ---model_file /{path}/tokenizer.model \ ---seq_length 2048 \ ---output_file /{path}/alpaca-fastchat2048.mindrecord -``` - -### 全参微调 - -以llama7b为例 - -当前模型已支持使用**Flash Attention算法**进行全参微调,请参考 [Flash Attention使用文档](../feature_cards/Training_Algorithms.md#flash-attention) - -- step 1. 修改`config/llama/run_llama_7b.yaml`中训练数据集路径为微调数据集路径,并在`input_columns`中添加`labels`。 - -```yaml -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "/{path}/alpaca-fastchat2048.mindrecord" - shuffle: True - input_columns: ["input_ids", "labels"] -``` - -- step 2. 修改训练时学习率和优化器参数,与预训练不同,微调学习率配置如下: - -```yaml -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.999 - eps: 1.e-8 - learning_rate: 1.e-5 - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 1.e-5 - warmup_ratio: 0.03 - total_steps: -1 # -1 means it will load the total steps of the dataset -``` - -- step 3. 设置环境变量,变量配置如下: - -```bash -export MS_DEV_SIDE_EFFECT_LOAD_ELIM=3 # 去除TensorMove -export MS_MEMORY_POOL_RECYCLE=1 # 内存优化 -export GE_NOT_CUT=1 # 内存优化 -export MS_ASCEND_CHECK_OVERFLOW_MODE="INFNAN_MODE" -``` - -- step 4. 添加预训练权重路径,修改配置文件中的`load_checkpoint`,配置预训练权重路径。 - -- step 5. 启动微调任务,llama-7b模型以单机八卡为例进行微调,命令如下: - -```shell -cd scripts -bash run_distribute.sh [RANK_TABLE_FILE] ../configs/llama/run_llama_7b.yaml [0,8] finetune -``` - -多机多卡微调任务启动参考[预训练章节](#预训练),添加预训练权重,修改启动脚本中的`RUN_MODE`为`finetune`即可。 - -### lora微调 - -目前llama_7b模型适配了lora微调算法,并给出了默认配置文件`config/llama/run_llama_7b_lora.yaml`。 - -#### 脚本启动 - -- step 1. 修改配置文件,参考全参微调修改训练数据集路径与预训练权重路径。 - -- step 2. 启动lora微调任务。(不建议开启INFNAN模式)。 - -> 注:llama_7b_lora模型支持单卡启动,需将配置文件中的`use_parallel`参数置为`False`。 - -```shell -cd scripts -# 单卡启动 -bash run_standalone.sh ../configs/llama/run_llama_7b_lora.yaml [DEVICE_ID] finetune -# 多卡启动(以单机八卡为例) -bash run_distribute.sh [RANK_TABLE_FILE] ../configs/llama/run_llama_7b_lora.yaml [0,8] finetune -``` - -#### API高阶接口启动 - -lora微调支持使用高阶接口启动单卡微调任务,示例代码如下: - -```python -import mindspore; mindspore.set_context(mode=0, device_id=0) -from mindformers.trainer import Trainer -# 初始化预训练任务 -trainer = Trainer(task='text_generation', - model='llama_7b', - pet_method='lora', - train_dataset="{dataset file path}") -# 调用finetune接口进行微调 -trainer.finetune(finetune_checkpoint="{checkpoint file path}") -``` - -## 评测 - -Llama当前支持的评测任务如下: - -| 任务类型 | 评测指标 | 数据集 | -| :------: | :--------: | :-------: | -| 文本生成 | Perplexity | WikiText2 | -| 阅读理解 | Em/F1 | SQuAD 1.1 | - -### 文本生成 - -step 1. 获取数据集 - -[WikiText2数据集](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/MindFormers/dataset/wikitext-2/wikitext-2-v1.zip)是从维基百科上经过验证的优质文章集中提取的超过1亿个token的集合。 - -step 2. 
处理数据成mindrecord格式 - -```bash -# 使用tools/dataset_preprocess/llama/llama_preprocess.py进行数据预处理+Mindrecord数据生成 -python llama_preprocess.py \ ---dataset_type wiki \ ---input_glob /{path}/wiki.valid.tokens \ ---model_file /{path}/tokenizer.model \ ---seq_length 2047 \ ---output_file /{path}/wiki2048.mindrecord -``` - -step 3. 开启评测,指标为PPL - -```bash -python run_mindformer.py \ ---config configs/llama/run_llama_7b.yaml \ ---eval_dataset_dir /{path}/wiki2048.mindrecord \ ---run_mode eval \ ---load_checkpoint /{path}/llama_7b.ckpt \ ---epochs 1 \ ---use_parallel False \ ---device_id 0 - -# PerplexityMetric = {'PerplexityMetric': {'loss': 2.1142693907022476, 'PPL': 8.283531529594038}} -``` - -### 阅读理解 - -step 1. 获取数据集 - -[SQuAD 1.1](https://data.deepai.org/squad1.1.zip)包含针对500+文章的10万+问答对,是一个阅读理解数据集,由维基百科文章上提出的问题组成,其中每个问题的答案都是相应文章中的一段文本。 - -step 2. 处理数据成mindrecord格式 - -```bash -# 使用tools/dataset_preprocess/llama/squad_data_process.py进行数据预处理+Mindrecord数据生成 -python squad_data_process.py \ ---input_file /{path}/squad/dev-v1.1.json \ ---output_file /{path}/squad2048.mindrecord \ ---mode eval \ ---max_length 2048 \ ---tokenizer_type "llama_7b" -``` - -预处理后数据格式举例: - -```text -Read the passage and answer the question below. - -### Instruction: -The Panthers finished the regular season with a 15–1 record, and quarterback Cam Newton was named the NFL Most Valuable Player (MVP). They defeated the Arizona Cardinals 49–15 in the NFC Championship Game and advanced to their second Super Bowl appearance since the franchise was founded in 1995. The Broncos finished the regular season with a 12–4 record, and denied the New England Patriots a chance to defend their title from Super Bowl XLIX by defeating them 20–18 in the AFC Championship Game. They joined the Patriots, Dallas Cowboys, and Pittsburgh Steelers as one of four teams that have made eight appearances in the Super Bowl. - -### Input: -Which Carolina Panthers player was named Most Valuable Player? - -### Response: -Cam Newton -``` - -step 3. 修改配置文件,eval_dataset的input_columns中增加`labels`,修改metric类型为`EmF1Metric` - -```yaml -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids", "labels"] # 增加"labels" - -# metric -metric: - type: EmF1Metric # metric type设为EmF1Metric -``` - -此外,要提高推理速度,可以进行如下配置,设置增量推理`use_past`,并限制生成最大长度`max_new_tokens`。 - -```yaml -# model config -use_past: True # 开启增量推理 -extend_method: "None" -offset: 0 -checkpoint_name_or_path: "llama_7b" -repetition_penalty: 1 -max_decode_length: 512 -top_k: 3 -top_p: 1 -do_sample: False -max_new_tokens: 20 #设置最大生成长度 -``` - -step 4. 
开启评测,指标为`Em/F1` - -```bash -python run_mindformer.py \ ---config configs/llama/run_llama_7b.yaml \ ---eval_dataset_dir /{path}/squad2048.mindrecord \ ---run_mode eval \ ---load_checkpoint /{path}/llama_7b.ckpt \ ---epochs 1 \ ---batch_size 1 \ ---use_parallel False \ ---device_id 0 - -# F1 score: 48.48954955952303, Em score: 26.850507982583455, total_count: 2067 -``` - -## 推理 - -> 注:修改模型配置项中的**use_past=True**,以开启增量推理,加速推理性能 - -### 基于pipeline的推理 - -以下为基于pipeline接口的自定义推理脚本,支持多卡多batch推理。 - -```python -# predict_custom.py 文件 -import os -import argparse -import numpy as np - -import mindspore as ms -from mindspore.train import Model -from mindspore import load_checkpoint, load_param_into_net - -from mindformers import AutoConfig, AutoTokenizer, AutoModel, pipeline -from mindformers import init_context, ContextConfig, ParallelContextConfig -from mindformers.trainer.utils import get_last_checkpoint -from mindformers.tools.utils import str2bool, get_real_rank - - -def context_init(use_parallel=False, device_id=0): - """init context for mindspore.""" - context_config = ContextConfig(mode=0, device_target="Ascend", device_id=device_id) - parallel_config = None - if use_parallel: - parallel_config = ParallelContextConfig(parallel_mode='SEMI_AUTO_PARALLEL', - gradients_mean=False, - full_batch=True) - init_context(use_parallel=use_parallel, - context_config=context_config, - parallel_config=parallel_config) - - -def main(model_type='llama_7b', - use_parallel=False, - device_id=0, - checkpoint_path="", - use_past=True): - """main function.""" - # 初始化单卡/多卡环境 - context_init(use_parallel, device_id) - - # 多batch输入 - inputs = ["I love Beijing, because", - "LLaMA is a", - "Huawei is a company that"] - - # set model config - model_config = AutoConfig.from_pretrained(model_type) - model_config.use_past = use_past - # if use parallel, data_parallel * model_parallel = device_num - model_config.parallel_config.data_parallel = 1 - model_config.parallel_config.model_parallel = 1 - if checkpoint_path and not use_parallel: - model_config.checkpoint_name_or_path = checkpoint_path - print(f"config is: {model_config}") - - # build tokenizer - tokenizer = AutoTokenizer.from_pretrained(model_type) - # build model from config - network = AutoModel.from_config(model_config) - - # if use parallel, load distributed checkpoints - if use_parallel: - # find the sharded ckpt path for this rank - ckpt_path = os.path.join(checkpoint_path, "rank_{}".format(get_real_rank())) - ckpt_path = get_last_checkpoint(ckpt_path) - print("ckpt path: %s", str(ckpt_path)) - - # shard model and load sharded ckpt - model = Model(network) - model.infer_predict_layout(ms.Tensor(np.ones(shape=(1, model_config.seq_length)), ms.int32)) - checkpoint_dict = load_checkpoint(ckpt_path) - not_load_network_params = load_param_into_net(network, checkpoint_dict) - print("Network parameters are not loaded: %s", str(not_load_network_params)) - - text_generation_pipeline = pipeline(task="text_generation", model=network, tokenizer=tokenizer) - outputs = text_generation_pipeline(inputs) - for output in outputs: - print(output) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--model_type', default='llama_7b', type=str, - help='which model to use.') - parser.add_argument('--use_parallel', default=False, type=str2bool, - help='whether use parallel.') - parser.add_argument('--device_id', default=0, type=int, - help='set device id.') - parser.add_argument('--checkpoint_path', default='', type=str, - help='set checkpoint 
path.') - parser.add_argument('--use_past', default=True, type=str2bool, - help='whether use past.') - args = parser.parse_args() - - main(args.model_type, - args.use_parallel, - args.device_id, - args.checkpoint_path, - args.use_past) -``` - -以下为多卡运行自定义多batch推理的脚本 - -```bash -# >>> `run_predict.sh`文件 -CHECKPOINT_PATH=$2 -export RANK_TABLE_FILE=$1 - -# define variable -export RANK_SIZE=8 -export START_RANK=0 # this server start rank -export END_RANK=8 # this server end rank - -# run -for((i=${START_RANK}; i<${END_RANK}; i++)) -do - export DEVICE_ID=$i - export RANK_ID=$((i-START_RANK)) - echo "Start distribute running for rank $RANK_ID, device $DEVICE_ID" - python3 ./predict_custom.py --use_parallel True --checkpoint_path $CHECKPOINT_PATH &> mindformers_$RANK_ID.log & -done -``` - -#### 单卡pipeline推理 - -```bash -python predict_custom.py -``` - -#### 多卡pipeline推理 - -- 修改yaml文件中分布式配置及并行模式,参考[模型权重切分与合并](../feature_cards/Transform_Ckpt.md)进行离线权重切分。**注**:推理暂不支持流水线并行 - -- 将上述`predict_custom.py`中的分布式配置更改为预期的分布式配置 - -```python -model_config.parallel_config.data_parallel = 1 -model_config.parallel_config.model_parallel = 1 -``` - -- 配置上述sh脚本中的卡数设置,默认是0-8卡 - -```text -export RANK_SIZE=8 # 总卡数 -export START_RANK=0 # 起始卡序号 -export END_RANK=8 # 结束卡序号 -``` - -- 运行如下命令进行推理 - -```bash -bash run_predict.sh RANK_TABLE_FILE path/to/shard_checkpoint_dir -``` - -### 基于generate的推理 - -以下为基于model.generate接口的自定义推理脚本,支持多卡多batch推理。 - -```python -# predict_custom.py 文件 -import os -import argparse -import numpy as np - -import mindspore as ms -from mindspore.train import Model -from mindspore import load_checkpoint, load_param_into_net - -from mindformers import AutoConfig, AutoTokenizer, AutoModel -from mindformers import init_context, ContextConfig, ParallelContextConfig -from mindformers.trainer.utils import get_last_checkpoint -from mindformers.tools.utils import str2bool, get_real_rank - - -def context_init(use_parallel=False, device_id=0): - """init context for mindspore.""" - context_config = ContextConfig(mode=0, device_target="Ascend", device_id=device_id) - parallel_config = None - if use_parallel: - parallel_config = ParallelContextConfig(parallel_mode='SEMI_AUTO_PARALLEL', - gradients_mean=False, - full_batch=True) - init_context(use_parallel=use_parallel, - context_config=context_config, - parallel_config=parallel_config) - - -def main(model_type='llama_7b', - use_parallel=False, - device_id=0, - checkpoint_path="", - use_past=True): - """main function.""" - # 初始化单卡/多卡环境 - context_init(use_parallel, device_id) - - # 多batch输入 - inputs = ["I love Beijing, because", - "LLaMA is a", - "Huawei is a company that"] - - # set model config - model_config = AutoConfig.from_pretrained(model_type) - # if use parallel, data_parallel * model_parallel = device_num - model_config.parallel_config.data_parallel = 1 - model_config.parallel_config.model_parallel = 1 - model_config.batch_size = len(inputs) - model_config.use_past = use_past - if checkpoint_path and not use_parallel: - model_config.checkpoint_name_or_path = checkpoint_path - print(f"config is: {model_config}") - - # build tokenizer - tokenizer = AutoTokenizer.from_pretrained(model_type) - # build model from config - network = AutoModel.from_config(model_config) - - # if use parallel, load distributed checkpoints - if use_parallel: - # find the sharded ckpt path for this rank - ckpt_path = os.path.join(checkpoint_path, "rank_{}".format(get_real_rank())) - ckpt_path = get_last_checkpoint(ckpt_path) - print("ckpt path: %s", str(ckpt_path)) - - # shard model and 
load sharded ckpt - model = Model(network) - model.infer_predict_layout(ms.Tensor(np.ones(shape=(1, model_config.seq_length)), ms.int32)) - checkpoint_dict = load_checkpoint(ckpt_path) - not_load_network_params = load_param_into_net(network, checkpoint_dict) - print("Network parameters are not loaded: %s", str(not_load_network_params)) - - inputs_ids = tokenizer(inputs, max_length=model_config.seq_length, padding="max_length")["input_ids"] - outputs = network.generate(inputs_ids, max_length=model_config.max_decode_length) - for output in outputs: - print(tokenizer.decode(output)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--model_type', default='llama_7b', type=str, - help='which model to use.') - parser.add_argument('--use_parallel', default=False, type=str2bool, - help='whether use parallel.') - parser.add_argument('--device_id', default=0, type=int, - help='set device id.') - parser.add_argument('--checkpoint_path', default='', type=str, - help='set checkpoint path.') - parser.add_argument('--use_past', default=True, type=str2bool, - help='whether use past.') - args = parser.parse_args() - - main(args.model_type, - args.use_parallel, - args.device_id, - args.checkpoint_path, - args.use_past) -``` - -以下为多卡运行自定义多batch推理的脚本 - -```bash -# >>> `run_predict.sh`文件 -CHECKPOINT_PATH=$2 -export RANK_TABLE_FILE=$1 - -# define variable -export RANK_SIZE=8 -export START_RANK=0 # this server start rank -export END_RANK=8 # this server end rank - -# run -for((i=${START_RANK}; i<${END_RANK}; i++)) -do - export DEVICE_ID=$i - export RANK_ID=$((i-START_RANK)) - echo "Start distribute running for rank $RANK_ID, device $DEVICE_ID" - python3 ./predict_custom.py --use_parallel True --checkpoint_path $CHECKPOINT_PATH &> mindformers_$RANK_ID.log & -done -``` - -#### 单卡generate推理 - -```bash -python predict_custom.py -``` - -#### 多卡generate推理 - -- 修改yaml文件中分布式配置及并行模式,参考[模型权重切分与合并](../feature_cards/Transform_Ckpt.md)进行离线权重切分。**注**:推理暂不支持流水线并行 - -- 将上述`predict_custom.py`中的分布式配置更改为预期的分布式配置 - -```text -model_config.parallel_config.data_parallel = 1 -model_config.parallel_config.model_parallel = 1 -``` - -- 配置上述sh脚本中的卡数设置,默认是0-8卡 - -```text -export RANK_SIZE=8 # 总卡数 -export START_RANK=0 # 起始卡序号 -export END_RANK=8 # 结束卡序号 -``` - -- 运行如下命令进行推理 - -```bash -bash run_predict.sh RANK_TABLE_FILE path/to/shard_checkpoint_dir -``` - -### run_mindformer脚本启动 - -#### 单卡推理 - -```bash -python run_mindformer.py --config configs/pangualpha/run_llama_7b.yaml --run_mode predict --predict_data 'I love Beijing, because' --use_parallel False -``` - -**注**:要提高推理速度,可在对应模型配置文件中进行如下配置,设置增量推理`use_past`为True。 - -```python -# model config -use_past: True # 开启增量推理 -extend_method: "None" -offset: 0 -checkpoint_name_or_path: "llama_7b" -repetition_penalty: 1 -max_decode_length: 512 -top_k: 3 -top_p: 1 -do_sample: False -max_new_tokens: 128 #设置最大生成长度 -``` - -### lora推理 - -基于lora微调后的权重进行推理,可复用上述推理脚本,将入参`model_type`改为`llama_7b_lora`即可使用lora微调模型进行推理 - -基于alpaca数据集微调的权重,要获得更好的推理效果,需使用alpaca的prompt模板,在instruction处填入自定义指令,以此作为模型的推理输入,替换推理脚本中的`inputs`;如下为prompt模板: - -```python -input_data = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{}\n\n### Response:".format("Tell me about alpacas.") - -# output: -# Alpacas are a species of South American camelid. They are domesticated animals that are raised for their wool, meat, and milk. Alpacas are gentle, docile animals that are very friendly and easy to care for. 
They are also very intelligent and can be trained to perform certain tasks. Alpacas are very social animals and live in herds of up to 20 individuals. They are also very vocal and can make a variety of sounds, including a loud, high-pitched bark. -``` - -#### lora推理结果示例 - -**Instruction**: Tell me about the president of Mexico in 2019. - -**Output**: The president of Mexico in 2019 is Andrés Manuel López Obrador. He was elected in 2018 and is the first left-wing president of Mexico in over 70 years. He is known for his populist policies and his focus on improving the lives of the poor and marginalized. He has also been criticized for his lack of transparency and his authoritarian style of leadership. - ---- - -**Instruction**: List all Canadian provinces in alphabetical order. - -**Output**: Alberta, British Columbia, Manitoba, New Brunswick, Newfoundland and Labrador, Nova Scotia, Ontario, Prince Edward Island, Quebec, Saskatchewan, and Yukon. - ---- - -**Instruction**: Write a Python program that prints the first 10 Fibonacci numbers. - -**Output**: - -```python -def fibonacci(n): - if n == 0: - return 0 - elif n == 1: - return 1 - else: - return fibonacci(n-1) + fibonacci(n-2) - -print(fibonacci(10)) -``` - ---- - -**Instruction**: Write a program that prints the numbers from 1 to 100. But for multiples of three print 'Fizz' instead of the number and for the multiples of five print 'Buzz'. For numbers which are multiples of both three and five print 'FizzBuzz'. - -**Output**: - -```python -def fizzbuzz(n): - if n % 3 == 0 and n % 5 == 0: - print('FizzBuzz') - elif n % 3 == 0: - print('Fizz') - elif n % 5 == 0: - print('Buzz') - else: - print(n) -``` - ---- - -**Instruction**: Translate the sentence 'I have no mouth but I must scream' into Spanish. - -**Output**: No tengo boca pero tengo que gritar. - ---- diff --git a/docs/model_cards/mae.md b/docs/model_cards/mae.md deleted file mode 100644 index 367cb51b..00000000 --- a/docs/model_cards/mae.md +++ /dev/null @@ -1,368 +0,0 @@ -# MAE - -## 模型描述 - -MAE是一种基于MIM(Masked Image Modeling)的无监督学习方法。 - -MAE由何恺明团队提出,将NLP领域大获成功的自监督预训练模式用在了计算机视觉任务上,效果拔群,在NLP和CV两大领域间架起了一座更简便的桥梁。 - -[论文](https://gitee.com/link?target=https%3A%2F%2Farxiv.org%2Fabs%2F2111.06377): He, Kaiming et al. “Masked Autoencoders Are Scalable Vision Learners.” 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2022): 15979-15988. - -## 模型性能 - -- 基于Atlas 800 - -| config | task | Datasets | metric | score | [train performance](#预训练) | [prediction performance](#推理) | -| :----------------------------------------------------------: | :------------------: | :---------: | :-----------: | :----: | :--------------------------: | :-----------------------------: | -| [mae_vit_base_p16](../../configs/mae/run_mae_vit_base_p16_224_800ep.yaml) | image_classification | ImageNet-1K | Top1-Accuracy | 0.8372 | 262.31 samples/s/p | 363.50 (fps) | - -## 仓库介绍 - -`MAE` 基于 `MindFormers` 实现,主要涉及的文件有: - -1. 模型具体实现:`mindformers/models/mae` - - ```bash - model - ├── __init__.py - ├── convert_weight.py # 权重转换脚本 - ├── mae.py # 模型实现 - ├── mae_config.py # 模型配置项 - ├── mae_modules.py # 模型所需模块 - └── mae_processor.py # Model预处理 - ``` - -2. 
模型配置:`configs/mae` - - ```bash - model - └── run_mae_vit_base_p16_224_800ep.yaml # mae_vit_base模型启动配置 - ``` - -## 前期准备 - -### [mindformers安装](path/to/README.md#二mindformers安装) - -### 生成RANK_TABLE_FILE(多卡运行必须环节) - -运行mindformers/tools/hccl_tools.py生成RANK_TABLE_FILE的json文件 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" -``` - -**注:若使用ModelArts的notebook环境,可从 `/user/config/jobstart_hccl.json` 路径下直接获取rank table,无需手动生成** - -RANK_TABLE_FILE 单机8卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "1", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - {"device_id": "0","device_ip": "192.1.27.6","rank_id": "0"}, - {"device_id": "1","device_ip": "192.2.27.6","rank_id": "1"}, - {"device_id": "2","device_ip": "192.3.27.6","rank_id": "2"}, - {"device_id": "3","device_ip": "192.4.27.6","rank_id": "3"}, - {"device_id": "4","device_ip": "192.1.27.7","rank_id": "4"}, - {"device_id": "5","device_ip": "192.2.27.7","rank_id": "5"}, - {"device_id": "6","device_ip": "192.3.27.7","rank_id": "6"}, - {"device_id": "7","device_ip": "192.4.27.7","rank_id": "7"}], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 多机RANK_TABLE_FILE合并(多机多卡必备环) - -- step 1. 首先根据上章节内容,在每个机器上生成各自的`RANK_TABLE_FILE`文件,然后将不同机器上生成的`RANK_TABLE_FILE`文件全部拷贝到同一台机器上。 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" --server_ip xx.xx.xx.xx -``` - -**注:需要根据机器的ip地址指定 --server_ip,避免由于不同机器server_ip不同,导致多节点间通信失败。** - -- step 2. 运行mindformers/tools/merge_hccl.py将不同机器上生成的`RANK_TABLE_FILE`文件合并 - -```bash -# 运行如下命令,合并每个机器上的RANK_TABLE_FILE的json文件。 -python ./mindformers/tools/merge_hccl.py hccl*.json -``` - -- step 3. 将合并后的`RANK_TABLE_FILE`文件拷贝到所有机器中,保证不同机器上的`RANK_TABLE_FILE`相同。 - -RANK_TABLE_FILE 双机16卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "2", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.0", "rank_id": "0" - }, - { - "device_id": "1", "device_ip": "192.168.1.0", "rank_id": "1" - }, - { - "device_id": "2", "device_ip": "192.168.2.0", "rank_id": "2" - }, - { - "device_id": "3", "device_ip": "192.168.3.0", "rank_id": "3" - }, - { - "device_id": "4", "device_ip": "192.168.0.1", "rank_id": "4" - }, - { - "device_id": "5", "device_ip": "192.168.1.1", "rank_id": "5" - }, - { - "device_id": "6", "device_ip": "192.168.2.1", "rank_id": "6" - }, - { - "device_id": "7", "device_ip": "192.168.3.1", "rank_id": "7" - } - ], - "host_nic_ip": "reserve" - }, - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.1", "rank_id": "8" - }, - { - "device_id": "1", "device_ip": "192.168.1.1", "rank_id": "9" - }, - { - "device_id": "2", "device_ip": "192.168.2.1", "rank_id": "10" - }, - { - "device_id": "3", "device_ip": "192.168.3.1", "rank_id": "11" - }, - { - "device_id": "4", "device_ip": "192.168.0.2", "rank_id": "12" - }, - { - "device_id": "5", "device_ip": "192.168.1.2", "rank_id": "13" - }, - { - "device_id": "6", "device_ip": "192.168.2.2", "rank_id": "14" - }, - { - "device_id": "7", "device_ip": "192.168.3.2", "rank_id": "15" - } - ], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 模型权重下载与转换 - -如果无需加载权重,或者使用from_pretrained功能自动下载,可以跳过此章节。 - -MindFormers提供高级接口from_pretrained功能直接下载MindFormerBook中的[mae_vit_base_p16.ckpt](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/mae/mae_vit_base_p16.ckpt[),无需手动转换。 - 
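例如,可通过如下示意代码触发权重的自动下载与加载(模型名称为仓库中已注册的`mae_vit_base_p16`):

```python
from mindformers import AutoModel

# 首次调用会自动下载mae_vit_base_p16的预训练权重并完成加载
model = AutoModel.from_pretrained("mae_vit_base_p16")
```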
-本仓库中的`mae_vit_base_p16`来自于facebookresearch/mae的[ViT-Base](https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_base.pth),如需手动下载权重,可参考以下示例进行转换: - -1. 从上述的链接中下载`ViT-Base`的模型权重 - -2. 执行转换脚本,得到转换后的输出文件`mae_vit_base_p16.ckpt` - -```bash -python mindformers/models/mae/convert_weight.py --torch_path "PATH OF ViT-Base.pth" --mindspore_path "SAVE PATH OF mae_vit_base_p16.ckpt" -``` - -## 基于API的快速使用 - -### 基于AutoClass的快速使用 - -可以使用AutoClass接口,通过模型名称自动下载并加载权重 - -`from_pretrained()` 接口会自动从云上下载预训练的模型,存储路径:`mindformers/checkpoint_download/vit` - -```python -import mindspore -from mindformers import AutoModel, AutoConfig - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) - -# 模型标志加载模型 -model = AutoModel.from_pretrained("mae_vit_base_p16") - -#模型配置加载模型 -config = AutoConfig.from_pretrained("mae_vit_base_p16") -# {'decoder_dim': 512, 'patch_size': 16, 'in_chans': 3, 'embed_dim': 768, 'depth': 12, -# ..., 'decoder_embed_dim': 512, 'norm_pixel_loss': True, 'window_size': None} -model = AutoModel.from_config(config) - -print(model) -# output -``` - -### 基于Trainer的快速训练、推理 - -```python -import mindspore -from mindformers.trainer import Trainer -from mindformers.tools.image_tools import load_image - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) -# 初始化任务 -mae_trainer = Trainer( - task='masked_image_modeling', - model='mae_vit_base_p16', - train_dataset="imageNet-1k/train") -img = load_image("https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/clip/sunflower.png") - -# 方式1: 从新开始训练,并使用训练好的权重进行推理 -mae_trainer.train() # 开启训练 -predict_result = mae_trainer.predict(predict_checkpoint=True, input_data=img) -print(predict_result) - -# 方式2: 从obs下载训练好的权重并进行推理 -predict_result = mae_trainer.predict(input_data=img) -print(predict_result) -# output -``` - -### 基于Pipeline的快速推理 - -```python -import mindspore -from mindformers.pipeline import pipeline -from mindformers.tools.image_tools import load_image - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) -pipeline_task = pipeline("masked_image_modeling", model='mae_vit_base_p16') -img = load_image("https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/clip/sunflower.png") -pipeline_result = pipeline_task(img) -print(pipeline_result) -# output -``` - - Trainer和pipeline接口默认支持的task和model关键入参 - -| task(string) | model(string) | -| :------------------: | :--------------: | -| image_classification | mae_vit_base_p16 | - -## 预训练 - -### 数据集准备-预训练 - -使用的数据集:[ImageNet2012](http://www.image-net.org/) - -- 数据集大小:125G,共1000个类、125万张彩色图像 - - 训练集:120G,共120万张图像 - - 测试集:5G,共5万张图像 -- 数据格式:RGB - - ```text -数据集目录格式 -└─imageNet-1k - ├─train # 训练数据集 - └─val # 评估数据集 - ``` - -### 脚本启动 - -#### 单卡训练 - -- python启动 - -```bash -# pretrain -python run_mindformer.py --config ./configs/mae/run_mae_vit_base_p16_224_800ep.yaml --run_mode train -``` - -#### 多卡训练 - -多卡运行需要RANK_FILE_TABLE,请参考前期准备-[生成RANK_TABLE_FILE](#生成rank_table_file多卡运行必须环节) - -- 单机多卡 - -```bash -cd scripts -bash run_distribute.sh RANK_TABLE_FILE ../configs/mae/run_mae_vit_base_p16_224_800ep.yaml [0,8] train 8 -``` - -多机多卡运行需要合并不同机器的RANK_FILE_TABLE,参考前期准备-[多机RANK_TABLE_FILE合并](#多机rank_table_file合并多机多卡必备环节) - -- 多机多卡 - -在每台机器上启动`bash run_distribute.sh`。 - -**注:需要保证执行的节点和RANK_TABLE_FIEL的节点顺序保持一致,即rank_id匹配。** - -```bash -server_count=12 -device_num=8*$server_count -# launch ranks in the 0th server -cd scripts -bash run_distribute.sh $RANK_TABLE_FILE ../configs/mae/run_mae_vit_base_p16_224_800ep.yaml [0,8] train $device_num - -# launch ranks 
in the 1-11 server via ssh -for idx in {1..11} -do - let rank_start=8*$idx - let rank_end=$rank_start+8 - ssh ${IP_LIST[$idx]} "cd scripts; bash run_distribute.sh $RANK_TABLE_FILE ../configs/mae/run_mae_vit_base_p16_224_800ep.yaml [$rank_start,$rank_end] train $device_num" -done -``` - -其中 - -- `RANK_TABLE_FILE`为上一步汇总并分发的总rank table文件; -- `IP_LIST`为12台服务器的IP地址。如192.168.0.[0-11] - -```bash -IP_LIST=("192.168.0.0", "192.168.0.1", ..., "192.168.0.11") -``` - -## 评测 - -### 图像分类 - -### 数据集准备-图像分类 - -参考[数据集准备-预训练](#数据集准备-预训练) - -### 脚本启动 - -#### 单卡评测 - -```bash -# evaluate -python run_mindformer.py --config ./configs/vit/run_vit_base_p16_224_100ep.yaml --run_mode eval --eval_dataset_dir [DATASET_PATH] -# output -# MAE: Top1 Accuracy = {'Top1 Accuracy': 0.8371678937259923} -``` - -## 推理 - -### 脚本启动 - -#### 单卡推理 - -```bash -# predict -python run_mindformer.py --config ./configs/mae/run_mae_vit_base_p16_224_800ep.yaml --run_mode predict --predict_data [PATH_TO_IMAGE] -``` diff --git a/docs/model_cards/pangualpha.md b/docs/model_cards/pangualpha.md deleted file mode 100644 index 579be6dc..00000000 --- a/docs/model_cards/pangualpha.md +++ /dev/null @@ -1,837 +0,0 @@ -# PanguAlpha - -## 模型描述 - -「鹏程·盘古α」由以鹏城实验室为首的技术团队联合攻关,首次基于“鹏城云脑Ⅱ”和国产MindSpore框架的自动混合并行模式实现在2048卡算力集群上的大规模分布式训练,训练出业界首个2000亿参数以中文为核心的预训练生成语言模型。鹏程·盘古α预训练模型支持丰富的场景应用,在知识问答、知识检索、知识推理、阅读理解等文本生成领域表现突出,具备很强的小样本学习能力。 - -[论文](https://arxiv.org/abs/2104.12369)J Wei Zeng, Xiaozhe Ren, Teng Su,et al., PanGu-α: Large-scale Autoregressive Pretrained Chinese Language Models with Auto-parallel Computation, 2021 - -## 模型性能 - -- 基于Atlas 800 - -| config | task | Datasets | metric | score | [train performance](#预训练) | [predict performance](#基于pipeline的推理) | -| :----------------------------------------------------------------------------------------------: | :-------------: | :-------: | :----: | :--------: | :--------------------------: | :----------------------------------------: | -| [pangualpha_2_6b](../../configs/pangualpha/run_pangualpha_2_6b.yaml) | text_generation | WikiText2 | - | - | 4075 tokens/s/p | 19.5 tokens/s/p (use past True) | -| [pangualpha_13b](../../configs/pangualpha/run_pangualpha_13b.yaml) | text_generation | WikiText2 | - | - | 575 tokens/s/p | 12.5 tokens/s/p (use past True) | -| [pangualpha_2_6b_prompt_txtcls](../../configs/pangualpha/run_pangualpha_2_6b_prompt_txtcls.yaml) | text_generation | TNEWS | ACC | 0.646 | - | - | -| [pangualpha_2_6b_em_f1](../../configs/pangualpha/run_pangualpha_2_6b_em_f1.yaml) | text_generation | CMRC2018 | Em/F1 | 2.10/21.12 | - | - | - -## 仓库介绍 - -`PanguAlpha` 基于 `mindformers` 实现,主要涉及的文件有: - -1. 模型具体实现:`mindformers/models/pangualpha` - - ```bash - pangualpha - ├── __init__.py - ├── convert_weight.py # 权重转换脚本 - ├── pangualpha.py # 模型实现 - ├── pangualpha_config.py # 模型配置项 - ├── pangualpha_processor.py # Model预处理 - └── pangualpha_tokenizer.py # tokenizer - ``` - -2. 模型配置:`configs/pangualpha` - - ```bash - pangualpha - ├── run_pangualpha_2_6b.yaml # pangualpha_2_6b模型启动配置 - ├── run_pangualpha_13b.yaml # pangualpha_13b模型启动配置 - ├── run_pangualpha_2_6b_prompt_txtcls.yaml # pangualpha_2_6b文本分类评测启动配置 - └── run_pangualpha_2_6b_em_f1.yaml # run_pangualpha_2_6b阅读理解评测启动配置 - ``` - -3. 
预处理脚本和任务启动脚本:`mindformers\tools\dataset_preprocess\pangualpha` - - ```bash - pangualpha - ├── pretrain_data_process.py # wikitext-2等纯文本数据集预处理 - ├── cmrc2018_data_process.py # cmrc2018数据集预处理 - └── tnews_data_process.py # tnews数据集预处理 - ``` - -## 前期准备 - -### [mindformers安装](../../README.md#二mindformers安装) - -### 生成RANK_TABLE_FILE(多卡运行必须环节) - -运行mindformers/tools/hccl_tools.py生成RANK_TABLE_FILE的json文件 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" -``` - -**注:若使用ModelArts的notebook环境,可从 `/user/config/jobstart_hccl.json` 路径下直接获取rank table,无需手动生成** - -RANK_TABLE_FILE 单机8卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "1", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - {"device_id": "0","device_ip": "192.1.27.6","rank_id": "0"}, - {"device_id": "1","device_ip": "192.2.27.6","rank_id": "1"}, - {"device_id": "2","device_ip": "192.3.27.6","rank_id": "2"}, - {"device_id": "3","device_ip": "192.4.27.6","rank_id": "3"}, - {"device_id": "4","device_ip": "192.1.27.7","rank_id": "4"}, - {"device_id": "5","device_ip": "192.2.27.7","rank_id": "5"}, - {"device_id": "6","device_ip": "192.3.27.7","rank_id": "6"}, - {"device_id": "7","device_ip": "192.4.27.7","rank_id": "7"}], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 多机RANK_TABLE_FILE合并(多机多卡必备环节) - -- step 1. 首先根据上章节内容,在每个机器上生成各自的`RANK_TABLE_FILE`文件,然后将不同机器上生成的`RANK_TABLE_FILE`文件全部拷贝到同一台机器上。 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" --server_ip xx.xx.xx.xx -``` - -**注:需要根据机器的ip地址指定 --server_ip,避免由于不同机器server_ip不同,导致多节点间通信失败。** - -- step 2. 运行mindformers/tools/merge_hccl.py将不同机器上生成的`RANK_TABLE_FILE`文件合并 - -```bash -# 运行如下命令,合并每个机器上的RANK_TABLE_FILE的json文件。 -python ./mindformers/tools/merge_hccl.py hccl*.json -``` - -- step 3. 将合并后的`RANK_TABLE_FILE`文件拷贝到所有机器中,保证不同机器上的`RANK_TABLE_FILE`相同。 - -RANK_TABLE_FILE 双机16卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "2", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.0", "rank_id": "0" - }, - { - "device_id": "1", "device_ip": "192.168.1.0", "rank_id": "1" - }, - { - "device_id": "2", "device_ip": "192.168.2.0", "rank_id": "2" - }, - { - "device_id": "3", "device_ip": "192.168.3.0", "rank_id": "3" - }, - { - "device_id": "4", "device_ip": "192.168.0.1", "rank_id": "4" - }, - { - "device_id": "5", "device_ip": "192.168.1.1", "rank_id": "5" - }, - { - "device_id": "6", "device_ip": "192.168.2.1", "rank_id": "6" - }, - { - "device_id": "7", "device_ip": "192.168.3.1", "rank_id": "7" - } - ], - "host_nic_ip": "reserve" - }, - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.1", "rank_id": "8" - }, - { - "device_id": "1", "device_ip": "192.168.1.1", "rank_id": "9" - }, - { - "device_id": "2", "device_ip": "192.168.2.1", "rank_id": "10" - }, - { - "device_id": "3", "device_ip": "192.168.3.1", "rank_id": "11" - }, - { - "device_id": "4", "device_ip": "192.168.0.2", "rank_id": "12" - }, - { - "device_id": "5", "device_ip": "192.168.1.2", "rank_id": "13" - }, - { - "device_id": "6", "device_ip": "192.168.2.2", "rank_id": "14" - }, - { - "device_id": "7", "device_ip": "192.168.3.2", "rank_id": "15" - } - ], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 模型权重下载与转换 - -开发者可以下载获取官方权重后,通过下面提供的**权重转换脚本**,将官方权重转换为MindSpore权重;或直接使用MindFormers提供的**已转换权重** - -1. 
使用官方权重进行转换 - [官方盘古Alpha权重下载](https://openi.pcl.ac.cn/PCL-Platform.Intelligence/PanGu-Alpha) - - **下载清单:xxB_part0-4.tar,xxB_xxx_embedding.npy,pangu_alpha_xxB_ckpt_strategy.ckpt** - 需要全部下载xxB_part0-4.tar4个压缩包(解压后共有**512**个ckpt文件),**3**个不同的embedding.npy,以及对应参数的**strategy.ckpt**文件。 - - 下载完成后,首先解压4个压缩包到同一个文件夹`path/to/512ckpt` - - 然后把3个不同的embedding.npy放置于同一个文件夹`path/to/embedding_dir` - - 以上两个文件夹可以相同。 - - 然后运行如下转换脚本,将官方盘古Alpha的权重转换为完整的ckpt权重。 - - ```shell - python mindformers/models/pangualpha/convert_weight.py --config_path_or_name path/to/config --official_strategy_path path/to/pangu_alpha_13B_cktp_strategy.ckpt --official_ckpt_dir path/to/512ckpt --official_npy_dir path/to/embedding_dir --ckpt_save_path path/to/pangualpha.ckpt - ``` - - ```text - # 参数说明 - config_path_or_name: 需要转换的模型配置文件,例如:'pangualpha_13b'或者 'path/to/run_pangualpha_13b.yaml' - official_strategy_path: 官方权重的切分策略文件,例如pangu_alpha_13B_ckpt_strategy.ckpt - official_ckpt_dir:官方权重文件夹,即path/to/512ckpt,存放了解压后的512个ckpt文件 - official_npy_dir:官方embedding文件夹,即path/to/embedding_dir,存放了3个不同的embedding.npy文件 - ckpt_save_path:你想存储最终转换完成的权重的路径以及权重名称 - ``` - -2. 获取MindFormers提供的已转换权重 - 可通过from_pretrained接口下载,也可直接从下面的链接获取 - [MindFormers盘古Alpha2.6B权重下载](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/pangualpha/pangualpha_2_6b.ckpt) - - [MindFormers盘古Alpha13B权重下载](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/pangualpha/pangualpha_13b.ckpt) - -### [模型权重切分与合并](../feature_cards/Transform_Ckpt.md) - -从hugging face或官方github仓库转换而来的权重通常是单卡权重,基于该权重进行多卡微调,评测,推理,涉及ckpt从单机策略到分布式策略的切换。 - -通常训练采用分布式训练,基于该权重进行评测,推理多采用单卡,涉及ckpt从分布式策略到单机策略的切换。 - -以上涉及到ckpt的单卡,多卡转换,详细教程请参考特性文档模型[权重切分与合并](../feature_cards/Transform_Ckpt.md) - -## 基于API的快速使用 - -### 基于AutoClass的快速使用 - -可以使用AutoClass接口,通过模型名称获取相应的model/preprocess/tokenizer等实例,并自动下载并加载权重 - -`from_pretrained()` 接口会自动从云上下载预训练的模型,存储路径:`mindformers/checkpoint_download/pangualpha` - -```python -import mindspore -from mindformers import AutoModel, AutoTokenizer - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) - -tokenizer = AutoTokenizer.from_pretrained('pangualpha_2_6b') -model = AutoModel.from_pretrained('pangualpha_2_6b') - -inputs = tokenizer("上联:欢天喜地度佳节 下联:") -outputs = model.generate(inputs["input_ids"], max_length=100) -response = tokenizer.decode(outputs)[0] -print(response) -# 上联:欢天喜地度佳节 下联:笑逐颜开迎佳期 横批:幸福快乐' -``` - -**注:快速使用仅限单卡,该示例支持2.6B和13B模型。** -**注:多卡请参考[基于generate的推理](#基于generate的推理)。** - -### 基于Trainer的快速训练,微调,评测,推理 - -```python -import mindspore -from mindformers.trainer import Trainer - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) - -# 初始化预训练任务 -trainer = Trainer(task='text_generation', - model='pangualpha_2_6b', - train_dataset='path/to/train_dataset', - eval_dataset='path/to/eval_dataset') - -# 开启预训练 -trainer.train() - -# 开启全量微调 -trainer.finetune() - -# 开启评测 -trainer.evaluate() - -# 开启推理 -predict_result = trainer.predict(input_data="上联:欢天喜地度佳节 下联:") -# output result is: [{'text_generation_text': ['上联:欢天喜地度佳节 下联:笑逐颜开迎佳期 横批:幸福快乐']}] -``` - -**注:快速使用仅限单卡,该示例在Atlas 800仅支持2.6B和13B的evaluate和predict,在Atlas 800T A2支持2.6Btrain和finetune及2.6B和13B的evaluate和predict。** -**注:多卡请参考[使用高阶接口开发教程](https://mindformers.readthedocs.io/zh_CN/latest/docs/practice/Develop_With_Api.html)。** - -### 基于Pipeline的快速推理 - -```python -import mindspore -from mindformers.pipeline import pipeline - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) - -pipeline_task = pipeline("text_generation", model='pangualpha_2_6b', 
max_length=50) -pipeline_result = pipeline_task("上联:欢天喜地度佳节 下联:", top_k=3) -print(pipeline_result) -# [{'text_generation_text': ['上联:欢天喜地度佳节 下联:笑逐颜开庆佳节 横批:欢度佳节']}] -``` - -**注:快速使用仅限单卡,该示例支持2.6B和13B模型。** -**注:多卡请参考[基于pipeline的推理](#基于pipeline的推理)。** - -## 预训练 - -### 数据集准备-预训练 - -以Wikitext2数据集为例 - -- 数据集下载:[WikiText2数据集](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/MindFormers/dataset/wikitext-2/wikitext-2-v1.zip) - -- 词表下载:[model.vocab](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/pangualpha/vocab.model) - -将数据处理成Mindrecord格式。注:训练数据处理时,长度应等于模型接收长度加一。 - -```bash -cd mindformers/tools/dataset_preprocess/pangualpha -# 生成Mindrecord数据,其中output_file需以字符串mindrecord结尾 -# 训练 -python pretrain_data_process.py --input_glob 'data/*.txt' --tokenizer jieba --eot 40000 --data_column_name input_ids --seq_length 1025 -# 评测 -python pretrain_data_process.py --input_glob 'data/*.txt' --tokenizer jieba --eot 40000 --data_column_name input_ids --seq_length 1024 -``` - -### 脚本启动 - -#### 单卡训练 - -**注:在Atlas 800上无法单卡训练pangualpha模型。** -**注:在Atlas 800T A2上单卡训练需要修改`pangualpha_2_6b.yaml`配置文件中`max_device_memory`为`57GB`,`batch_size`减小为`2`。** - -```yaml -# context -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" - max_call_depth: 10000 - max_device_memory: "57GB" - save_graphs: False - save_graphs_path: "./graph" - device_id: 0 -# runner -runner_config: - epochs: 1 - batch_size: 2 - sink_mode: True - sink_size: 2 -``` - -- python启动 - -```bash -python run_mindformer.py --config configs/pangualpha/run_pangualpha_2_6b.yaml --run_mode train --use_parallel False -``` - -- bash启动 - -```bash -cd scripts -bash run_standalone.sh ../configs/pangualpha/run_pangualpha_2_6b.yaml [DEVICE_ID] train -``` - -#### 多卡训练 - -多卡运行需要RANK_FILE_TABLE,请参考前期准备-[生成RANK_TABLE_FILE](#生成rank_table_file多卡运行必须环节) - -- 单机多卡 - -```bash -cd scripts -bash run_distribute.sh RANK_TABLE_FILE ../configs/pangualpha/run_pangualpha_2_6b.yaml [0,8] train 8 -``` - -多机多卡运行需要合并不同机器的RANK_FILE_TABLE,参考前期准备-[多机RANK_TABLE_FILE合并](#多机rank_table_file合并多机多卡必备环节) - -- 多机多卡 - -在每台机器上启动`bash run_distribute.sh`。 - -**注:需要保证执行的节点和RANK_TABLE_FIEL的节点顺序保持一致,即rank_id匹配。** - -```bash -server_count=12 -device_num=8*$server_count -# launch ranks in the 0th server -cd scripts -bash run_distribute.sh $RANK_TABLE_FILE ../configs/pangualpha/run_pangualpha_2_6b.yaml [0,8] train $device_num - -# launch ranks in the 1-11 server via ssh -for idx in {1..11} -do - let rank_start=8*$idx - let rank_end=$rank_start+8 - ssh ${IP_LIST[$idx]} "cd scripts; bash run_distribute.sh $RANK_TABLE_FILE ../configs/pangualpha/run_pangualpha_2_6b.yaml [$rank_start,$rank_end] train $device_num" -done -``` - -其中 - -- `RANK_TABLE_FILE`为上一步汇总并分发的总rank table文件; -- `IP_LIST`为12台服务器的IP地址。如192.168.0.[0-11] - -```bash -IP_LIST=("192.168.0.0", "192.168.0.1", ..., "192.168.0.11") -``` - -## 微调 - -### 全参微调 - -#### 单卡微调 - -**注:在Atlas 800上无法单卡全参微调pangualpha模型。** -**注:在Atlas 800T A2上单卡全参微调需要修改`pangualpha_2_6b.yaml`配置文件中`max_device_memory`为`57GB`,`batch_size`减小为`2`。** - -```yaml -# context -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" - max_call_depth: 10000 - 
max_device_memory: "57GB" - save_graphs: False - save_graphs_path: "./graph" - device_id: 0 -# runner -runner_config: - epochs: 1 - batch_size: 2 - sink_mode: True - sink_size: 2 -``` - -- python启动 - -```bash -python run_mindformer.py --config configs/pangualpha/run_pangualpha_2_6b.yaml --run_mode finetune -``` - -- bash启动 - -```bash -cd scripts -bash run_standalone.sh ../configs/pangualpha/run_pangualpha_2_6b.yaml [DEVICE_ID] finetune -``` - -#### 多卡微调 - -多卡运行需要RANK_FILE_TABLE,请参考前期准备-[生成RANK_TABLE_FILE](#生成rank_table_file多卡运行必须环节) - -- 单机多卡 - -```bash -cd scripts -bash run_distribute.sh RANK_TABLE_FILE ../configs/pangualpha/run_pangualpha_2_6b.yaml [0,8] finetune 8 -``` - -多机多卡运行需要合并不同机器的RANK_FILE_TABLE,参考前期准备-[多机RANK_TABLE_FILE合并](#多机rank_table_file合并多机多卡必备环节) - -- 多机多卡 - -**注:需要保证执行的节点和RANK_TABLE_FIEL的节点顺序保持一致,即rank_id匹配。** - -在每台机器上启动`bash run_distribute.sh`。 - -```bash -server_count=12 -device_num=8*$server_count -# launch ranks in the 0th server -cd scripts -bash run_distribute.sh $RANK_TABLE_FILE path/to/config.yaml [0,8] finetune $device_num - -# launch ranks in the 1-11 server via ssh -for idx in {1..11} -do - let rank_start=8*$idx - let rank_end=$rank_start+8 - ssh ${IP_LIST[$idx]} "cd scripts; bash run_distribute.sh $RANK_TABLE_FILE path/to/config.yaml [$rank_start,$rank_end] finetune $device_num" -done -``` - -其中 - -- `RANK_TABLE_FILE`为上一步汇总并分发的总rank table文件; -- `IP_LIST`为12台服务器的IP地址。如192.168.0.[0-11] - -```bash -IP_LIST=("192.168.0.0", "192.168.0.1", ..., "192.168.0.11") -``` - -## 评测 - -### 文本分类 - -#### 数据集准备-文本分类 - -- 获取数据集: [TNEWS数据集](https://storage.googleapis.com/cluebenchmark/tasks/tnews_public.zip)自今日头条的新闻版块,共提取了15个类别的新闻,包括旅游,教育,金融,军事等。 - -- 处理数据成mindrecord格式 - -```bash -# 注:生成的数据集文件需以.mindrecord结尾 -cd mindformers/tools/dataset_preprocess/pangualpha -python tnews_data_process.py --input_file {your_path/dev.json} \ - --label_file {your_path/labels.json} \ - --output_file {your_path/tnews.mindrecord} -``` - -#### 单卡评测 - -```bash -python run_mindformer.py --config configs/pangualpha/run_pangualpha_2_6b_prompt_txtcls.yaml \ - --eval_dataset_dir {your_path/tnews.mindrecord} \ - --run_mode eval -# ACC: 0.646, total_acc_num: 6458, total_num: 10000 -``` - -### 阅读理解 - -#### 数据集准备-阅读理解 - -- 获取数据集: [CMRC2018数据集](https://storage.googleapis.com/cluebenchmark/tasks/cmrc2018_public.zip)是用于中文机器阅读理解的片段抽取任务(Span-Extraction)的数据,这个数据集由近20000个真实的问题组成,这些问题由人类专家在维基百科的段落中注释。 - -- 处理数据成mindrecord格式 - -```bash -# 注:生成的数据集文件需以.mindrecord结尾 -cd mindformers/tools/dataset_preprocess/pangualpha -python cmrc2018_data_process.py --train_file {your_path/train.json} \ - --dev_file {your_path/dev.json} \ - --output_file {your_path/cmrc2018.mindrecord} -``` - -#### 单卡评测 - -```bash -python run_mindformer.py --config configs/pangualpha/run_pangualpha_2_6b_prompt_txtcls.yaml \ - --eval_dataset_dir {your_path/tnews.mindrecord} \ - --run_mode eval -# ACC: 0.646, total_acc_num: 6458, total_num: 10000 -``` - -## 推理 - -### 基于pipeline的推理 - -以下为基于pipeline接口的自定义推理脚本,支持多卡多batch推理。 - -```python -# predict_custom.py 文件 -import os -import argparse -import numpy as np - -import mindspore as ms -from mindspore.train import Model -from mindspore import load_checkpoint, load_param_into_net -from mindformers import AutoConfig, AutoTokenizer, AutoModel, pipeline -from mindformers import init_context, ContextConfig, ParallelContextConfig -from mindformers.trainer.utils import get_last_checkpoint -from mindformers.tools.utils import str2bool, get_real_rank - - -def context_init(use_parallel=False, device_id=0): - """init 
context for mindspore.""" - context_config = ContextConfig(mode=0, device_target="Ascend", device_id=device_id) - parallel_config = None - if use_parallel: - parallel_config = ParallelContextConfig(parallel_mode='SEMI_AUTO_PARALLEL', - gradients_mean=False, - full_batch=True) - init_context(use_parallel=use_parallel, - context_config=context_config, - parallel_config=parallel_config) - - -def main(use_parallel=False, - device_id=0, - checkpoint_path="", - use_past=True): - """main function.""" - # 初始化单卡/多卡环境 - context_init(use_parallel, device_id) - - # 多batch输入 - inputs = ["上联:欢天喜地度佳节 下联:", - "四川的省会是哪里?", - "李大钊如果在世,他会对今天的青年人说:"] - - # set model config - model_config = AutoConfig.from_pretrained("pangualpha_2_6b") - model_config.use_past = use_past - if checkpoint_path and not use_parallel: - model_config.checkpoint_name_or_path = checkpoint_path - print(f"config is: {model_config}") - - # build tokenizer - tokenizer = AutoTokenizer.from_pretrained("pangualpha_2_6b") - # build model from config - network = AutoModel.from_config(model_config) - - # if use parallel, load distributed checkpoints - if use_parallel: - # find the sharded ckpt path for this rank - ckpt_path = os.path.join(checkpoint_path, "rank_{}".format(get_real_rank())) - ckpt_path = get_last_checkpoint(ckpt_path) - print("ckpt path: %s", str(ckpt_path)) - - # shard pangualpha and load sharded ckpt - model = Model(network) - model.infer_predict_layout(ms.Tensor(np.ones(shape=(1, model_config.seq_length)), ms.int32)) - checkpoint_dict = load_checkpoint(ckpt_path) - not_load_network_params = load_param_into_net(model, checkpoint_dict) - print("Network parameters are not loaded: %s", str(not_load_network_params)) - - text_generation_pipeline = pipeline(task="text_generation", model=network, tokenizer=tokenizer) - outputs = text_generation_pipeline(inputs) - for output in outputs: - print(output) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--use_parallel', default=False, type=str2bool, - help='whether use parallel.') - parser.add_argument('--device_id', default=0, type=int, - help='set device id.') - parser.add_argument('--checkpoint_path', default='', type=str, - help='set checkpoint path.') - parser.add_argument('--use_past', default=True, type=str2bool, - help='whether use past.') - args = parser.parse_args() - - main(args.use_parallel, - args.device_id, - args.checkpoint_path, - args.use_past) -``` - -以下为多卡运行自定义多batch推理的脚本 - -```bash -# >>> `run_predict.sh`文件 -CHECKPOINT_PATH=$2 -export RANK_TABLE_FILE=$1 - -# define variable -export RANK_SIZE=8 -export START_RANK=0 # this server start rank -export END_RANK=8 # this server end rank - -# run -for((i=${START_RANK}; i<${END_RANK}; i++)) -do - export RANK_ID=$i - export DEVICE_ID=$((i-START_RANK)) - echo "Start distribute running for rank $RANK_ID, device $DEVICE_ID" - python3 ./predict_custom.py --use_parallel True --checkpoint_path CHECKPOINT_PATH &> minformers_$RANK_ID.log & -done -``` - -#### 单卡pipeline推理 - -```bash -python predict_custom.py -``` - -#### 多卡pipeline推理 - -```bash -bash run_predict.sh RANK_TABLE_FILE path/to/pangualpha_2_6b_shard_checkpoint_dir -``` - -### 基于generate的推理 - -以下为基于model.generate接口的自定义推理脚本,支持多卡多batch推理。 - -```python -# predict_custom.py 文件 -import os -import argparse -import numpy as np - -import mindspore as ms -from mindspore.train import Model -from mindspore import load_checkpoint, load_param_into_net -from mindformers import AutoConfig, AutoTokenizer, AutoModel -from mindformers import 
init_context, ContextConfig, ParallelContextConfig -from mindformers.trainer.utils import get_last_checkpoint -from mindformers.tools.utils import str2bool, get_real_rank - - -def context_init(use_parallel=False, device_id=0): - """init context for mindspore.""" - context_config = ContextConfig(mode=0, device_target="Ascend", device_id=device_id) - parallel_config = None - if use_parallel: - parallel_config = ParallelContextConfig(parallel_mode='SEMI_AUTO_PARALLEL', - gradients_mean=False, - full_batch=True) - init_context(use_parallel=use_parallel, - context_config=context_config, - parallel_config=parallel_config) - - -def main(use_parallel=False, - device_id=0, - checkpoint_path="", - use_past=True): - """main function.""" - # 初始化单卡/多卡环境 - context_init(use_parallel, device_id) - - # 多batch输入 - inputs = ["上联:欢天喜地度佳节 下联:", - "四川的省会是哪里?", - "李大钊如果在世,他会对今天的青年人说:"] - - # set model config - model_config = AutoConfig.from_pretrained("pangualpha_2_6b") - model_config.batch_size = len(inputs) - model_config.use_past = use_past - if checkpoint_path and not use_parallel: - model_config.checkpoint_name_or_path = checkpoint_path - print(f"config is: {model_config}") - - # build tokenizer - tokenizer = AutoTokenizer.from_pretrained("pangualpha_2_6b") - # build model from config - model = AutoModel.from_config(model_config) - - # if use parallel, load distributed checkpoints - if use_parallel: - # find the sharded ckpt path for this rank - ckpt_path = os.path.join(checkpoint_path, "rank_{}".format(get_real_rank())) - ckpt_path = get_last_checkpoint(ckpt_path) - print("ckpt path: %s", str(ckpt_path)) - - # shard pangualpha and load sharded ckpt - model = Model(model) - model.infer_predict_layout(ms.Tensor(np.ones(shape=(1, model_config.seq_length)), ms.int32)) - checkpoint_dict = load_checkpoint(ckpt_path) - not_load_network_params = load_param_into_net(model, checkpoint_dict) - print("Network parameters are not loaded: %s", str(not_load_network_params)) - - inputs_ids = tokenizer(inputs, max_length=model_config.max_decode_length, padding="max_length")["input_ids"] - outputs = model.generate(inputs_ids, max_length=model_config.max_decode_length) - for output in outputs: - print(tokenizer.decode(output)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--use_parallel', default=False, type=str2bool, - help='whether use parallel.') - parser.add_argument('--device_id', default=0, type=int, - help='set device id.') - parser.add_argument('--checkpoint_path', default='', type=str, - help='set checkpoint path.') - parser.add_argument('--use_past', default=True, type=str2bool, - help='whether use past.') - args = parser.parse_args() - - main(args.use_parallel, - args.device_id, - args.checkpoint_path, - args.use_past) -``` - -以下为多卡运行自定义多batch推理的脚本 - -```bash -# >>> `run_predict.sh`文件 -CHECKPOINT_PATH=$2 -export RANK_TABLE_FILE=$1 - -# define variable -export RANK_SIZE=8 -export START_RANK=0 # this server start rank -export END_RANK=8 # this server end rank - -# run -for((i=${START_RANK}; i<${END_RANK}; i++)) -do - export RANK_ID=$i - export DEVICE_ID=$((i-START_RANK)) - echo "Start distribute running for rank $RANK_ID, device $DEVICE_ID" - python3 ./predict_custom.py --use_parallel True --checkpoint_path CHECKPOINT_PATH &> minformers_$RANK_ID.log & -done -``` - -#### 单卡generate推理 - -```bash -python predict_custom.py -``` - -#### 多卡generate推理 - -```bash -bash run_predict.sh RANK_TABLE_FILE path/to/pangualpha_2_6b_shard_checkpoint_dir -``` - -### 脚本启动 - -#### 单卡推理 - 
-```bash -python run_mindformer.py --config configs/pangualpha/run_pangualpha_2_6b.yaml --run_mode predict --predict_data 上联:欢天喜地度佳节 下联: --use_parallel False -# output result is: [{'text_generation_text': ['上联:欢天喜地度佳节 下联:笑逐颜开迎佳期 横批:幸福快乐']}] -``` - -**注**:要提高推理速度,可在对应模型配置文件中进行如下配置,设置增量推理`use_past`为True。 - -```yaml -# model config -use_past: True # 开启增量推理 -use_moe: False -expert_num: 1 -per_token_num_experts_chosen: 1 -checkpoint_name_or_path: "pangualpha_2_6b" -repetition_penalty: 1 -max_decode_length: 1024 -top_k: 3 -top_p: 1 -do_sample: False -``` diff --git a/docs/model_cards/sam.md b/docs/model_cards/sam.md deleted file mode 100644 index 89cb6364..00000000 --- a/docs/model_cards/sam.md +++ /dev/null @@ -1,364 +0,0 @@ -# Segment Anything Model - -## 模型描述 - -SAM(Segment Anything Model)是Meta的FAIR实验室发布的图像分割模型,基于SA-1B数据集训练,SA-1B数据集包含了1100万图片,拥有11亿分割掩码,专注于可提示的分割任务,使用提示工程来适应不同的下游分割任务。 - -论文:https://arxiv.org/abs/2304.02643 - -SA-1B数据集:https://segment-anything.com/dataset/index.html - -## 模型准备 - -SAM开源了[base](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth)(358M)、[large](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth)(1.2G)、[huge](https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth)(2.4G)三个不同规格大小的模型,用户下载原生模型后,使用提供的`convert_weight.py`脚本将pth模型转为mindspore支持的ckpt模型。 - -```shell -# 以sam_vit_b_01ec64.pth模型为例,转换后权重保存为同目录下的sam_vit_b_01ec64.ckpt -python mindformers/models/sam/convert_weight.py --torch_path path/sam_vit_b_01ec64.pth -``` - -转好模型后,在配置文件中配置模型路径。 - -```shell -checkpoint_name_or_path: "path/sam_vit_b_01ec64.ckpt" -``` - -## 快速使用 - -Mindformer提供了SAM推理的pipeline,用户通过创建和调用pipeline来做推理。 - -- 创建pipeline - -```python -import cv2 -import numpy as np -import matplotlib.pyplot as plt - -import mindspore as ms - -from mindformers.pipeline import pipeline - -ms.set_context(device_target="Ascend", device_id=0, mode=0) - -pipeline_task = pipeline("segment_anything", model='sam_vit_h') -``` - -- 定义画图函数 - -```python -def show_mask(mask, ax, random_color=False): - if random_color: - color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) - else: - color = np.array([30/255, 144/255, 255/255, 0.6]) - h, w = mask.shape[-2:] - mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) - ax.imshow(mask_image) - -def show_points(coords, labels, ax, marker_size=375): - pos_points = coords[labels==1] - neg_points = coords[labels==0] - ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) - ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) - -def show_box(box, ax): - x0, y0 = box[0], box[1] - w, h = box[2] - box[0], box[3] - box[1] - ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2)) - -def show_anns(anns): - if len(anns) == 0: - return - sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True) - ax = plt.gca() - ax.set_autoscale_on(False) - - for ann in sorted_anns: - m = ann['segmentation'] - img = np.ones((m.shape[0], m.shape[1], 3)) - color_mask = np.random.random((1, 3)).tolist()[0] - for i in range(3): - img[:,:,i] = color_mask[i] - ax.imshow(np.dstack((img, m*0.35))) -``` - -- 使用pipeline提前抽取图像特征:性能耗时约为300ms。 - -```python -# 方式1:传入cv2图像 -image = cv2.imread("scripts/examples/segment_anything/images/truck.jpg") -image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) -pipeline_task.set_image(image) - -# 方式2:传入图像路径 
-pipeline_task.set_image("scripts/examples/segment_anything/images/truck.jpg") -``` - -**基于Prompt分割**:可以传入坐标点或坐标框作为prompt,模型自动对点所在位置或框内物体进行分割,性能耗时约为50ms。 - -以下为几种Prompt的使用方式: - -- 1、单点确定一个物体 - -```python -input_point = np.array([[500, 375]]) # 单坐标点,2维 -input_label = np.array([1]) # 单坐标点的label,1:前景点,需要分割;2:背景点,不需要分割。 - -outputs = pipeline_task({"points": input_point, # prompt以字典的方式传入 - "labels": input_label}, - multimask_output=True) # multimask_output=True输出3个掩码图 -masks = outputs["masks"] -scores = outputs["iou_predictions"] -logits_single = outputs["low_res_masks"] - -for i, (mask, score) in enumerate(zip(masks, scores)): - plt.figure(figsize=(10,10)) - plt.imshow(image) - show_mask(mask, plt.gca()) - show_points(input_point, input_label, plt.gca()) - plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18) - plt.axis('off') - plt.show() -``` - -![1-单点确定一个物体_1](../../scripts/examples/segment_anything/examples/1-单点确定一个物体_0.png) - -![1-单点确定一个物体_2](../../scripts/examples/segment_anything/examples/1-单点确定一个物体_1.png) - -![1-单点确定一个物体_3](../../scripts/examples/segment_anything/examples/1-单点确定一个物体_2.png) - -- 2、多点确定相同物体(以两点为例) - -```python -input_point = np.array([[500, 375], [1125, 625]]) # 多坐标点,2维,代表分割的是同一物体 -input_label = np.array([1, 1]) # 多坐标点的label,1维,代表分割的是同一物体 - -outputs = pipeline_task({"points": input_point, - "labels": input_label}, - multimask_output=False) - -masks = outputs["masks"] -scores = outputs["iou_predictions"] -logits = outputs["low_res_masks"] - -plt.figure(figsize=(10,10)) -plt.imshow(image) -show_mask(masks, plt.gca()) -show_points(input_point, input_label, plt.gca()) -plt.axis('off') -plt.show() -``` - -![2-两点确定相同物体](../../scripts/examples/segment_anything/examples/2-两点确定相同物体.png) - -- 3、多点确定不同物体(以两点为例) - -```python -input_point = np.array([ - [[500, 375]], - [[1125, 625]], - ]) # 多坐标点,3维,代表分割的是不同物体 -input_label = np.array([[1], [1]]) # 多坐标点的label,2维,代表分割的是不同物体 - -outputs = pipeline_task({"points": input_point, - "labels": input_label}, - multimask_output=False) -masks = outputs["masks"] -scores = outputs["iou_predictions"] -logits = outputs["low_res_masks"] - -plt.figure(figsize=(10, 10)) -plt.imshow(image) -for mask in masks: - show_mask(mask, plt.gca(), random_color=True) -show_points(input_point.reshape(-1, 2), input_label.reshape(-1), plt.gca()) -plt.axis('off') -plt.show() -``` - -![3-两点确定不同物体](../../scripts/examples/segment_anything/examples/3-两点确定不同物体.png) - -- 4、前景点和背景点 - -```python -input_point = np.array([[500, 375], [1125, 625]]) -input_label = np.array([1, 0]) # label=1为前景点、label=0为背景点,背景点则不分割 - -outputs = pipeline_task({"points": input_point, - "labels": input_label}, - multimask_output=False) -masks = outputs["masks"] -scores = outputs["iou_predictions"] -logits = outputs["low_res_masks"] - -plt.figure(figsize=(10, 10)) -plt.imshow(image) -show_mask(masks, plt.gca()) -show_points(input_point, input_label, plt.gca()) -plt.axis('off') -plt.show() -``` - -![4-一个前景点和背景点](../../scripts/examples/segment_anything/examples/4-一个前景点和背景点.png) - -- 5、单框确定一个物体 - -```python -input_box = np.array([425, 600, 700, 875]) # 单坐标框,1维 - -outputs = pipeline_task({"boxes": input_box}, - multimask_output=False) -masks = outputs["masks"] -scores = outputs["iou_predictions"] -logits = outputs["low_res_masks"] - -plt.figure(figsize=(10, 10)) -plt.imshow(image) -show_mask(masks[0], plt.gca()) -show_box(input_box, plt.gca()) -plt.axis('off') -plt.show() -``` - -![5-单框确定一个物体](../../scripts/examples/segment_anything/examples/5-单框确定一个物体.png) - -- 6、框和背景点确定物体 - -```python -# 
可同时传入坐标框和坐标点的组合 -input_box = np.array([425, 600, 700, 875]) -input_point = np.array([[575, 750]]) -input_label = np.array([0]) - -outputs = pipeline_task({"points": input_point, - "labels": input_label, - "boxes": input_box}, - multimask_output=False) -masks = outputs["masks"] -scores = outputs["iou_predictions"] -logits = outputs["low_res_masks"] - -plt.figure(figsize=(10, 10)) -plt.imshow(image) -show_mask(masks[0], plt.gca()) -show_box(input_box, plt.gca()) -show_points(input_point, input_label, plt.gca()) -plt.axis('off') -plt.show() -``` - -![6-框和背景点确定物体](../../scripts/examples/segment_anything/examples/6-框和背景点确定物体.png) - -- 7、多组框和点确定不同物体 - -```python -input_boxes = np.array([[425, 600, 700, 875], - [1360, 525, 1680, 780]]) -input_points = np.array([[[575, 750]], - [[1525, 670]]]) -input_labels = np.array([[1], [1]]) - -outputs = pipeline_task({"points": input_points, - "labels": input_labels, - "boxes": input_boxes}, - multimask_output=False) -masks = outputs["masks"] -scores = outputs["iou_predictions"] -logits = outputs["low_res_masks"] - -plt.figure(figsize=(10, 10)) -plt.imshow(image) -for mask in masks: - show_mask(mask, plt.gca(), random_color=True) -for box in input_boxes: - show_box(box, plt.gca()) -for point, label in zip(input_points, input_labels): - show_points(point, label, plt.gca()) -plt.axis('off') -plt.show() -``` - -![7-多组框和点确定不同物体](../../scripts/examples/segment_anything/examples/7-多组框和点确定不同物体.png) - -- 8、多个框确定不同物体 - -```python -input_boxes = np.array([ - [75, 275, 1725, 850], - [425, 600, 700, 875], - [1375, 550, 1650, 800], - [1240, 675, 1400, 750], - ]) # 多坐标框,2维,分割不同物体 - -outputs = pipeline_task({"boxes": input_boxes}, - multimask_output=False) -masks = outputs["masks"] -scores = outputs["iou_predictions"] -logits = outputs["low_res_masks"] - -plt.figure(figsize=(10, 10)) -plt.imshow(image) -for mask in masks: - show_mask(mask, plt.gca(), random_color=True) -for box in input_boxes: - show_box(box, plt.gca()) -plt.axis('off') -plt.show() -``` - -![8-多个框确定不同物体](../../scripts/examples/segment_anything/examples/8-多个框确定不同物体.png) - -**基于整图分割**:整图分割本质上是在图上构造网格坐标点矩阵作为Prompt,默认坐标点数量为32*32,模型内部采用批处理的方式,即每次传入N个坐标点作为Prompt,直到处理完32*32个坐标点,默认N=64。每批次处理完后会做一系列后处理,收集完所有mask后统一做非极大值抑制(NMS)处理,以过滤掉重复的mask。 - -用户可直接传入图像,并设置`seg_image=True`,模型自动对整张图像进行分割,默认参数下性能耗时约为30s。 - -- 9、全图分割:默认参数 - -```python -# 方式1:传入cv2图像 -image = cv2.imread("scripts/examples/segment_anything/images/dog.jpg") -image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) -masks = pipeline_task({"image": image}, seg_image=True) - -# 方式2:传入图像路径 -masks = pipeline_task({"image": "scripts/examples/segment_anything/images/dog.jpg"}, seg_image=True) - -plt.figure(figsize=(20,20)) -plt.imshow(image) -show_anns(masks) -plt.axis('off') -plt.show() -``` - -![9-全图分割](../../scripts/examples/segment_anything/examples/9-全图分割.png) - -- 10、全图分割:根据图像适当调整参数。 - -```python -masks = pipeline_task({"image": image}, seg_image=True, - points_per_side = 32, - pred_iou_thresh = 0.86, - stability_score_thresh = 0.92, - crop_n_layers = 1, - crop_n_points_downscale_factor = 2, - min_mask_region_area = 100) -plt.figure(figsize=(20,20)) -plt.imshow(image) -show_anns(masks) -plt.axis('off') -plt.show() - -# 参数说明: -# points_per_side: 控制默认坐标点数量,传入N,表示构造N*N网格点,默认32。 -# pred_iou_thresh: 控制iou阈值,越大mask精度越高,默认0.88。 -# stability_score_thresh: mask稳定性得分阈值,越大mask精度越高,默认0.95。 -# crop_n_layers: 额外裁剪图像层数,大于0,表示对图像做额外裁剪,并基于原图+裁剪图像做分割,提高分割精度,但会增加额外耗时;默认0,表示只基于原图做分割。 -# crop_n_points_downscale_factor: 
额外裁剪图像对应的坐标点数量下采样比例,比如原图坐标点数量是32*32,第i层裁剪图像对应的坐标点数量为(32/crop_n_points_downscale_factor^i)*(32/crop_n_points_downscale_factor^i);默认为1,表示裁剪图像和原图使用相同数量的网格点。 -# min_mask_region_area: mask最小面积,面积小于min_mask_region_area的mask都会被过滤,默认为0,表示不对面积做过滤。 -``` - -![10-全图分割](../../scripts/examples/segment_anything/examples/10-全图分割.png) - -## 注意事项 - -- `mindformers/scripts/examples/segment_anything/`目录下提供了`sam_by_prompt.py`和`sam_by_image.py`两个参考推理脚本,运行前需要在环境中提前安装`mindformers`。 \ No newline at end of file diff --git a/docs/model_cards/swin.md b/docs/model_cards/swin.md deleted file mode 100644 index 013afdab..00000000 --- a/docs/model_cards/swin.md +++ /dev/null @@ -1,395 +0,0 @@ -# Swin - -## 模型描述 - -Swin:全名Swin Transformer,是一个基于Transformer在视觉领域有着SOTA表现的深度学习模型。比起ViT拥有更好的性能和精度。 - -[论文](https://arxiv.org/abs/2103.14030) Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, Baining Guo, 2021 - -## 模型性能 - -- 基于Atlas 800 - -| config | task | Datasets | metric | score | [train performance](#预训练) | [prediction performance](#推理) | -| :----------------------------------------------------------: | :------------------: | :---------: | :-----------: | :----: | :--------------------------: | :-----------------------------: | -| [swin_base_p4w7](../../configs/swin/run_swin_base_p4w7_100ep.yaml) | image_classification | ImageNet-1K | Top1-Accuracy | 0.8345 | 182.43 samples/s/p | 233.43 (fps) | - -## 仓库介绍 - -`Swin` 基于 `MindFormers` 实现,主要涉及的文件有: - -1. 模型具体实现:`mindformers/models/swin` - - ```bash - model - ├── __init__.py - ├── convert_weight.py # 权重转换脚本 - ├── swin.py # 模型实现 - ├── swin_config.py # 模型配置项 - ├── swin_modules.py # 模型所需模块 - └── swin_processor.py # Model预处理 - ``` - -2. 模型配置:`configs/vit` - - ```bash - model - └── run_swin_base_p4w7_100ep.yaml # vit_base模型启动配置 - ``` - -## 前期准备 - -### [mindformers安装](path/to/README.md#二mindformers安装) - -### 生成RANK_TABLE_FILE(多卡运行必须环节) - -运行mindformers/tools/hccl_tools.py生成RANK_TABLE_FILE的json文件 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" -``` - -**注:若使用ModelArts的notebook环境,可从 `/user/config/jobstart_hccl.json` 路径下直接获取rank table,无需手动生成** - -RANK_TABLE_FILE 单机8卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "1", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - {"device_id": "0","device_ip": "192.1.27.6","rank_id": "0"}, - {"device_id": "1","device_ip": "192.2.27.6","rank_id": "1"}, - {"device_id": "2","device_ip": "192.3.27.6","rank_id": "2"}, - {"device_id": "3","device_ip": "192.4.27.6","rank_id": "3"}, - {"device_id": "4","device_ip": "192.1.27.7","rank_id": "4"}, - {"device_id": "5","device_ip": "192.2.27.7","rank_id": "5"}, - {"device_id": "6","device_ip": "192.3.27.7","rank_id": "6"}, - {"device_id": "7","device_ip": "192.4.27.7","rank_id": "7"}], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 多机RANK_TABLE_FILE合并(多机多卡必备环) - -- step 1. 首先根据上章节内容,在每个机器上生成各自的`RANK_TABLE_FILE`文件,然后将不同机器上生成的`RANK_TABLE_FILE`文件全部拷贝到同一台机器上。 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" --server_ip xx.xx.xx.xx -``` - -**注:需要根据机器的ip地址指定 --server_ip,避免由于不同机器server_ip不同,导致多节点间通信失败。** - -- step 2. 运行mindformers/tools/merge_hccl.py将不同机器上生成的`RANK_TABLE_FILE`文件合并 - -```bash -# 运行如下命令,合并每个机器上的RANK_TABLE_FILE的json文件。 -python ./mindformers/tools/merge_hccl.py hccl*.json -``` - -- step 3. 
将合并后的`RANK_TABLE_FILE`文件拷贝到所有机器中,保证不同机器上的`RANK_TABLE_FILE`相同。 - -RANK_TABLE_FILE 双机16卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "2", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.0", "rank_id": "0" - }, - { - "device_id": "1", "device_ip": "192.168.1.0", "rank_id": "1" - }, - { - "device_id": "2", "device_ip": "192.168.2.0", "rank_id": "2" - }, - { - "device_id": "3", "device_ip": "192.168.3.0", "rank_id": "3" - }, - { - "device_id": "4", "device_ip": "192.168.0.1", "rank_id": "4" - }, - { - "device_id": "5", "device_ip": "192.168.1.1", "rank_id": "5" - }, - { - "device_id": "6", "device_ip": "192.168.2.1", "rank_id": "6" - }, - { - "device_id": "7", "device_ip": "192.168.3.1", "rank_id": "7" - } - ], - "host_nic_ip": "reserve" - }, - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.1", "rank_id": "8" - }, - { - "device_id": "1", "device_ip": "192.168.1.1", "rank_id": "9" - }, - { - "device_id": "2", "device_ip": "192.168.2.1", "rank_id": "10" - }, - { - "device_id": "3", "device_ip": "192.168.3.1", "rank_id": "11" - }, - { - "device_id": "4", "device_ip": "192.168.0.2", "rank_id": "12" - }, - { - "device_id": "5", "device_ip": "192.168.1.2", "rank_id": "13" - }, - { - "device_id": "6", "device_ip": "192.168.2.2", "rank_id": "14" - }, - { - "device_id": "7", "device_ip": "192.168.3.2", "rank_id": "15" - } - ], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 模型权重下载与转换 - -如果无需加载权重,或者使用from_pretrained功能自动下载,可以跳过此章节。 - -MindFormers提供高级接口from_pretrained功能直接下载MindFormerBook中的[swin_base_p4w7.ckpt](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/swin/swin_base_p4w7.ckpt),无需手动转换。 - -本仓库中的`swin_base_p4w7`来自于MicroSoft的[Swin-Transformer](https://github.com/microsoft/Swin-Transformer), 如需手动下载权重,可参考以下示例进行转换: - -1. 从[swin_base_p4w7](https://pan.baidu.com/s/16bqCTEc70nC_isSsgBSaqQ?pwd=swin)链接中下载官方权重,文件名为`swin_base_patch4_window7_224.pth` - -2. 执行转换脚本,得到转换后的输出文件`swin_base_p4w7.ckpt` - -```bash -python mindformers/models/swin/convert_weight.py --torch_path swin_base_patch4_window7_224.pth --mindspore_path swin_base_p4w7.ckpt --is_pretrain False -``` - -如需转换官方SimMIM的预训练权重进行finetune,则执行如下步骤: - -1. 从[SimMIM](https://github.com/microsoft/SimMIM)官网提供的google网盘下载[simmim_swin_192](https://drive.google.com/file/d/1Wcbr66JL26FF30Kip9fZa_0lXrDAKP-d/view?usp=sharing)的官方权重,文件名为`simmim_pretrain_swin_base_img192_window6_100ep.pth` - -2. 
执行转换脚本,得到转换后的输出文件`simmim_swin_p4w6.ckpt` - -```bash -python mindformers/models/swin/convert_weight.py --torch_path simmim_pretrain_swin_base_img192_window6_100ep.pth --mindspore_path simmim_swin_p4w6.ckpt --is_pretrain True -``` - -## 基于API的快速使用 - -### 基于AutoClass的快速使用 - -可以使用AutoClass接口,通过模型名称自动下载并加载权重 - -`from_pretrained()` 接口会自动从云上下载预训练的模型,存储路径:`mindformers/checkpoint_download/swin` - -```python -import mindspore -from mindformers import AutoModel, AutoConfig -from mindformers.tools.image_tools import load_image -from mindformers import SwinImageProcessor - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) - -# 模型标志加载模型 -model = AutoModel.from_pretrained("swin_base_p4w7") - -#模型配置加载模型 -config = AutoConfig.from_pretrained("swin_base_p4w7") -# {'batch_size': 128, 'image_size': 224, 'patch_size': 4, 'num_labels': 1000, 'num_channels': 3, -# 'embed_dim': 128, 'depths': [2, 2, 18, 2], 'num_heads': [4, 8, 16, 32], -# 'checkpoint_name_or_path': 'swin_base_p4w7'} -model = AutoModel.from_config(config) - -img = load_image("https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/clip/sunflower.png") -image_processor = SwinImageProcessor(size=224) -processed_img = image_processor(img) - -predict_result = model(processed_img) - -# output -# (Tensor(shape=[1, 1000], dtype=Float32, value= -# [[-5.19241571e-01, -1.37802780e-01, 3.77173603e-01 ... -5.00497580e-01, 5.52467167e-01, -2.11867809e-01]]), None) -``` - -### 基于Trainer的快速训练、评测、推理 - -```python -import mindspore -from mindformers.trainer import Trainer -from mindformers.tools.image_tools import load_image - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) -# 初始化任务 -swin_trainer = Trainer( - task='image_classification', - model='swin_base_p4w7', - train_dataset="imageNet-1k/train", - eval_dataset="imageNet-1k/val") -img = load_image("https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/clip/sunflower.png") - -# 方式1:开启训练,并使用训练好的权重进行eval和推理 -swin_trainer.train() -swin_trainer.evaluate(eval_checkpoint=True) -predict_result = swin_trainer.predict(predict_checkpoint=True, input_data=img, top_k=3) -print(predict_result) - -# 方式2:从obs下载训练好的权重并进行eval和推理 -swin_trainer.evaluate() # 下载权重进行评估 -predict_result = swin_trainer.predict(input_data=img, top_k=3) # 下载权重进行推理 -print(predict_result) - -# output -# - mindformers - INFO - output result is: [[{'score': 0.89573187, 'label': 'daisy'}, -# {'score': 0.005366202, 'label': 'bee'}, {'score': 0.0013296203, 'label': 'fly'}]] -``` - -### 基于Pipeline的快速推理 - -```python -import mindspore -from mindformers.pipeline import pipeline -from mindformers.tools.image_tools import load_image - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) -pipeline_task = pipeline("image_classification", model='swin_base_p4w7') -img = load_image("https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/clip/sunflower.png") -pipeline_result = pipeline_task(img, top_k=3) -print(pipeline_result) - -# output -# [[{'score': 0.89573187, 'label': 'daisy'}, {'score': 0.005366202, 'label': 'bee'}, -# {'score': 0.0013296203, 'label': 'fly'}]] -``` - - Trainer和pipeline接口默认支持的task和model关键入参 - -| task(string) | model(string) | -| :------------------: | :-------------: | -| image_classification | swin_base_p4w7 | - -## 预训练 - -### 数据集准备-预训练 - -使用的数据集:[ImageNet2012](http://www.image-net.org/) - -- 数据集大小:125G,共1000个类、125万张彩色图像 - - 训练集:120G,共120万张图像 - - 测试集:5G,共5万张图像 -- 数据格式:RGB - - ```text -数据集目录格式 -└─imageNet-1k - ├─train # 训练数据集 - 
└─val # 评估数据集 - ``` - -### 脚本启动 - -#### 单卡训练 - -- python启动 - -```bash -# pretrain -python run_mindformer.py --config ./configs/swin/run_swin_base_p4w7_224_100ep.yaml --run_mode train --train_dataset_dir [DATASET_PATH] -``` - -#### 多卡训练 - -多卡运行需要RANK_FILE_TABLE,请参考前期准备-[生成RANK_TABLE_FILE](#生成rank_table_file多卡运行必须环节) - -- 单机多卡 - -```bash -cd scripts -bash run_distribute.sh RANK_TABLE_FILE ../configs/swin/run_swin_base_p4w7_224_100ep.yaml [0,8] train 8 -``` - -多机多卡运行需要合并不同机器的RANK_FILE_TABLE,参考前期准备-[多机RANK_TABLE_FILE合并](#多机rank_table_file合并多机多卡必备环节) - -- 多机多卡 - -在每台机器上启动`bash run_distribute.sh`。 - -**注:需要保证执行的节点和RANK_TABLE_FIEL的节点顺序保持一致,即rank_id匹配。** - -```bash -server_count=12 -device_num=8*$server_count -# launch ranks in the 0th server -cd scripts -bash run_distribute.sh $RANK_TABLE_FILE ../configs/swin/run_swin_base_p4w7_224_100ep.yaml [0,8] train $device_num - -# launch ranks in the 1-11 server via ssh -for idx in {1..11} -do - let rank_start=8*$idx - let rank_end=$rank_start+8 - ssh ${IP_LIST[$idx]} "cd scripts; bash run_distribute.sh $RANK_TABLE_FILE ../configs/swin/run_swin_base_p4w7_224_100ep.yaml [$rank_start,$rank_end] train $device_num" -done -``` - -其中 - -- `RANK_TABLE_FILE`为上一步汇总并分发的总rank table文件; -- `IP_LIST`为12台服务器的IP地址。如192.168.0.[0-11] - -```bash -IP_LIST=("192.168.0.0", "192.168.0.1", ..., "192.168.0.11") -``` - -## 评测 - -### 图像分类 - -### 数据集准备-图像分类 - -参考[数据集准备-预训练](#数据集准备-预训练) - -### 脚本启动 - -#### 单卡评测 - -```bash -# evaluate -python run_mindformer.py --config ./configs/swin/run_swin_base_p4w7_224_100ep.yaml --run_mode eval --eval_dataset_dir [DATASET_PATH] -# output -# Swin: Top1 Accuracy = {'Top1 Accuracy': 0.8345352564102564} -``` - -## 推理 - -### 脚本启动 - -#### 单卡推理 - -```bash -# predict -python run_mindformer.py --config ./configs/swin/run_swin_base_p4w7_224_100ep.yaml --run_mode predict --predict_data [PATH_TO_IMAGE] -``` diff --git a/docs/model_cards/t5.md b/docs/model_cards/t5.md deleted file mode 100644 index ba7bd6cd..00000000 --- a/docs/model_cards/t5.md +++ /dev/null @@ -1,127 +0,0 @@ -# T5 - -## 模型描述 - -T5:全名`Text-to-Text Transfer Transformer`模型是谷歌在2019年基于C4数据集训练的Transformer模型。 - -[论文](https://arxiv.org/abs/1910.10683)C Raffel,N Shazeer,A Roberts,K Lee,S Narang,M Matena,Y Zhou,W Li,PJ Liu, 2020 - -## 数据集准备 - -使用的数据集:[WMT16](https://cdn-datasets.huggingface.co/translation/wmt_en_ro.tar.gz) - -对应的文件路径如下: - -```bash -└── wmt_en_ro - ├── test.source - ├── test.target - ├── train.source - ├── train.target - ├── val.source - └── val.target -``` - -## 快速使用 - -### 脚本启动 - -> 需开发者提前clone工程。 - -- 请参考[使用脚本启动](../../README.md#方式一使用已有脚本启动) - -示例命令如下,将会执行一个只有1层的T5模型训练 - -```shell -python run_mindformer.py --config configs/t5/run_t5_tiny_on_wmt16.yaml --run_mode train \ - --device_target Ascend \ - --train_dataset_dir /your_path/wmt_en_ro -``` - -其中`device_target`根据用户的运行设备不同,可选`GPU/Ascend/CPU`。`config`的入参还可以为`configs/t5/run_t5_small.yaml`,在 -这个配置下将会加载`t5_small`的权重并且开始执行微调。 - -### 调用API启动 - -> 需开发者提前pip安装。具体接口说明请参考[API接口](https://gitee.com/mindspore/transformer/wikis/API/) - -#### Model调用接口 - -- 模型计算Loss - -```python -from mindformers import T5ForConditionalGeneration, T5Tokenizer - -model = T5ForConditionalGeneration.from_pretrained('t5_small') -tokenizer = T5Tokenizer.from_pretrained('t5_small') - -src_output = tokenizer(["hello world"], padding='max_length', max_length=model.config.seq_length, - return_tensors='ms') - -model_input = tokenizer(["So happy to see you!"], padding='max_length', max_length=model.config.max_decode_length, - return_tensors='ms')["input_ids"] -input_ids = 
src_output['input_ids'] -attention_mask = src_output['attention_mask'] -output = model(input_ids, attention_mask, model_input) -print(output) -# [5.64458] -``` - -- 推理 - -执行下述的命令,可以自动云上拉取`t5_small`模型并且进行推理。 - -```python -from mindformers import T5ForConditionalGeneration, T5Tokenizer - -t5 = T5ForConditionalGeneration.from_pretrained("t5_small") -tokenizer = T5Tokenizer.from_pretrained("t5_small") -words = tokenizer("translate the English to the Romanian: UN Chief Says There Is No Military " - "Solution in Syria")['input_ids'] -output = t5.generate(words, do_sample=False) -output = tokenizer.decode(output, skip_special_tokens=True) -print(output) -# "eful ONU declară că nu există o soluţie militară în Siri" -``` - -- Trainer接口开启训练/预测: - -```python -import mindspore; mindspore.set_context(mode=0, device_id=0) -from mindformers.trainer import Trainer -# 初始化预训练任务 -trainer = Trainer(task='translation', model='t5_small', train_dataset="your data file path") - -# 方式1: 开启训练,并使用训练好的权重进行推理 -trainer.train() -res = trainer.predict(predict_checkpoint=True, input_data="translate the English to Romanian: a good boy!") -print(res) -#[{'translation_text': ['un băiat bun!']}] - -# 方式2: 从obs下载训练好的权重并进行推理 -res = trainer.predict(input_data="translate the English to Romanian: a good boy!") -print(res) -#[{'translation_text': ['un băiat bun!']}] -``` - -- pipeline接口开启快速推理 - -```python -from mindformers.pipeline import pipeline -pipeline_task = pipeline("translation", model='t5_small') -pipeline_result = pipeline_task("translate the English to Romanian: a good boy!", top_k=3) -print(pipeline_result) -#[{'translation_text': ['un băiat bun!']}] -``` - -## 模型权重 - -本仓库中的`t5_small`来自于HuggingFace的[`t5_small`](https://huggingface.co/t5-small), 基于下述的步骤获取: - -1. 从上述的链接中下载`t5_small`的HuggingFace权重,文件名为`pytorch_model.bin` - -2. 执行转换脚本,得到转换后的输出文件`mindspore_t5.ckpt` - -```shell -python mindformers/models/t5/convert_weight.py --layers 6 --torch_path pytorch_model.bin --mindspore_path ./mindspore_t5.ckpt -``` diff --git a/docs/model_cards/vit.md b/docs/model_cards/vit.md deleted file mode 100644 index c38cf095..00000000 --- a/docs/model_cards/vit.md +++ /dev/null @@ -1,391 +0,0 @@ -# ViT - -## 模型描述 - -ViT:全名Vision Transformer,不同于传统的基于CNN的网络结果,是基于Transformer结构的CV网络,2021年谷歌研究发表网络,在大数据集上表现了非常强的泛化能力。大数据任务(如:CLIP)基于该结构能有良好的效果。MindFormers提供的ViT权重及精度均是是基于MAE预训练ImageNet-1K数据集进行微调得到。 - -[论文](https://gitee.com/link?target=https%3A%2F%2Farxiv.org%2Fabs%2F2010.11929): Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby. 2021. - -## 模型性能 - -- 基于Atlas 800 - -| config | task | Datasets | metric | score | [train performance](#预训练) | [prediction performance](#推理) | -| :----------------------------------------------------------: | :------------------: | :---------: | :-----------: | :----: | :--------------------------: | :-----------------------------: | -| [vit_base_p16](../../configs/mae/run_vit_base_p16_224_100ep.yaml) | image_classification | ImageNet-1K | Top1-Accuracy | 0.8372 | 262.31 samples/s/p | 363.50 (fps) | - -## 仓库介绍 - -`ViT` 基于 `MindFormers` 实现,主要涉及的文件有: - -1. 模型具体实现:`mindformers/models/vit` - - ```bash - model - ├── __init__.py - ├── convert_weight.py # 权重转换脚本 - ├── vit.py # 模型实现 - ├── vit_config.py # 模型配置项 - ├── vit_modules.py # 模型所需模块 - └── vit_processor.py # Model预处理 - ``` - -2. 
模型配置:`configs/vit` - - ```bash - model - └── run_vit_base_p16_224_100ep.yaml # vit_base模型启动配置 - ``` - -## 前期准备 - -### [mindformers安装](path/to/README.md#二mindformers安装) - -### 生成RANK_TABLE_FILE(多卡运行必须环节) - -运行mindformers/tools/hccl_tools.py生成RANK_TABLE_FILE的json文件 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" -``` - -**注:若使用ModelArts的notebook环境,可从 `/user/config/jobstart_hccl.json` 路径下直接获取rank table,无需手动生成** - -RANK_TABLE_FILE 单机8卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "1", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - {"device_id": "0","device_ip": "192.1.27.6","rank_id": "0"}, - {"device_id": "1","device_ip": "192.2.27.6","rank_id": "1"}, - {"device_id": "2","device_ip": "192.3.27.6","rank_id": "2"}, - {"device_id": "3","device_ip": "192.4.27.6","rank_id": "3"}, - {"device_id": "4","device_ip": "192.1.27.7","rank_id": "4"}, - {"device_id": "5","device_ip": "192.2.27.7","rank_id": "5"}, - {"device_id": "6","device_ip": "192.3.27.7","rank_id": "6"}, - {"device_id": "7","device_ip": "192.4.27.7","rank_id": "7"}], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 多机RANK_TABLE_FILE合并(多机多卡必备环) - -- step 1. 首先根据上章节内容,在每个机器上生成各自的`RANK_TABLE_FILE`文件,然后将不同机器上生成的`RANK_TABLE_FILE`文件全部拷贝到同一台机器上。 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" --server_ip xx.xx.xx.xx -``` - -**注:需要根据机器的ip地址指定 --server_ip,避免由于不同机器server_ip不同,导致多节点间通信失败。** - -- step 2. 运行mindformers/tools/merge_hccl.py将不同机器上生成的`RANK_TABLE_FILE`文件合并 - -```bash -# 运行如下命令,合并每个机器上的RANK_TABLE_FILE的json文件。 -python ./mindformers/tools/merge_hccl.py hccl*.json -``` - -- step 3. 将合并后的`RANK_TABLE_FILE`文件拷贝到所有机器中,保证不同机器上的`RANK_TABLE_FILE`相同。 - -RANK_TABLE_FILE 双机16卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "2", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.0", "rank_id": "0" - }, - { - "device_id": "1", "device_ip": "192.168.1.0", "rank_id": "1" - }, - { - "device_id": "2", "device_ip": "192.168.2.0", "rank_id": "2" - }, - { - "device_id": "3", "device_ip": "192.168.3.0", "rank_id": "3" - }, - { - "device_id": "4", "device_ip": "192.168.0.1", "rank_id": "4" - }, - { - "device_id": "5", "device_ip": "192.168.1.1", "rank_id": "5" - }, - { - "device_id": "6", "device_ip": "192.168.2.1", "rank_id": "6" - }, - { - "device_id": "7", "device_ip": "192.168.3.1", "rank_id": "7" - } - ], - "host_nic_ip": "reserve" - }, - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.1", "rank_id": "8" - }, - { - "device_id": "1", "device_ip": "192.168.1.1", "rank_id": "9" - }, - { - "device_id": "2", "device_ip": "192.168.2.1", "rank_id": "10" - }, - { - "device_id": "3", "device_ip": "192.168.3.1", "rank_id": "11" - }, - { - "device_id": "4", "device_ip": "192.168.0.2", "rank_id": "12" - }, - { - "device_id": "5", "device_ip": "192.168.1.2", "rank_id": "13" - }, - { - "device_id": "6", "device_ip": "192.168.2.2", "rank_id": "14" - }, - { - "device_id": "7", "device_ip": "192.168.3.2", "rank_id": "15" - } - ], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 模型权重下载与转换 - -如果无需加载权重,或者使用from_pretrained功能自动下载,可以跳过此章节。 - -MindFormers提供高级接口from_pretrained功能直接下载MindFormerBook中的[vit_base_p16.ckpt](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/vit/vit_base_p16.ckpt),无需手动转换。 - 
-本仓库中的`vit_base_p16`来自于facebookresearch/mae的[ViT-Base](https://dl.fbaipublicfiles.com/mae/finetune/mae_finetuned_vit_base.pth), 如需手动下载权重,可参考以下示例进行转换: - -1. 从上述链接中下载`ViT-Base`的模型权重 - -2. 执行转换脚本,得到转换后的输出文件`vit_base_p16.ckpt` - -```bash -python mindformers/models/vit/convert_weight.py --torch_path "PATH OF ViT-Base.pth" --mindspore_path "SAVE PATH OF vit_base_p16.ckpt" -``` - -## 基于API的快速使用 - -### 基于AutoClass的快速使用 - -可以使用AutoClass接口,通过模型名称自动下载并加载权重 - -`from_pretrained()` 接口会自动从云上下载预训练的模型,存储路径:`mindformers/checkpoint_download/vit` - -```python -import mindspore -from mindformers import AutoModel, AutoConfig -from mindformers.tools.image_tools import load_image -from mindformers import ViTImageProcessor - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) - -# 模型标志加载模型 -model = AutoModel.from_pretrained("vit_base_p16") - -#模型配置加载模型 -config = AutoConfig.from_pretrained("vit_base_p16") -# {'patch_size': 16, 'in_chans': 3, 'embed_dim': 768, 'depth': 12, 'num_heads': 12, 'mlp_ratio': 4, -# ..., 'batch_size': 32, 'image_size': 224, 'num_classes': 1000} -model = AutoModel.from_config(config) - -img = load_image("https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/clip/sunflower.png") -image_processor = ViTImageProcessor(size=224) -processed_img = image_processor(img) - -predict_result = model(processed_img) -print(predict_result) - -# output -# (Tensor(shape=[1, 1000], dtype=Float32, value= -# [[-5.38996577e-01, -2.30418444e-02, 2.06433788e-01 ... -6.59191251e-01, 8.57466936e-01, 6.56416774e-01]]), None) -``` - -### 基于Trainer的快速训练、评测、推理 - -```python -import mindspore -from mindformers.trainer import Trainer -from mindformers.tools.image_tools import load_image - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) -# 初始化任务 -vit_trainer = Trainer( - task='image_classification', - model='vit_base_p16', - train_dataset="imageNet-1k/train", - eval_dataset="imageNet-1k/val") -img = load_image("https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/clip/sunflower.png") - -# 方式1:使用现有的预训练权重进行finetune, 并使用finetune获得的权重进行eval和推理 -vit_trainer.train(resume_or_finetune_from_checkpoint="mae_vit_base_p16", do_finetune=True) -vit_trainer.evaluate(eval_checkpoint=True) -predict_result = vit_trainer.predict(predict_checkpoint=True, input_data=img, top_k=3) -print(predict_result) - -# 方式2: 从新开始训练,并使用训练好的权重进行eval和推理 -vit_trainer.train() -vit_trainer.evaluate(eval_checkpoint=True) -predict_result = vit_trainer.predict(predict_checkpoint=True, input_data=img, top_k=3) -print(predict_result) - -# 方式3: 从obs下载训练好的权重并进行eval和推理 -vit_trainer.evaluate() -predict_result = vit_trainer.predict(input_data=img, top_k=3) -print(predict_result) - -# output -# - mindformers - INFO - output result is: [[{'score': 0.8880876, 'label': 'daisy'}, -# {'score': 0.0049882396, 'label': 'bee'}, {'score': 0.0031068476, 'label': 'vase'}]] -``` - -### 基于Pipeline的快速推理 - -```python -import mindspore -from mindformers.pipeline import pipeline -from mindformers.tools.image_tools import load_image - -# 指定图模式,指定使用训练卡id -mindspore.set_context(mode=0, device_id=0) -pipeline_task = pipeline("image_classification", model='vit_base_p16') -img = load_image("https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/XFormer_for_mindspore/clip/sunflower.png") -pipeline_result = pipeline_task(img, top_k=3) -print(pipeline_result) - -# output -# [[{'score': 0.8880876, 'label': 'daisy'}, {'score': 0.0049882396, 'label': 'bee'}, -# {'score': 0.0031068476, 'label': 'vase'}]] -``` - - 
Trainer和pipeline接口默认支持的task和model关键入参 - -| task(string) | model(string) | -| :------------------: | :-------------: | -| image_classification | vit_base_p16 | - -## 预训练 - -### 数据集准备-预训练 - -使用的数据集:[ImageNet2012](http://www.image-net.org/) - -- 数据集大小:125G,共1000个类、125万张彩色图像 - - 训练集:120G,共120万张图像 - - 测试集:5G,共5万张图像 -- 数据格式:RGB - - ```text -数据集目录格式 -└─imageNet-1k - ├─train # 训练数据集 - └─val # 评估数据集 - ``` - -### 脚本启动 - -#### 单卡训练 - -- python启动 - -```bash -# pretrain -python run_mindformer.py --config ./configs/vit/run_vit_base_p16_224_100ep.yaml --run_mode train -``` - -#### 多卡训练 - -多卡运行需要RANK_FILE_TABLE,请参考前期准备-[生成RANK_TABLE_FILE](#生成rank_table_file多卡运行必须环节) - -- 单机多卡 - -```bash -cd scripts -bash run_distribute.sh RANK_TABLE_FILE ../configs/vit/run_vit_base_p16_224_100ep.yaml [0,8] train 8 -``` - -多机多卡运行需要合并不同机器的RANK_FILE_TABLE,参考前期准备-[多机RANK_TABLE_FILE合并](#多机rank_table_file合并多机多卡必备环节) - -- 多机多卡 - -在每台机器上启动`bash run_distribute.sh`。 - -**注:需要保证执行的节点和RANK_TABLE_FIEL的节点顺序保持一致,即rank_id匹配。** - -```bash -server_count=12 -device_num=8*$server_count -# launch ranks in the 0th server -cd scripts -bash run_distribute.sh $RANK_TABLE_FILE ../configs/vit/run_vit_base_p16_224_100ep.yaml [0,8] train $device_num - -# launch ranks in the 1-11 server via ssh -for idx in {1..11} -do - let rank_start=8*$idx - let rank_end=$rank_start+8 - ssh ${IP_LIST[$idx]} "cd scripts; bash run_distribute.sh $RANK_TABLE_FILE ../configs/vit/run_vit_base_p16_224_100ep.yaml [$rank_start,$rank_end] train $device_num" -done -``` - -其中 - -- `RANK_TABLE_FILE`为上一步汇总并分发的总rank table文件; -- `IP_LIST`为12台服务器的IP地址。如192.168.0.[0-11] - -```bash -IP_LIST=("192.168.0.0", "192.168.0.1", ..., "192.168.0.11") -``` - -## 评测 - -### 图像分类 - -### 数据集准备-图像分类 - -参考[数据集准备-预训练](#数据集准备-预训练) - -### 脚本启动 - -#### 单卡评测 - -```bash -# evaluate -python run_mindformer.py --config ./configs/vit/run_vit_base_p16_224_100ep.yaml --run_mode eval --eval_dataset_dir [DATASET_PATH] -# output -# ViT: Top1 Accuracy = {'Top1 Accuracy': 0.8371678937259923} -``` - -## 推理 - -### 脚本启动 - -#### 单卡推理 - -```bash -# predict -python run_mindformer.py --config ./configs/vit/run_vit_base_p16_224_100ep.yaml --run_mode predict --predict_data [PATH_TO_IMAGE] -``` diff --git a/docs/model_support_list.md b/docs/model_support_list.md index 682f8be8..b3bbd576 100644 --- a/docs/model_support_list.md +++ b/docs/model_support_list.md @@ -6,31 +6,26 @@ | 模型
model | 模型规格
type | 数据集
dataset | 评估指标
metric | 评估得分
score | 配置
config | |:---------------------------:|-------------------|:----------------:|:----------------:|:---------------:|:------------------------------------------------------------------------:| -| [bert](model_cards/bert.md) | bert_base_uncased | wiki | - | - | [configs](https://gitee.com/mindspore/mindformers/tree/dev/configs/bert) | ### [text_classification](task_cards/text_classification.md) | 模型
model | 模型规格
type | 数据集
dataset | 评估指标
metric | 评估得分
score | 配置
config | |:------------------------------------------------:|------------------------------------------------------------|:----------------:|:------------------------:|:---------------:|:--------------------------------------------------------------------------:| -| [txtcls_bert](task_cards/text_classification.md) | txtcls_bert_base_uncased
txtcls_bert_base_uncased_mnli | Mnli
Mnli | Entity F1
Entity F1 | -
84.80% | [configs](https://gitee.com/mindspore/mindformers/tree/dev/configs/txtcls) | ### [token_classification](task_cards/token_classification.md) | 模型
model | 模型规格
type | 数据集
dataset | 评估指标
metric | 评估得分
score | 配置
config | |:-------------------------------------------------:|---------------------------------------------------------------|:--------------------:|:------------------------:|:----------------:|:--------------------------------------------------------------------------:| -| [tokcls_bert](task_cards/token_classification.md) | tokcls_bert_base_chinese
tokcls_bert_base_chinese_cluener | CLUENER
CLUENER | Entity F1
Entity F1 | -
0.7905 | [configs](https://gitee.com/mindspore/mindformers/tree/dev/configs/tokcls) | ### [question_answering](task_cards/question_answering.md) | 模型
model | 模型规格
type | 数据集
dataset | 评估指标
metric | 评估得分
score | 配置
config | |:-------------------------------------------:|-------------------------------------------------------|:--------------------------:|:--------------------:|:--------------------:|:----------------------------------------------------------------------:| -| [qa_bert](task_cards/question_answering.md) | qa_bert_base_uncased
qa_bert_base_chinese_uncased | SQuAD v1.1
SQuAD v1.1 | EM / F1
EM / F1 | 80.74 / 88.33
- | [configs](https://gitee.com/mindspore/mindformers/tree/dev/configs/qa) | ### translation | 模型
model | 模型规格
type | 数据集
dataset | 评估指标
metric | 评估得分
score | 配置
config | |:-----------------------:|---------------|:----------------:|:----------------:|:---------------:|:----------------------------------------------------------------------:| -| [t5](model_cards/t5.md) | t5_small | WMT16 | - | - | [configs](https://gitee.com/mindspore/mindformers/tree/dev/configs/t5) | ### [text_generation](task_cards/text_generation.md) @@ -58,14 +53,11 @@ | 模型
model | 模型规格
type | 数据集
dataset | 评估指标
metric | 评估得分
score | 配置
config | |:-------------------------:|------------------|:----------------:|:----------------:|:---------------:|:-----------------------------------------------------------------------------------------------------------:| -| [mae](model_cards/mae.md) | mae_vit_base_p16 | ImageNet-1k | - | - | [configs](https://gitee.com/mindspore/mindformers/tree/dev/configs/mae/run_mae_vit_base_p16_224_800ep.yaml) | ### [image_classification](task_cards/image_classification.md) | 模型
model | 模型规格
type | 数据集
dataset | 评估指标
metric | 评估得分
score | 配置
config | |:---------------------------:|----------------|:----------------:|:----------------:|:---------------:|:----------------------------------------------------------------------------------------------------------:| -| [vit](model_cards/vit.md) | vit_base_p16 | ImageNet-1k | Accuracy | 83.71% | [configs](https://gitee.com/mindspore/mindformers/tree/dev/configs/vit/run_vit_base_p16_224_100ep.yaml) | -| [swin](model_cards/swin.md) | swin_base_p4w7 | ImageNet-1k | Accuracy | 83.44% | [configs](https://gitee.com/mindspore/mindformers/tree/dev/configs/swin/run_swin_base_p4w7_224_100ep.yaml) | ## Multi-Modal @@ -73,8 +65,6 @@ | 模型
model | 模型规格
type | 数据集
dataset | 评估指标
metric | 评估得分
score | 配置
config | |:-----------------------------------------------:|--------------------------------------------------------------------------|:--------------------------------------------------:|:-----------------------------------------------------------:|:------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------:| -| [clip](model_cards/clip.md) | clip_vit_b_32
clip_vit_b_16
clip_vit_l_14
clip_vit_l_14@336 | Cifar100
Cifar100
Cifar100
Cifar100 | Accuracy
Accuracy
Accuracy
Accuracy | 57.24%
61.41%
69.67%
68.19% | [configs](https://gitee.com/mindspore/mindformers/tree/dev/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml) | -| [visualglm](../research/visualglm/visualglm.md) | visualglm | fewshot-data | - | - | [configs](https://gitee.com/mindspore/mindformers/tree/dev/research/visualglm/run_visualglm_6b_image_to_text_generation.yaml) | ### image_to_text_generation diff --git a/mindformers/mindformer_book.py b/mindformers/mindformer_book.py index 7aecc1df..2bb29092 100644 --- a/mindformers/mindformer_book.py +++ b/mindformers/mindformer_book.py @@ -253,16 +253,10 @@ class MindFormerBook: _PROJECT_PATH, "configs/bloom/run_bloom_65b.yaml")), ("bloom_176b", os.path.join( _PROJECT_PATH, "configs/bloom/run_bloom_176b.yaml")), - ("baichuan_7b", os.path.join( - _PROJECT_PATH, "research/baichuan/run_baichuan_7b.yaml")), ("baichuan2_7b", os.path.join( _PROJECT_PATH, "research/baichuan2/run_baichuan2_7b.yaml")), ("baichuan2_13b", os.path.join( _PROJECT_PATH, "research/baichuan2/run_baichuan2_13b.yaml")), - ("ziya_13b", os.path.join( - _PROJECT_PATH, "research/ziya/run_ziya_13b.yaml")), - ("skywork_13b", os.path.join( - _PROJECT_PATH, "research/skywork/run_skywork_13b.yaml")), ("internlm_7b", os.path.join( _PROJECT_PATH, "research/internlm/run_internlm_7b.yaml")), ("internlm_7b_lora", os.path.join( @@ -430,16 +424,10 @@ class MindFormerBook: _PROJECT_PATH, "configs/bloom/run_bloom_65b.yaml")), ("bloom_176b", os.path.join( _PROJECT_PATH, "configs/bloom/run_bloom_176b.yaml")), - ("baichuan_7b", os.path.join( - _PROJECT_PATH, "research/baichuan/run_baichuan_7b.yaml")), ("baichuan2_7b", os.path.join( _PROJECT_PATH, "research/baichuan2/run_baichuan2_7b.yaml")), ("baichuan2_13b", os.path.join( _PROJECT_PATH, "research/baichuan2/run_baichuan2_13b.yaml")), - ("ziya_13b", os.path.join( - _PROJECT_PATH, "research/ziya/run_ziya_13b.yaml")), - ("skywork_13b", os.path.join( - _PROJECT_PATH, "research/skywork/run_skywork_13b.yaml")), ("internlm_7b", os.path.join( _PROJECT_PATH, "research/internlm/run_internlm_7b.yaml")), ("internlm_7b_lora", os.path.join( diff --git a/research/baichuan/baichuan.md b/research/baichuan/baichuan.md deleted file mode 100644 index f865bc75..00000000 --- a/research/baichuan/baichuan.md +++ /dev/null @@ -1,240 +0,0 @@ -# Baichuan - -百川大模型系列是由百川智能研究的大规模语言预训练模型,目前有Baichuan-7B、Baichuan-13B-base和Baichuan-13B-Chat三个系列。目前MindFormers已全部支持。 - -**注: 7B与13B实现方式不同,请参考对应参数的文档进行使用** - -## Baichuan-7B - -Baichuan-7B 是由百川智能开发的一个开源可商用的大规模预训练语言模型。基于 Transformer 结构,在大约 1.2 万亿 tokens 上训练的 70 亿参数模型,支持中英双语,上下文窗口长度为 4096。在标准的中文和英文 benchmark(C-Eval/MMLU)上均取得同尺寸最好的效果。 - -Baichuan-7B 是采用llama的模型结构设计,模型实现我们复用llama的代码。 - -``` text -Model Description -Developed by: 百川智能(Baichuan Intelligent Technology) -Email: opensource@baichuan-inc.com -Language(s) (NLP): Chinese/English -License: Baichuan-7B License -``` - -### 快速使用 - -#### Baichuan-7B 预训练权重转换 - -从huggingface下载[Baichuan-7B](https://huggingface.co/baichuan-inc/Baichuan-7B/tree/main);需要将整个工程下载下来。 - -执行权重转换脚本 - -```shell -python research/baichuan/convert_weight.py --torch_ckpt_path TORCH_CKPT_PATH --mindspore_ckpt_path MS_CKPT_NAME -``` - -```text -# 参数说明 -torch_ckpt_path: huggingface权重保存目录下任意权重bin文件,根据该文件路径读取目录下所有权重 -mindspore_ckpt_path: mindspore权重文件保存路径 -``` - -#### [多卡权重切分](../../docs/feature_cards/Transform_Ckpt.md#方案1源码执行) - -#### 脚本启动 - -> 需开发者提前pip安装。具体接口说明请参考[API接口](https://gitee.com/mindspore/transformer/wikis/API/) -> `遵从Baichuan-7B的license,本模型需要用户自行下载权重进行处理,故使用时和llama存在一定区别,具体如下:` - -- Trainer接口开启训练/推理: - -```python -from mindformers.trainer import 
Trainer - -# 在使用Trainer接口进行训练推理时,由于百川模型的tokenizer需要用户自行下载,因此在启动前,请先行在配置文件中将tokenizer.model的路径配置完成,具体修改如下 -# 增加 vocab_file: '/path/Baichuan-7B/tokenizer.model',这样一个配置即可 -#processor: -# return_tensors: ms -# tokenizer: -# unk_token: '' -# bos_token: '' -# eos_token: '' -# pad_token: '' -# vocab_file: '/path/Baichuan-7B/tokenizer.model' -# type: LlamaTokenizer - -# 初始化预训练任务 -trainer = Trainer(task='text_generation', model='baichuan_7b', train_dataset="your data file path") -# 方式1: 开启训练,并使用训练好的权重进行推理 -trainer.train() -res = trainer.predict(predict_checkpoint=True, input_data="I love Beijing, because") - -# 方式2: 使用自行下载的Baichuan-7B权重并进行推理 -baichuan_model_path = "/path/Baichuan-7B/transform.ckpt" # Baichuan-7B ckpt path -res = trainer.predict(predict_checkpoint=baichuan_model_path, input_data="I love Beijing, because") -``` - -- pipeline接口开启快速推理 - -```python -from mindspore import context -from mindformers.pipeline import pipeline -from mindformers import LlamaForCausalLM, LlamaConfig, LlamaTokenizer - -context.set_context(device_id=3) -# init baichuan-7b model -baichuan_model_path = "/path/Baichuan-7B/transform.ckpt" # Baichuan-7B ckpt path -baichuan_config = LlamaConfig( - vocab_size=64000, - pad_token_id=0, - checkpoint_name_or_path=baichuan_model_path, - use_past=True -) -baichuan_model = LlamaForCausalLM( - config=baichuan_config -) -# init baichuan-7b tokenizer -tokenizer_path = "/path/Baichuan-7B/tokenizer.model" # Baichuan-7B tokenizer.model path -tokenizer = LlamaTokenizer( - vocab_file=tokenizer_path -) -pipeline_task = pipeline("text_generation", model=baichuan_model, tokenizer=tokenizer, max_length=32) -peline_result = pipeline_task("登鹳雀楼->王之涣\n夜雨寄北->", top_k=3, do_sample=True, top_p=0.95, repetition_penalty=1.1, max_length=256) - -print(peline_result) -``` - -#### 训练与微调 - -基于Baichuan-7B,目前提供了模型的基础配置文件`configs/baichuan/run_baichuan_7b.yaml`。可参考[llama](https://gitee.com/mindspore/mindformers/blob/dev/docs/model_cards/llama.md)的训练与微调章节进行数据准备,而后启动微调,不在此赘述。 - -`注:使用Baichuan-7B进行训练或者微调时,需要使用Baichuan-7B配套的tokenizer.model处理数据集,以及选用Baichuan-7B的yaml配置文件进行任务启动。` - -## Baichuan-13B-base - -Baichuan-13B 是由百川智能继 Baichuan-7B 之后开发的包含 130 亿参数的开源可商用的大规模语言模型,在权威的中文和英文 benchmark 上均取得同尺寸最好的效果。 - -Baichuan-13B 有如下几个特点: - -- **更大尺寸、更多数据**:Baichuan-13B 在 Baichuan-7B 的基础上进一步扩大参数量到 130 亿,并且在高质量的语料上训练了 1.4 万亿 tokens,超过 LLaMA-13B 40%,是当前开源 13B 尺寸下训练数据量最多的模- 型。支持中英双语,使用 ALiBi 位置编码,上下文窗口长度为 4096。 -- **同时开源预训练和对齐模型**:预训练模型是适用开发者的『 基座 』,而广大普通用户对有对话功能的对齐模型具有更强的需求。 -- **更高效的推理**:为了支持更广大用户的使用,百川智能同时开源了 int8 和 int4 的量化版本,相对非量化版本在几乎没有效果损失的情况下大大降低了部署的机器资源门槛。 -- **开源免费可商用**:Baichuan-13B 不仅对学术研究完全开放,开发者也仅需邮件申请并获得官方商用许可后,即可以免费商用。 - -``` text -Model Description -Developed by: 百川智能(Baichuan Intelligent Technology) -Email: opensource@baichuan-inc.com -Language(s) (NLP): Chinese/English -License: Baichuan-13B-base License -``` - -### 快速使用 - -#### Baichuan-13B-Base/Chat 权重转换 - -从huggingface下载[Baichuan-13B-base](https://huggingface.co/baichuan-inc/Baichuan-13B-Base/tree/main)或者[Baichuan-13B-chat](https://huggingface.co/baichuan-inc/Baichuan-13B-Chat/tree/main),需要将整个工程下载下来。 - -执行权重转换脚本 - -```shell -python research/baichuan/convert_weight.py --torch_ckpt_path TORCH_CKPT_PATH --mindspore_ckpt_path MS_CKPT_NAME -``` - -```text -# 参数说明 -torch_ckpt_path: huggingface权重保存目录下任意权重bin文件,根据该文件路径读取目录下全部权重 -mindspore_ckpt_path: mindspore权重文件保存路径 -``` - -#### [多卡权重切分](../../docs/feature_cards/Transform_Ckpt.md#方案1源码执行) - -非单卡运行,无论是train, finetune, eval, predict均需要把权重按照并行配置进行切分! 
-#### 脚本启动Baichuan-13B-Base
-
-> 需开发者提前pip安装。具体接口说明请参考[API接口](../../README.md#二mindformers安装)
-> `遵从Baichuan-13B-base的license,本模型需要用户自行下载权重进行处理`
-
-`Baichuan-13B-base`的高阶接口使用脚本已集成在`run_baichuan_13b_base.py`脚本中
-
-**注1**:由于模型较大,Atlas 800不支持单卡推理,不支持单机8卡训练。如果使用Atlas 800进行单卡推理,需要修改`run_baichuan_13b.yaml`中`seq_length`为1024。
-
-**注2**:增量推理需要修改`run_baichuan_13b.yaml`中`use_past`为True。
-
-**注3**:使用predict前需要下载baichuan13b的tokenizer文件,并且在`baichuan/run_baichuan_13b.yaml`文件中修改tokenizer路径,指向hugging face下载的`Baichuan-13B-Base/tokenizer.model`文件
-
-- Atlas 800T A2单卡eval示例
-
-```shell
-cd mindformers/research
-python baichuan/run_baichuan_13b_base.py --config baichuan/run_baichuan_13b_910b.yaml --load_checkpoint path/to/baichuan_13b.ckpt --run_mode=eval --eval_data path/to/mindrecord_dir --use_parallel False
-```
-
-- Atlas 800T A2单卡predict示例
-
-```shell
-cd mindformers/research
-python baichuan/run_baichuan_13b_base.py --config baichuan/run_baichuan_13b_910b.yaml --load_checkpoint path/to/baichuan_13b.ckpt --run_mode=predict --predict_data TLS1.2协议的基本流程 --predict_length 100 --use_parallel False
-#运行结果:[{'text_generation_text': ['TLS1.2协议的基本流程如下: 1.客户端向服务器发送一个ClientHello消息,其中包含客户端支持的加密算法、压缩算法、随机数、客户端支持的扩展等信息。 2.服务器收到ClientHello消息后,向客户端发送一个ServerHello消息,其中包含服务器支持的加密算法、压缩算法、随机数、服务器支持的扩展等信息。 3.客户端收到ServerHello消息后,向服务']}]
-```
-
-- 单机多卡运行eval示例
-
-```shell
-cd mindformers/research
-bash run_singlenode.sh "python baichuan/run_baichuan_13b_base.py --config baichuan/run_baichuan_13b.yaml --load_checkpoint path/to/baichuan_13b_ckpt_dp1mp2 --run_mode=eval --eval_data path/to/mindrecord_dir" path/to/rank_table_file [0,2] 2
-```
-
-**注意,此处load checkpoint后的路径为多卡切分权重**
-
-- 单机多卡运行predict示例
-
-```shell
-cd mindformers/research
-bash run_singlenode.sh "python baichuan/run_baichuan_13b_base.py --config baichuan/run_baichuan_13b.yaml --load_checkpoint path/to/baichuan_13b_ckpt_dp1mp2 --run_mode=predict --predict_data TLS1.2协议的基本流程 --predict_length 100" path/to/rank_table_file [0,2] 2
-#运行结果:[{'text_generation_text': ['TLS1.2协议的基本流程如下: 1.客户端向服务器发送一个ClientHello消息,其中包含客户端支持的加密算法、压缩算法、随机数、客户端支持的扩展等信息。 2.服务器收到ClientHello消息后,向客户端发送一个ServerHello消息,其中包含服务器支持的加密算法、压缩算法、随机数、服务器支持的扩展等信息。 3.客户端收到ServerHello消息后,向服务']}]
-```
-
-**注意,此处load checkpoint后的路径为多卡切分权重**
-
-- 多机多卡运行train示例
-
-```shell
-# node 1
-cd mindformers/research
-bash run_multinode.sh "python baichuan/run_baichuan_13b_base.py --config baichuan/run_baichuan_13b.yaml --load_checkpoint path/to/baichuan_13b_ckpt_dp1mp2 --run_mode=train --train_data path/to/mindrecord_dir" path/to/rank_table_file [0,8] 16
-# node 2
-cd mindformers/research
-bash run_multinode.sh "python baichuan/run_baichuan_13b_base.py --config baichuan/run_baichuan_13b.yaml --load_checkpoint path/to/baichuan_13b_ckpt_dp1mp2 --run_mode=train --train_data path/to/mindrecord_dir" path/to/rank_table_file [8,16] 16
-```
-
-**参数说明**
-  `config`: 模型的yaml配置文件路径,如`baichuan/run_baichuan_13b.yaml`
-  `load_checkpoint`: 推理所使用的权重,需从huggingface获取,通过convert_weight转换为mindspore单卡权重,参考[权重切分](../../docs/feature_cards/Transform_Ckpt.md)转换为多卡权重
-  `run_mode`:运行模式,包括train,finetune,eval,predict
-  `train_data`:train数据,训练时需要填入,数据获取方法参考[llama数据准备](../../docs/model_cards/llama.md#数据集准备),注意tokenizer需使用baichuan的。
-  `eval_data`:eval数据,eval时需要填入,同train。
-  `predict_data`:predict数据,predict时需要填入
-
-  更多输入可参考`run_baichuan_13b_base.py`脚本内入参
-
-#### 脚本启动Baichuan-13B-Chat
-
-> 需开发者提前pip安装。具体接口说明请参考[API接口](../../README.md#二mindformers安装)
-> `遵从Baichuan-13B-chat的license,本模型需要用户自行下载权重进行处理`
-
-`Baichuan-13B-chat`的高阶接口使用脚本已集成在`run_baichuan_13b_chat.py`脚本中
-
-```shell
-cd mindformers/research
-python baichuan/run_baichuan_13b_chat.py --config baichuan --load_checkpoint path/to/baichuan_13b.ckpt --max_new_tokens 512
-#请输入:世界上第二高的山峰是哪座?
-#世界上第二高的山峰是喀喇昆仑山脉的乔戈里峰(K2),海拔8,611米(28,251英尺)。它位于巴基斯坦和中国边境附近,是喀喇昆仑山脉的最高峰。
-#请输入:那第三高的山峰呢?
-#世界第三高的山峰是喜马拉雅山脉的康峰(Kangchenjunga),海拔8,586米(28,169英尺)。它位于尼泊尔和印度边境附近,是世界上最高的14座山峰之一。
-#请输入:我想攀爬高峰,在前面说的两座高峰里,你推荐我先爬哪一座? -#在选择攀爬的顺序时,需要考虑多种因素,如个人体能、技能水平、时间限制等。以下是一些建议供您参考:...(省略更多输出) -``` - -**参数说明** - `config`: 用于生成tokenizer的配置文件,路径指定到文件夹,需把yaml文件单独放置于一个文件夹内 - `load_checkpoint`: 推理所使用的的权重,需从huggingface获取,通过conver_weight转换为mindspore单卡权重,参考[权重切分](../../docs/feature_cards/Transform_Ckpt.md)转换为多卡权重 - `max_new_tokens`: 最大生成tokens数,多轮对话时,如果记忆的总tokens大于`seq_length-max_new_tokens`会遗忘以前的对话。 diff --git a/research/baichuan/baichuan_13b.py b/research/baichuan/baichuan_13b.py deleted file mode 100644 index f8c0b091..00000000 --- a/research/baichuan/baichuan_13b.py +++ /dev/null @@ -1,883 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Baichuan_13b models' APIs.""" -from typing import Optional -import math -import numpy as np - -try: - from mindspore._checkparam import Validator -except ImportError: - import mindspore._checkparam as Validator -from mindspore import Tensor, nn, ops -import mindspore.common.dtype as mstype -from mindspore.common.parameter import Parameter -from mindspore.ops import operations as P - -from mindformers.core.loss.loss import CrossEntropyLoss -from mindformers.modules.flash_attention import FlashAttention -from mindformers.models.modeling_utils import PreTrainedModel -from mindformers.models.utils import lazy_inline -from mindformers.modules.transformer.op_parallel_config import _check_config -from mindformers.modules.transformer import AttentionMask, TransformerOpParallelConfig -from mindformers.modules.layers import Linear, _check_input_dtype, AlibiTensor -from mindformers.tools.register.register import MindFormerModuleType, MindFormerRegister - -from mindformers.models.utils import set_layer_stage_recompute -from mindformers.models.llama.llama_config import LlamaConfig -from mindformers.models.llama.llama_layer import LlamaEmbedding, LlamaFeedForward, LlamaRMSNorm -from mindformers.tools.logger import logger - - -__all__ = ['Baichuan13BForCausalLM', 'Baichuan13BModel'] - - -class BaichuanPreTrainedModel(PreTrainedModel): - """ - An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained - models. - """ - - config_class = LlamaConfig - base_model_prefix = "baichuan" - - -@MindFormerRegister.register(MindFormerModuleType.MODELS) -class Baichuan13BForCausalLM(BaichuanPreTrainedModel): - r""" - Provide baichuan_13B training loss or logits through network. - Args: - config (LlamaConfig): The config of baichuan_13B model. - - Inputs: - input_ids(Tensor): the tokenized inputs with datatype int32, Tensor of shape :math:`(batch, seq\_length)`. - labels(Tensor): the tokenized labels with datatype int32, Tensor of shape :math:`(batch, seq\_length)`. - input_position(Tensor): current position, used by model.predict. - position_ids(Tensor): Reserved param, not used. - attention_mask(Tensor): Reserved param, not used. - input_embeds(Tensor): Reserved param, not used. 
- init_reset(bool, optional): A bool tensor with shape [1], used to clear the past key parameter and - past value parameter used in the incremental prediction. Default True. - batch_valid_length(Tensor): the past calculated the index with datatype int32, used for incremental - prediction. Tensor of shape :math:`(batch_size,)`. Default None. - - Returns: - Tensor, the loss or logits of the network. - - Examples: - >>> from mindformers.models.llama import LlamaConfig - >>> from research.baichuan.baichuan_13b import Baichuan13BForCausalLM - >>> config = LlamaConfig(batch_size=2) - >>> network = Baichuan13BForCausalLM(config=config) - """ - - @lazy_inline - def __init__(self, config: LlamaConfig = None): - super(Baichuan13BForCausalLM, self).__init__(config, auto_prefix=True) - _check_config(config.parallel_config) - self.ignore_token_id = config.ignore_token_id - self.pad_token_id = config.pad_token_id - - self.reshape = P.Reshape() - self.cast = P.Cast() - self.slice = P.StridedSlice() - self.not_equal = P.NotEqual() - self.mul = P.Mul() - self.add = P.Add() - self.model = Baichuan13BModel(config=config) - self.lm_head = Linear(in_channels=config.hidden_size, - out_channels=config.vocab_size, - has_bias=False, - compute_dtype=config.compute_dtype, - param_init_type=config.param_init_type, - weight_init="normal") # meta default: xavier_normal - self.loss = CrossEntropyLoss(parallel_config=config.parallel_config) - - dp = config.parallel_config.data_parallel - mp = config.parallel_config.model_parallel - - self.slice.shard(((dp, 1),)) - self.not_equal.shard(((dp, 1), ())) - self.mul.shard(((dp, 1), (dp, 1))) - self.add.shard(((dp, 1), ())) - if config.parallel_config.vocab_emb_dp: - self.lm_head.shard(strategy_matmul=((dp, 1), (1, 1))) - else: - self.lm_head.shard(strategy_matmul=((dp, 1), (mp, 1))) - if config.parallel_config.pipeline_stage > 1: - self.lm_head.pipeline_stage = config.parallel_config.pipeline_stage - 1 - - self.load_checkpoint(config) - - # pylint: disable=W0613 - def prepare_inputs_for_generation(self, input_ids, **kwargs): - return { - "input_ids": Tensor(input_ids, mstype.int32) - } - - def add_flags_custom(self, is_first_iteration): - """Add customized attributes for specific cells in the model.""" - self.add_flags(is_first_iteration=is_first_iteration) - self.model.add_flags(is_first_iteration=is_first_iteration) - for layer in self.model.layers: - layer.add_flags(is_first_iteration=is_first_iteration) - layer.attention.add_flags(is_first_iteration=is_first_iteration) - - # pylint: disable=W0613 - def construct(self, input_ids, labels=None, input_position=None, position_ids=None, attention_mask=None, - input_embeds=None, init_reset=True, batch_valid_length=None): - """Baichuan13BForCausalLM forward.""" - bsz, seqlen = input_ids.shape - if self.training: - tokens = self.slice(input_ids, (0, 0), (bsz, seqlen - 1), (1, 1)) - else: - tokens = input_ids - - output = self.model(tokens, input_position, - init_reset, batch_valid_length) - logits = self.lm_head(output) - - input_mask = self.cast(self.not_equal( - tokens, self.pad_token_id), mstype.float32) - if labels is None: - labels = self.slice(input_ids, (0, 1), (bsz, seqlen), (1, 1)) - else: - if labels.ndim > 1: - if self.training: - labels = self.slice(labels, (0, 1), (bsz, seqlen), (1, 1)) - label_mask = self.cast(self.not_equal( - labels, self.ignore_token_id), mstype.float32) - input_mask = self.mul(input_mask, label_mask) - - logits = self.cast(logits, mstype.float32) - if not self.training: - logits = 
self.reshape(logits, (bsz, seqlen, -1)) - # makes cast effective to avoid allgather issue in Mindspore1.10 - input_mask = self.add(input_mask, 1) - return logits, tokens, input_mask - - if logits.ndim > 2: - logits = self.reshape(logits, (-1, logits.shape[-1])) - labels = self.reshape(labels, (-1,)) - input_mask = self.reshape(input_mask, (-1,)) - loss = self.loss(logits, labels, input_mask) - return loss - - -class Baichuan13BModel(BaichuanPreTrainedModel): - r""" - Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`Baichuan13BDecoderLayer`] - Args: - config(LlamaConfig): the config of network - - Inputs: - input_ids: the tokenized inputs with datatype int32 - - Returns: - output: Tensor, the output of baichuan_13b decoderlayer - """ - - def __init__(self, - config: LlamaConfig = None): - super().__init__(config, auto_prefix=True) - _check_config(config.parallel_config) - if config.batch_size or config.use_past: - Validator.check_positive_int(config.batch_size) - self.dtype = config.compute_dtype - self.num_layers = config.num_layers - self.pad_token_id = config.pad_token_id - self.is_first_iteration = True - self.use_past = config.use_past - self.use_flash_attention = config.use_flash_attention - if self.use_flash_attention: - logger.info("Enable flash attention.") - elif config.use_flash_attention: - logger.info("Current MindSpore do not support flash attention.") - - self.get_attention_mask = AttentionMask( - config.seq_length, parallel_config=config.parallel_config.dp_mp_config).to_float(config.compute_dtype) - self.multiply_data = Tensor([-10000.0], dtype=config.compute_dtype) - self.one = Tensor([1.0], dtype=config.compute_dtype) - self.reshape = P.Reshape() - self.cast = P.Cast() - self.tile = P.Tile() - self.mul_mask = P.Mul() - self.sub = P.Sub() - self.expand_dims = P.ExpandDims() - self.not_equal = P.NotEqual() - - self.tok_embeddings = LlamaEmbedding( - config.vocab_size, config.hidden_size, param_init_type=config.param_init_type) - self.layers = nn.CellList() - for layer_id in range(config.num_layers): - layer = Baichuan13BDecodeLayer(config.batch_size, - config.seq_length, - layer_id, - dim=config.hidden_size, - n_heads=config.num_heads, - multiple_of=config.multiple_of, - n_kv_heads=config.n_kv_heads, - ffn_dim_multiplier=config.ffn_dim_multiplier, - norm_eps=config.rms_norm_eps, - compute_dtype=config.compute_dtype, - layernorm_compute_dtype=config.layernorm_compute_type, - softmax_compute_dtype=config.softmax_compute_type, - param_init_type=config.param_init_type, - use_past=config.use_past, - use_flash_attention=config.use_flash_attention, - compute_in_2d=config.compute_in_2d, - use_past_shard=config.use_past_shard, - parallel_config=config.parallel_config) - set_layer_stage_recompute(layer, layer_id, config.offset, config.parallel_config, config.num_layers) - self.layers.append(layer) - self.norm_out = LlamaRMSNorm(config.hidden_size, config.rms_norm_eps, - compute_type=config.layernorm_compute_type) - - self.build_alibi_tensor = AlibiTensor( - seq_length=config.seq_length, num_heads=config.num_heads, parallel_config=config.parallel_config) - - dp = config.parallel_config.data_parallel - self.tok_embeddings.pipeline_stage = 0 - if config.parallel_config.pipeline_stage > 1: - self.norm_out.pipeline_stage = config.parallel_config.pipeline_stage - 1 - self.tok_embeddings.set_comm_fusion(2) - self.norm_out.set_comm_fusion(2) - else: - self.tok_embeddings.set_comm_fusion( - config.parallel_config.gradient_aggregation_group) - 
self.norm_out.set_comm_fusion( - config.parallel_config.gradient_aggregation_group) - - self.tok_embeddings.shard(config.parallel_config) - - self.tile.shard(((1, 1, 1, 1), ())) - self.sub.shard(((1,), (dp, 1, 1))) - self.mul_mask.shard(((dp, 1, 1, 1), (1,))) - self.expand_dims.shard(((dp, 1, 1),)) - self.not_equal.shard(((dp, 1), ())) - if config.compute_in_2d: - self.norm_out.shard((dp, 1)) - else: - self.norm_out.shard((dp, 1, 1)) - - if self.use_past: - seq_range = np.arange(config.seq_length).reshape(1, 1, -1) - self.ones = P.Ones() - self.range = Tensor( - np.tile(seq_range, (config.batch_size, 1, 1)), mstype.int32) - self.le_past = P.LessEqual() - self.input_mask_all_ones = Tensor( - np.ones((self.config.batch_size, self.config.seq_length), np.float32), mstype.float32) - - # pylint: disable=W0613 - def construct(self, tokens: Tensor, input_position=None, init_reset=True, batch_valid_length=None): - """Forward of baichuan_13b model.""" - # preprocess - input_mask = self.cast(self.not_equal( - tokens, self.pad_token_id), self.dtype) - - if self.is_first_iteration: - mask = self.get_attention_mask(input_mask) - alibi_tensor = self.build_alibi_tensor(input_mask, self.dtype) - # mask: [bs, seq, seq] - else: - cur_pos = batch_valid_length - 1 - valid_length = self.reshape(cur_pos, (-1, 1, 1)) - mask = self.cast(self.le_past( - self.range, valid_length), self.dtype) - alibi_tensor = self.build_alibi_tensor(self.input_mask_all_ones, self.dtype) - # mask: [bs, 1, 1] - mask = self.sub(self.one, self.cast(mask, self.dtype)) - if not self.use_flash_attention: - mask = self.expand_dims(mask, 1) - mask = self.mul_mask(mask, self.multiply_data) - - # tokens: [bs, seq/1] - h = self.tok_embeddings(tokens) - - # h: [bs, seq/1, hidden_dim] - for i in range(self.num_layers): - h, _ = self.layers[i](h, alibi_tensor, mask, - init_reset=init_reset, batch_valid_length=batch_valid_length) - output = self.norm_out(h) - return output - - -class Baichuan13BDecodeLayer(nn.Cell): - r""" - Transformer Layer. This is an implementation of the single layer of the transformer - encoder layer, including multihead attention and feedward layer. - - Args: - batch_size(int): The batch size of the input tensor when do increnmental prediction. Should be a positive - value. When do training or prediction, the argument will not work and the user can just pass None to - the argument. - seq_length(int): The input sequence length. - layer_id(int): The layer id of current transformer block layer. - dim(int): The hidden size of the input. - num_heads(int): The number of the heads. - multiple_of(int): The SwiGLU hidden layer size multiple of large power of 2. - norm_eps (float): The epsilon value of the denominator. Default 1e-5. - compute_dtype(dtype.Number): The computation type of the layer. - Should be mstype.float32 or mstype.float16. Default mstype.float32. - layernorm_compute_type(dtype.Number): The computation type of the norm. - Should be mstype.float32 or mstype.float16. Default mstype.float32. - softmax_compute_type(dtype.Number): The computation type of the softmax in the attention. - Should be mstype.float32 or mstype.float16. Default mstype.float32. - param_init_type(dtype.Number): The parameter initialization type of the module. - Should be mstype.float32 or mstype.float16. Default mstype.float32. - use_past(bool): Use the past state to compute, used for incremental prediction. For example, if we have two - words and want to generate the ten more words. 
We just need to compute the two words' state only once, - and generate the next word one by one. When use_past is True, there are two steps to run the prediction. - In the first step, set the is_first_iteration to be True by - `model.add_flags_recursive(is_first_iteration=True)`, and pass the full inputs. Then, set the - is_first_iteration to be False by `model.add_flags_recursive(is_first_iteration=False)`. - At this moment, pass the single step's input tensor, and loop it. Default False. - parallel_config(OpParallelConfig, MoEParallelConfig): The parallel configure. When MoE is applied, - MoEParallelConfig is effective, otherwise OpParallelConfig is effective. Default `default_dpmp_config`, - an instance of `OpParallelConfig` with default args. - - Inputs: - - **x** (Tensor) - Float Tensor, shape should be [batch_size, seq_length, hidden_size] or - [batch_size * seq_length, hidden_size], if the use_past is False or is_first_iteration=True. Otherwise, - should be [batch_size, 1, hidden_size] - - **alibi_tensor** (Tensor) - Alibi Tensor for position embedding used in attention. - - **mask** (Tensor) - Float Tensor, If the use_past is False or is_first_iteration=True, - the attention mask matrix should ba [batch_size, seq_length, seq_length], or None. None means there will - be no mask in softmax computation. Otherwise, should be [batch_size, 1, hidden_size] - - **init_reset** (Tensor) - A bool tensor with shape [1], used to clear the past key parameter and - past value parameter used in the incremental prediction. Only valid when use_past is True. Default True. - - **batch_valid_length** (Tensor) - Int32 tensor with shape [batch_size] the past calculated the index. - Used for incremental prediction when the use_past is True. Default None. - - Outputs: - Tuple, a tuple contains(`output`, `layer_present`). - - - **output** (Tensor) - The float tensor of the output of the layer with - shape (batch_size, seq_length, hidden_size) or (batch_size * seq_length, hidden_size), if the use_past is - False or is_first_iteration=True. Otherwise, it will be (batch_size, 1, hidden_size) - - - **layer_present** (Tuple) - A tuple of the Tensor of the projected key and value vector with - ((batch_size, num_heads, head_dim, seq_length), - (batch_size, num_heads, seq_length, head_dim)). 
- - """ - - def __init__(self, - batch_size, - seq_length, - layer_id, - dim: int = 512, - n_heads: int = 8, - multiple_of: int = 256, - n_kv_heads: Optional[int] = None, - ffn_dim_multiplier: Optional[int] = None, - norm_eps: float = 1e-5, - compute_dtype=mstype.float16, - layernorm_compute_dtype=mstype.float32, - softmax_compute_dtype=mstype.float32, - param_init_type=mstype.float32, - use_past=False, - use_flash_attention=False, - compute_in_2d=False, - use_past_shard=False, - parallel_config=TransformerOpParallelConfig()): - super().__init__() - if batch_size or use_past: - Validator.check_positive_int(batch_size) - self.batch_size = batch_size - self.seq_length = seq_length - self.layer_id = layer_id - self.hidden_size = dim - self.n_head = n_heads - self.head_dim = self.hidden_size // self.n_head - self.n_kv_head = n_heads if n_kv_heads is None else n_kv_heads - - self.dtype = compute_dtype - self.is_first_iteration = True - self.use_past = use_past - self.compute_in_2d = compute_in_2d - self.key_past = None - self.value_past = None - - self.reshape = P.Reshape() - self.add = P.Add() - self.attention_norm = LlamaRMSNorm( - self.hidden_size, norm_eps, compute_type=layernorm_compute_dtype) - self.ffn_norm = LlamaRMSNorm( - self.hidden_size, norm_eps, compute_type=layernorm_compute_dtype) - self.attention = Baichuan13BAttention(batch_size=batch_size, - seq_length=seq_length, - dim=dim, - n_heads=n_heads, - n_kv_heads=n_kv_heads, - compute_dtype=compute_dtype, - softmax_compute_dtype=softmax_compute_dtype, - param_init_type=param_init_type, - use_past=use_past, - use_flash_attention=use_flash_attention, - compute_in_2d=compute_in_2d, - use_past_shard=use_past_shard, - parallel_config=parallel_config) - self.feed_forward = LlamaFeedForward(dim=self.hidden_size, - hidden_dim=4 * self.hidden_size, - multiple_of=multiple_of, - ffn_dim_multiplier=ffn_dim_multiplier, - compute_dtype=compute_dtype, - param_init_type=param_init_type) - - dp = parallel_config.data_parallel - mp = parallel_config.model_parallel - self.feed_forward.shard(parallel_config) - if self.compute_in_2d: - self.add.shard(((dp, 1), (dp, 1))) - self.attention_norm.shard((dp, 1)) - self.ffn_norm.shard((dp, 1)) - else: - self.add.shard(((dp, 1, 1), (dp, 1, 1))) - self.attention_norm.shard((dp, 1, 1)) - self.ffn_norm.shard((dp, 1, 1)) - self.feed_forward.mul.shard(((dp, 1, mp), (dp, 1, mp))) - - if parallel_config.use_seq_parallel and self.is_first_iteration: - if self.compute_in_2d: - self.add.shard(((dp * mp, 1), (dp * mp, 1))) - self.attention_norm.shard((dp * mp, 1)) - self.ffn_norm.shard((dp * mp, 1)) - else: - self.add.shard(((dp, mp, 1), (dp, mp, 1))) - self.attention_norm.shard((dp, mp, 1)) - self.ffn_norm.shard((dp, mp, 1)) - self.feed_forward.w2.shard( - ((dp, mp), (1, mp)), out_strategy_matmul=((dp * mp, 1),)) - - if self.use_past: - kv_shape = (batch_size, self.n_kv_head, seq_length, self.head_dim) - self.key_past = Parameter( - Tensor(np.zeros(kv_shape), self.dtype), name="key_past") - self.value_past = Parameter( - Tensor(np.zeros(kv_shape), self.dtype), name="value_past") - self.ones = P.Ones() - self.mul_past = P.Mul().shard(((dp, 1, 1, 1), (1,))) - self.assign_past = P.Assign().shard(((dp, 1, 1, 1), (dp, 1, 1, 1))) - if use_past_shard: - self.mul_past.shard(((dp, mp, 1, 1), (1,))) - self.assign_past.shard(((dp, mp, 1, 1), (dp, mp, 1, 1))) - - def construct(self, x, alibi_tensor, mask=None, init_reset=True, batch_valid_length=None): - """ Forward of transformer block. 
""" - self._check_input(x, alibi_tensor, mask, - init_reset, batch_valid_length) - # [bs, seq/1, hidden_dim] (first) [bs * seq/1, hidden_dim] (others) - if self.compute_in_2d and x.ndim != 2: - x = self.reshape(x, (-1, x.shape[-1])) - # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim] - input_x = self.attention_norm(x) - - key_reset = None - value_reset = None - if self.use_past and self.is_first_iteration: - # reset states, init_reset True for reuse and False for reset - self.assign_past(self.key_past, self.mul_past( - self.key_past, self.cast(init_reset, self.dtype))) - self.assign_past(self.value_past, self.mul_past( - self.value_past, self.cast(init_reset, self.dtype))) - key_reset = self.key_past - value_reset = self.value_past - # add dependency for desired execution order - input_x = ops.depend(input_x, key_reset) - input_x = ops.depend(input_x, value_reset) - # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim] - h, layer_present = self.attention(input_x, alibi_tensor, mask, - self.key_past, self.value_past, batch_valid_length) - h = self.add(x, h) - ffn_norm = self.ffn_norm(h) - # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim] - ffn_out = self.feed_forward(ffn_norm) - - value_update = None - key_update = None - if self.use_past: - # current key and value - key_present, value_present = layer_present - # update key and value calculated this step - self.assign_past(self.key_past, key_present) - self.assign_past(self.value_past, value_present) - key_update = self.key_past - value_update = self.value_past - # add dependency for desired execution order - key_update = ops.depend(key_update, key_reset) - value_update = ops.depend(value_update, value_reset) - - # add dependency for desired execution order - ffn_out = ops.depend(ffn_out, value_update) - ffn_out = ops.depend(ffn_out, key_update) - # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim] - out = self.add(h, ffn_out) - return out, layer_present - - def _check_input(self, x, alibi_tensor, mask, init_reset, batch_valid_length): - r"""Check inputs""" - _check_input_dtype( - x.dtype, "x", [mstype.float32, mstype.float16], self.cls_name) - _check_input_dtype(alibi_tensor.dtype, "alibi_tensor", - [mstype.float32, mstype.float16], self.cls_name) - if mask is not None: - _check_input_dtype(mask.dtype, "input_mask", [mstype.float32, mstype.float16], self.cls_name) - - if self.use_past: - if not isinstance(init_reset, Tensor): - init_reset = Tensor([init_reset], mstype.bool_) - if not isinstance(batch_valid_length, Tensor): - bs = x.shape[0] - batch_valid_length = self.ones((bs, 1), mstype.int32) - _check_input_dtype(init_reset.dtype, "init_reset", [mstype.bool_], self.cls_name) - _check_input_dtype(batch_valid_length.dtype, "batch_valid_length", [mstype.int32], self.cls_name) - return True - - -class Baichuan13BAttention(nn.Cell): - r""" - This is an implementation of multihead attention in Baichuan. - - Args: - - **batch_size** (int): The batch size of the input tensor when do increnmental prediction. Should be a - positive value. - When do training or prediction, the argument will not work and the user can just pass None to the - argument. - - **src_seq_length** (int): The sequence length of the query vector. - - **tgt_seq_length** (int): The sequence length of the key and value vector. - - **dim** (int): The hidden size of the input. - - **head_dim** (int): The dim of head. - - **n_heads** (int): The number of the heads. - - **compute_dtype** (dtype.Number): The computation type of dense. Default mstype.float16. 
- Should be mstype.float32 or mstype.float16. - - **softmax_compute_type** (dtype.Number): The type of softmax computation module. Default mstype.float32. - Should be mstype.float32 or mstype.float16. - - **param_init_type** (dtype.Number): The parameter initialization type of the module. Default mstype. - float32. Should be mstype.float32 or mstype.float16. - - **use_past** (bool): Use the past state to compute, used for incremental prediction. - For example, if we have two words and want to generate the ten more words. - We just need to compute the two words' state only once, and generate the next word one by one. - When use_past is True, there are two steps to run the prediction. - In the first step, set the is_first_iteration to be True by - `model.add_flags_recursive(is_first_iteration=True)`, and pass the full inputs. Then, set the - is_first_iteration to be False by `model.add_flags_recursive(is_first_iteration=False)`. At this moment, - pass the single step's input tensor, and loop it. Default False. - - **parallel_config** (OpParallelConfig): The parallel configure. Default `default_dpmp_config`, - an instance of `OpParallelConfig` with default args. - - Inputs: - - **x** (Tensor) - The input tokens with shape (batch_size, src_seq_length, hidden_size) or - (batch_size * src_seq_length, hidden_size), if the use_past is False or is_first_iteration=True. - Otherwise, must be (batch_size, 1, hidden_size) - - **alibi_tensor** (Tensor) - Alibi Tensor for position embedding used in attention. - - **mask** (Tensor) - If the use_past is False or is_first_iteration=True, the attention mask - matrix should ba (batch_size, src_seq_length, tgt_seq_length), or None. None means there will be no mask - in softmax computation. Otherwise, the mask must be (batch_size, 1, tgt_seq_length) - - **key_past** (Tensor) - Float16 tensor with shape (batch_size, num_heads, head_dim, tgt_seq_length). - The past calculated key vector. Used for incremental prediction when the use_past is True. - Default None. - - **value_past** (Tensor) - Float16 tensor with shape (batch_size, num_heads, tgt_seq_length, - head_dim). - The past calculated value vector. Used for incremental prediction when the use_past is True. - Default None. - - **batch_valid_length** (Tensor) - Int32 tensor with shape (batch_size,) the past calculated the index. - Used for incremental prediction when the use_past is True. Default None. - - Outputs: - Tuple, a tuple contains(`output`, `layer_present`) - - - **output** (Tensor) - Tensor, the float tensor of the output of the layer with - shape (batch_size, src_seq_length, hidden_size) or (batch_size * src_seq_length, hidden_size), - if the use_past is False or is_first_iteration=True. Otherwise, it will be (batch_size, 1, hidden_size). - - - **layer_present** (Tuple) - A tuple of the Tensor of the projected key and value vector with - ((batch_size, num_heads, head_dim, tgt_seq_length), - (batch_size, num_heads, tgt_seq_length, head_dim)). 
- """ - - def __init__(self, - batch_size, - seq_length, - dim: int = 512, - n_heads: int = 8, - n_kv_heads: Optional[int] = None, - compute_dtype=mstype.float16, - softmax_compute_dtype=mstype.float32, - param_init_type=mstype.float32, - use_past=False, - use_flash_attention=False, - compute_in_2d=False, - use_past_shard=False, - parallel_config=TransformerOpParallelConfig()): - super().__init__() - self.seq_length = seq_length - self.hidden_size = dim - self.n_head = n_heads - self.head_dim = dim // n_heads - self.n_kv_head = n_heads if n_kv_heads is None else n_kv_heads - self.n_rep = self.n_head // self.n_kv_head - - self.dtype = compute_dtype - self.softmax_dtype = softmax_compute_dtype - self.is_first_iteration = True - self.use_past = use_past - self.compute_in_2d = compute_in_2d - self.use_flash_attention = use_flash_attention - - if self.hidden_size % self.n_head != 0: - raise ValueError("For 'MultiHeadAttention', the class variable 'hidden_size' must be a multiple " - "of 'n_head', but got the hidden_size is {} and the n_head is {}." - .format(self.hidden_size, self.n_head)) - if self.n_kv_head % parallel_config.model_parallel != 0: - raise ValueError("For 'MultiHeadAttention', the class variable 'n_kv_head' must be a multiple of " - "'parallel_config.model_parallel', but got the n_kv_head is {} " - "and the parallel_config.model_parallel is {}." - .format(self.n_kv_head, parallel_config.model_parallel)) - - self.inv_norm_factor = Tensor( - 1.0 / math.sqrt(self.head_dim), dtype=compute_dtype) - - self.reshape = P.Reshape() - self.transpose = P.Transpose() - self.merger_head_transpose = P.Transpose() - self.batch_matmul = P.BatchMatMul() - self.batch_matmul_q_k = P.BatchMatMul(transpose_b=True) - self.mul = P.Mul() - self.add = P.Add() - self.add_alibi = P.Add() - self.softmax = nn.Softmax().to_float(softmax_compute_dtype) - self.cast = P.Cast() - self.cast_attn = P.Cast() - self.tile_kv = P.Tile() - - self.wo = Linear(in_channels=self.hidden_size, - out_channels=self.hidden_size, - has_bias=False, - compute_dtype=compute_dtype, - param_init_type=param_init_type) - self.wq = Linear(self.hidden_size, - self.hidden_size, - has_bias=False, - compute_dtype=compute_dtype, - param_init_type=param_init_type) - self.wk = Linear(self.hidden_size, - self.n_kv_head * self.head_dim, - has_bias=False, - compute_dtype=compute_dtype, - param_init_type=param_init_type) - self.wv = Linear(self.hidden_size, - self.n_kv_head * self.head_dim, - has_bias=False, - compute_dtype=compute_dtype, - param_init_type=param_init_type) - - dp = parallel_config.data_parallel - mp = parallel_config.model_parallel - self.transpose.shard(((dp, 1, mp, 1),)) - self.merger_head_transpose.shard(((dp, mp, 1, 1),)) - self.batch_matmul_q_k.shard(((dp, mp, 1, 1), (dp, mp, 1, 1))) - self.batch_matmul.shard(((dp, mp, 1, 1), (dp, mp, 1, 1))) - self.mul.shard(((dp, mp, 1, 1), ())) - self.add.shard(((dp, 1, 1, 1), (dp, mp, 1, 1))) - self.add_alibi.shard(((dp, mp, 1, 1), (dp, mp, 1, 1))) - self.softmax.softmax.shard(((dp, mp, 1, 1),)) - self.tile_kv.shard(((dp * mp, 1, 1, 1),)) - - self.wq.shard(((dp, 1), (mp, 1))) - self.wk.shard(((dp, 1), (mp, 1))) - self.wv.shard(((dp, 1), (mp, 1))) - self.wo.shard(((dp, mp), (1, mp))) - if parallel_config.use_seq_parallel and self.is_first_iteration: - self.wo.shard(((dp, mp), (1, mp)), - out_strategy_matmul=((dp * mp, 1),)) - if parallel_config.recompute.select_recompute: - self.tile_kv.recompute() - self.batch_matmul_q_k.recompute() - self.mul.recompute() - self.add.recompute() - 
self.cast_attn.recompute() - self.softmax.softmax.recompute() - self.batch_matmul.recompute() - - if self.use_flash_attention: - self.flash_attention = FlashAttention(head_num=n_heads, - scale_value=1. / math.sqrt(self.head_dim), - input_layout='BNSD', - pre_tokens=65536, - next_tokens=0, - use_alibi_mask=True) - self.flash_attention.shard(parallel_config) - if parallel_config.recompute.select_recompute: - self.flash_attention.recompute() - - if self.use_past: - # operators used for state reuse - seq_range = np.arange(seq_length).reshape(1, 1, -1) - self.range = Tensor( - np.tile(seq_range, (batch_size, 1, 1)), mstype.int32) - self.expand_dims = P.ExpandDims().shard(((dp, 1, 1),)) - self.add_past = P.Add().shard(((dp, 1, 1, 1), (dp, 1, 1, 1))) - self.equal = P.Equal().shard(((dp, 1, 1), (dp, 1, 1))) - self.less = P.Less().shard(((dp, 1, 1), (dp, 1, 1))) - self.mul_past = P.Mul().shard(((dp, 1, 1, 1), (dp, 1, 1, 1))) - if use_past_shard: - self.add_past.shard(((dp, mp, 1, 1), (dp, mp, 1, 1))) - self.mul_past.shard(((dp, mp, 1, 1), (dp, 1, 1, 1))) - - def construct(self, x: Tensor, alibi_tensor: Tensor, mask=None, - key_past=None, value_past=None, batch_valid_length=None): - """Forward process of the MultiHeadAttention""" - ori_dtype = x.dtype - # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim] - x = self.reshape(x, (-1, x.shape[-1])) - # [bs * seq/1, hidden_dim] - query = self.cast(self.wq(x), self.dtype) # dp, 1 -> dp, mp - key = self.cast(self.wk(x), self.dtype) # dp, 1 -> dp, mp - value = self.cast(self.wv(x), self.dtype) # dp, 1 -> dp, mp - query = self.reshape(query, (-1, self._get_seq_length_under_incremental(self.seq_length), - self.n_head, self.head_dim)) - key = self.reshape(key, (-1, self._get_seq_length_under_incremental(self.seq_length), - self.n_kv_head, self.head_dim)) - value = self.reshape(value, (-1, self._get_seq_length_under_incremental(self.seq_length), - self.n_kv_head, self.head_dim)) - # [bs, seq/1, n_head/n_kv_head, head_dim] - query = self.transpose(query, (0, 2, 1, 3)) - key = self.transpose(key, (0, 2, 1, 3)) - value = self.transpose(value, (0, 2, 1, 3)) - - # kv cache: [bs, n_kv_head, 1, head_dim] -> [bs, n_kv_head, seq, head_dim] - key_present = key - value_present = value - if self.use_past: - # The first graph with the input size of (bs, seq_length) - if self.is_first_iteration: - # Get the valid input length without padding - valid_length_vector = ( - self.less(self.range, batch_valid_length.view(-1, 1, 1))).astype(self.dtype) - # Cover the key and value numbers corresponding to the padding position - key_present = self.mul_past( - key, self.expand_dims(valid_length_vector, 3)) - value_present = self.mul_past( - value, self.expand_dims(valid_length_vector, 3)) - # The second graph with the inpus size of (bs, 1) - else: - # Get the current token position index - valid_length = batch_valid_length - 1 - valid_length = self.reshape(valid_length, (-1, 1, 1)) - valid_length_vector = (self.equal( - self.range, valid_length)).astype(self.dtype) - # Pad the key and value to seq_length with only the position index not zero - current_key = self.mul_past( - key, self.expand_dims(valid_length_vector, 3)) - current_value = self.mul_past( - value, self.expand_dims(valid_length_vector, 3)) - # Concat the previous saved state and current state - key = self.add_past(key_past, current_key) - value = self.add_past(value_past, current_value) - # Update key_present and value_present for state update - key_present = key - value_present = value - - layer_present = (key_present, 
value_present) - # kv share: [bs, n_kv_head, seq, head_dim] -> [bs, n_head, seq, head_dim] - key = self._repeat_kv(key, self.n_rep) - value = self._repeat_kv(value, self.n_rep) - # q, k, v: [bs, n_head, seq/1, head_dim], [bs, n_head, seq, head_dim], [bs, n_head, seq, head_dim] - if self.use_flash_attention: - mask = self.expand_dim_post(mask, 1) - mask = self.cast(mask, mstype.uint8) - attention = self.flash_attention(query, key, value, mask) - attention = self._merge_heads(attention) - else: - attention = self._attn(query, key, value, alibi_tensor, mask) - # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim] - output = self.wo(attention) # dp, mp -> dp, 1 / dp * mp, 1 - output = self.cast(output, ori_dtype) - - return output, layer_present - - def _repeat_kv(self, x, rep): - if rep == 1: - return x - bs, n_kv_head, seqlen, head_dim = x.shape - x = self.reshape(x, (bs * n_kv_head, 1, seqlen, head_dim)) - x = self.tile_kv(x, (1, rep, 1, 1)) - x = self.reshape(x, (bs, n_kv_head * rep, seqlen, head_dim)) - return x - - def _get_seq_length_under_incremental(self, length): - r"""Return the length of the tensor. - For the incremental prediction, the seq length for the input is 1. - """ - if self.use_past and not self.is_first_iteration: - return 1 - return length - - def _merge_heads(self, x): - """ - convert a 4d input to a 2d or 3d output - - Inputs: - x: input tensor - - Output: - x_merge: the 2d output - """ - # [bs, n_head, seq/1, head_dim] - x = self.merger_head_transpose( - x, (0, 2, 1, 3)) # dp,mp,1,1 -> dp,1,mp,1 - # [bs, seq/1, n_head, head_dim] - x_shape = x.shape - if self.compute_in_2d: - # [bs * seq/1, hidden_dim] - new_shape = (-1, x_shape[-2] * x_shape[-1]) - else: - # [bs, seq/1, hidden_dim] - new_shape = (x_shape[0], x_shape[1], -1) - x_merge = self.reshape(x, new_shape) - return x_merge - - def _attn(self, query, key, value, alibi_tensor, mask): - """ - Get the weighted score along the seq_length - - Inputs: - query: the query matrix - key: the key matrix - value: the value matrix - mask: the attention mask adder matrix with shape (batch_size, - 1, seq_length, seq_length) - Outputs: - weighted_values: Tensor, the weighted sum scores - """ - # q, k: [bs, n_head, seq/1, head_dim], [bs, n_head, seq, head_dim] - score = self.batch_matmul_q_k(query, key) - # score: [bs, n_head, seq/1, seq] - score = self.mul(score, self.inv_norm_factor) - score = self.add_alibi(score, alibi_tensor) - - score = self.add(mask, score) - - attention_probs = self.softmax( - self.cast_attn(score, self.softmax_dtype)) - # score, v: [bs, n_head, seq/1, seq], [bs, n_head, seq, head_dim] - weighted_values = self.batch_matmul( - self.cast(attention_probs, self.dtype), value) - # [bs, n_head, seq/1, head_dim] - attention_merge = self._merge_heads(weighted_values) - # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim] - return attention_merge diff --git a/research/baichuan/convert_reversed.py b/research/baichuan/convert_reversed.py deleted file mode 100644 index 842399a8..00000000 --- a/research/baichuan/convert_reversed.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Convert Baichuan weight. -Support mindformers format. -""" -import collections -import json -import argparse -import torch - -import mindspore as ms - -from mindformers.utils.convert_utils import ms2pt, is_lora_param - - -def read_json(path): - with open(path, "r") as f: - return json.load(f) - - -def name_replace(name: str): - """replace ms param name to hf.""" - name = name.replace('tok_embeddings.embedding_weight', 'embed_tokens.weight') - name = name.replace('.attention.wq.', '.self_attn.q_proj.') - name = name.replace('.attention.wk.', '.self_attn.k_proj.') - name = name.replace('.attention.wv.', '.self_attn.v_proj.') - name = name.replace('.attention.wo.', '.self_attn.o_proj.') - name = name.replace('.feed_forward.w1.', '.mlp.gate_proj.') - name = name.replace('.feed_forward.w2.', '.mlp.down_proj.') - name = name.replace('.feed_forward.w3.', '.mlp.up_proj.') - name = name.replace('.attention_norm.', '.input_layernorm.') - name = name.replace('.ffn_norm.', '.post_attention_layernorm.') - return name - - -# pylint: disable=W0613 -def convert_ms_to_pt(input_path, output_path, dtype=None, **kwargs): - """convert baichuan ms weight to hf.""" - print(f"Trying to convert mindspore checkpoint in '{input_path}'.", flush=True) - model_ms = ms.load_checkpoint(input_path) - - state_dict = {} - attention_dict = collections.defaultdict(lambda: {}) - for name, value in model_ms.items(): - if is_lora_param(name): - name = name.replace('mindpet_delta_lora_a', 'lora_A.weight') - name = name.replace('mindpet_delta_lora_b', 'lora_B.weight') - value = ms2pt(value, dtype) - if '.attention.wq' in name: - name = name.replace('.attention.wq', '.self_attn.W_pack') - attention_dict[name]['wq'] = value - continue - if '.attention.wk' in name: - name = name.replace('.attention.wk', '.self_attn.W_pack') - attention_dict[name]['wk'] = value - continue - if '.attention.wv' in name: - name = name.replace('.attention.wv', '.self_attn.W_pack') - attention_dict[name]['wv'] = value - continue - if name == "model.norm_out.weight": - name = 'model.norm.weight' - - name = name_replace(name) - print(f'\rprocessing parameter: {name} {value.shape} ', end='', flush=True) - state_dict[name] = value - for name, value_dict in attention_dict.items(): - state_dict[name] = torch.cat((value_dict['wq'], value_dict['wk'], value_dict['wv'])) - - torch.save(state_dict, output_path) - print(f"\rConvert baichuan checkpoint finished, the huggingface checkpoint is saved in '{output_path}'.", - flush=True) - return True - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--mindspore_ckpt_path', default='transform.ckpt') - parser.add_argument('--torch_ckpt_path', default='torch.bin') - args = parser.parse_args() - convert_ms_to_pt(input_path=args.mindspore_ckpt_path, output_path=args.torch_ckpt_path) diff --git a/research/baichuan/convert_weight.py b/research/baichuan/convert_weight.py deleted file mode 100644 index 3a59102d..00000000 --- a/research/baichuan/convert_weight.py +++ /dev/null @@ -1,103 +0,0 @@ -# 
Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Convert Baichuan weight. -Support huggingface format. -""" - -import os -import json -import argparse - -import mindspore as ms - -from mindformers.utils.convert_utils import pt2ms - - -def read_json(path): - with open(path, "r") as f: - return json.load(f) - - -def name_replace(name: str): - """replace hf param name to ms.""" - name = name.replace('embed_tokens.weight', 'tok_embeddings.embedding_weight') - name = name.replace('.self_attn.q_proj.', '.attention.wq.') - name = name.replace('.self_attn.k_proj.', '.attention.wk.') - name = name.replace('.self_attn.v_proj.', '.attention.wv.') - name = name.replace('.self_attn.o_proj.', '.attention.wo.') - name = name.replace('.mlp.gate_proj.', '.feed_forward.w1.') - name = name.replace('.mlp.down_proj.', '.feed_forward.w2.') - name = name.replace('.mlp.up_proj.', '.feed_forward.w3.') - name = name.replace('.input_layernorm.', '.attention_norm.') - name = name.replace('.post_attention_layernorm.', '.ffn_norm.') - name = name.replace('.norm.', '.norm_out.') - return name - -# pylint: disable=W0613 -def convert_pt_to_ms(input_path, output_path, dtype=None, **kwargs): - """convert baichuan hf weight to ms.""" - ckpt_dir = os.path.dirname(input_path) - print(f"Trying to convert huggingface checkpoint in '{ckpt_dir}'.", flush=True) - import torch - from transformers import AutoModelForCausalLM - - try: - model_hf = AutoModelForCausalLM.from_pretrained(ckpt_dir, trust_remote_code=True) - args_hf = read_json(os.path.join(ckpt_dir, "config.json")) - # pylint: disable=W0703 - except Exception as e: - print(f"Error {e}.", flush=True) - return False - - dim = args_hf["hidden_size"] - - ckpt_list = [] - for name, value in model_hf.state_dict().items(): - name = name_replace(name) - if 'W_pack' in name: - values = torch.split(value, dim) - wq = name.replace('.self_attn.W_pack', '.attention.wq') # '.self_attn.q_proj.', '.attention.wq.' 
- q_value = values[0] - wk = name.replace('.self_attn.W_pack', '.attention.wk') - k_value = values[1] - wv = name.replace('.self_attn.W_pack', '.attention.wv') - v_value = values[2] - print(f'\rprocessing parameter: {wq} {q_value.shape} ', end='', flush=True) - ckpt_list.append({'name': wq, 'data': pt2ms(q_value, dtype)}) - print(f'\rprocessing parameter: {wk} {k_value.shape} ', end='', flush=True) - ckpt_list.append({'name': wk, 'data': pt2ms(k_value, dtype)}) - print(f'\rprocessing parameter: {wv} {v_value.shape} ', end='', flush=True) - ckpt_list.append({'name': wv, 'data': pt2ms(v_value, dtype)}) - continue - if name == 'norm.weight': - name = 'norm_out.weight' - if name[:7] == 'layers.': - name = name[7:] - print(f'\rprocessing parameter: {name} {value.shape} ', end='', flush=True) - ckpt_list.append({'name': name, 'data': pt2ms(value, dtype)}) - - ms.save_checkpoint(ckpt_list, output_path) - print(f"\rConvert baichuan checkpoint finished, the mindspore checkpoint is saved in '{output_path}'.", - flush=True) - return True - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--torch_ckpt_path', default='./hf.bin') - parser.add_argument('--mindspore_ckpt_path', default='transform.ckpt') - args = parser.parse_args() - convert_pt_to_ms(input_path=args.torch_ckpt_path, output_path=args.mindspore_ckpt_path) diff --git a/research/baichuan/run_baichuan_13b.yaml b/research/baichuan/run_baichuan_13b.yaml deleted file mode 100644 index eab17ffc..00000000 --- a/research/baichuan/run_baichuan_13b.yaml +++ /dev/null @@ -1,212 +0,0 @@ -seed: 0 -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: '' -src_strategy_path_or_dir: '' -auto_trans_ckpt: False # If true, auto transform load_checkpoint to load in distributed model -only_save_strategy: False -resume_training: False -run_mode: 'train' - -# trainer config -trainer: - type: CausalLanguageModelingTrainer - model_name: 'baichuan_13b' -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False - -# runner config -runner_config: - epochs: 1 - batch_size: 1 - sink_mode: True - sink_size: 2 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.95 - eps: 1.e-8 # 1e-8 - learning_rate: 3.e-4 - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 3.e-4 - lr_end: 3.e-5 - warmup_ratio: 0.03 - total_steps: -1 # -1 means it will load the total steps of the dataset - -# dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: True - input_columns: ["input_ids"] # "input_ids", "labels" , labels are used in instruction finetune. 
- num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 1 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -use_parallel: True -# parallel context config -parallel: - parallel_mode: 1 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: False - enable_alltoall: False - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" - parallel_optimizer_config: - gradient_accumulation_shard: False - parallel_optimizer_threshold: 64 -# default parallel of device num = 32 for Atlas 800 -parallel_config: - data_parallel: 2 - model_parallel: 4 - pipeline_stage: 4 - use_seq_parallel: False - micro_batch_num: 32 - vocab_emb_dp: False - gradient_aggregation_group: 4 -# when model parallel is greater than 1, we can set micro_batch_interleave_num=2, that may accelerate the train process. -micro_batch_interleave_num: 1 - -# recompute config -recompute_config: - recompute: True - select_recompute: False - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: True - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "baichuan_13b" - save_checkpoint_steps: 100 - integrated_save: False - async_save: False - - type: ObsMonitor - -# mindspore context init config -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" - max_call_depth: 10000 - max_device_memory: "31GB" - save_graphs: False - save_graphs_path: "./graph" - device_id: 0 - -# model config -model: - model_config: - type: LlamaConfig - batch_size: 1 # add for increase predict - seq_length: 4096 - hidden_size: 5120 - num_layers: 40 - num_heads: 40 - vocab_size: 64000 - multiple_of: 107 - rms_norm_eps: 1.0e-6 - bos_token_id: 1 - eos_token_id: 2 - pad_token_id: 0 - ignore_token_id: -100 - compute_dtype: "float16" - layernorm_compute_type: "float32" - softmax_compute_type: "float16" - rotary_dtype: "float16" - param_init_type: "float16" - use_past: False - extend_method: "None" # support "None", "PI", "NTK" - compute_in_2d: False - use_flash_attention: False - offset: 0 - use_past_shard: False - checkpoint_name_or_path: "" - repetition_penalty: 1 - max_decode_length: 512 - top_k: 3 - top_p: 1 - do_sample: False - arch: - type: Baichuan13BForCausalLM - -processor: - return_tensors: ms - tokenizer: - vocab_file: baichuan_13b_tokenizer.model - unk_token: '' - bos_token: '' - eos_token: '' - pad_token: '' - add_bos_token: False - add_eos_token: False - type: LlamaTokenizer - type: LlamaProcessor - -# metric -metric: - type: PerplexityMetric - -# wrapper cell config -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 4294967296 - scale_factor: 2 - scale_window: 1000 - use_clip_grad: True - -eval_callbacks: - - type: 
ObsMonitor - -auto_tune: False -filepath_prefix: './autotune' -autotune_per_step: 10 - -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: False -profile_communication: False -profile_memory: True -layer_scale: False -layer_decay: 0.65 -lr_scale_factor: 256 - -# aicc -remote_save_url: "Please input obs url on AICC platform." diff --git a/research/baichuan/run_baichuan_13b_910b.yaml b/research/baichuan/run_baichuan_13b_910b.yaml deleted file mode 100644 index 47c3e534..00000000 --- a/research/baichuan/run_baichuan_13b_910b.yaml +++ /dev/null @@ -1,212 +0,0 @@ -seed: 0 -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: '' -src_strategy_path_or_dir: '' -auto_trans_ckpt: False # If true, auto transform load_checkpoint to load in distributed model -only_save_strategy: False -resume_training: False -run_mode: 'train' - -# trainer config -trainer: - type: CausalLanguageModelingTrainer - model_name: 'baichuan_13b' -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False - -# runner config -runner_config: - epochs: 1 - batch_size: 1 - sink_mode: True - sink_size: 2 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.95 - eps: 1.e-8 # 1e-8 - learning_rate: 3.e-4 - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 3.e-4 - lr_end: 3.e-5 - warmup_ratio: 0.03 - total_steps: -1 # -1 means it will load the total steps of the dataset - -# dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: True - input_columns: ["input_ids"] # "input_ids", "labels" , labels are used in instruction finetune. - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 1 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -use_parallel: True -# parallel context config -parallel: - parallel_mode: 1 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: False - enable_alltoall: False - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" - parallel_optimizer_config: - gradient_accumulation_shard: False - parallel_optimizer_threshold: 64 -# default parallel of device num = 8 for Atlas 800T A2 -parallel_config: - data_parallel: 8 - model_parallel: 1 - pipeline_stage: 1 - use_seq_parallel: False - micro_batch_num: 1 - vocab_emb_dp: True - gradient_aggregation_group: 4 -# when model parallel is greater than 1, we can set micro_batch_interleave_num=2, that may accelerate the train process. 
-micro_batch_interleave_num: 1 - -# recompute config -recompute_config: - recompute: True - select_recompute: False - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: True - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "baichuan_13b" - save_checkpoint_steps: 100 - integrated_save: False - async_save: False - - type: ObsMonitor - -# mindspore context init config -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" - max_call_depth: 10000 - max_device_memory: "57GB" - save_graphs: False - save_graphs_path: "./graph" - device_id: 0 - -# model config -model: - model_config: - type: LlamaConfig - batch_size: 1 # add for increase predict - seq_length: 4096 - hidden_size: 5120 - num_layers: 40 - num_heads: 40 - vocab_size: 64000 - multiple_of: 107 - rms_norm_eps: 1.0e-6 - bos_token_id: 1 - eos_token_id: 2 - pad_token_id: 0 - ignore_token_id: -100 - compute_dtype: "float16" - layernorm_compute_type: "float32" - softmax_compute_type: "float16" - rotary_dtype: "float16" - param_init_type: "float16" - use_past: False - extend_method: "None" # support "None", "PI", "NTK" - compute_in_2d: True - use_flash_attention: False - offset: 0 - use_past_shard: False - checkpoint_name_or_path: "" - repetition_penalty: 1 - max_decode_length: 512 - top_k: 3 - top_p: 1 - do_sample: False - arch: - type: Baichuan13BForCausalLM - -processor: - return_tensors: ms - tokenizer: - vocab_file: baichuan_13b_tokenizer.model - unk_token: '' - bos_token: '' - eos_token: '' - pad_token: '' - add_bos_token: False - add_eos_token: False - type: LlamaTokenizer - type: LlamaProcessor - -# metric -metric: - type: PerplexityMetric - -# wrapper cell config -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 4294967296 - scale_factor: 2 - scale_window: 1000 - use_clip_grad: True - -eval_callbacks: - - type: ObsMonitor - -auto_tune: False -filepath_prefix: './autotune' -autotune_per_step: 10 - -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: False -profile_communication: False -profile_memory: True -layer_scale: False -layer_decay: 0.65 -lr_scale_factor: 256 - -# aicc -remote_save_url: "Please input obs url on AICC platform." diff --git a/research/baichuan/run_baichuan_13b_base.py b/research/baichuan/run_baichuan_13b_base.py deleted file mode 100644 index b33e4ced..00000000 --- a/research/baichuan/run_baichuan_13b_base.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""Baichuan13b Train/Finetune/Eval/Predict scripts.""" - -import argparse - -from mindformers import Trainer -from mindformers import init_context, ContextConfig, ParallelContextConfig -from mindformers.tools.utils import check_in_modelarts, set_remote_save_url, str2bool - -# pylint: disable=W0611 -import baichuan_13b - - -def context_init(use_parallel=False, optimizer_parallel=False): - """init context for mindspore.""" - context_config = ContextConfig(mode=0, device_target="Ascend", device_id=0) - parallel_config = None - if use_parallel: - parallel_config = ParallelContextConfig(parallel_mode='SEMI_AUTO_PARALLEL', - gradients_mean=False, - enable_parallel_optimizer=optimizer_parallel, - full_batch=True) - init_context(use_parallel=use_parallel, - context_config=context_config, - parallel_config=parallel_config) - - -def main(task='text_generation', - config='run_baichuan_13b.yaml', - run_mode='train', - use_parallel=False, - ckpt=None, - resume=False, - train_dataset='', - eval_dataset='', - predict_data='', - max_length=512, - op=True, - remote_save_url=None): - """main function.""" - # 适配aicc - if check_in_modelarts() and remote_save_url: - print("remote_save_url is %s, the output file will be uploaded to here.", remote_save_url) - set_remote_save_url(remote_save_url) - - # 环境初始化 - context_init(use_parallel, op) - - # 定义任务,预先准备好相应数据集 - - if run_mode == 'train': - trainer = Trainer(args=config, - task=task, - train_dataset=train_dataset) - trainer.train(train_checkpoint=ckpt, resume=resume) - elif run_mode == 'finetune': - trainer = Trainer(args=config, - task=task, - train_dataset=train_dataset) - trainer.finetune(finetune_checkpoint=ckpt, resume=resume) - elif run_mode == 'eval': - trainer = Trainer(args=config, - task=task, - eval_dataset=eval_dataset) - trainer.evaluate(eval_checkpoint=ckpt) - elif run_mode == 'predict': - trainer = Trainer(args=config, - task=task) - result = trainer.predict(input_data=predict_data, - predict_checkpoint=ckpt, max_length=int(max_length)) - print(result) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--task', default='text_generation', type=str, - help='set task type.') - parser.add_argument('--config', default='run_baichuan_13b.yaml', type=str, - help='set task type.') - parser.add_argument('--run_mode', default='train', type=str, - help='set run mode for model.') - parser.add_argument('--use_parallel', default=True, type=str2bool, - help='open parallel for model.') - parser.add_argument('--load_checkpoint', default="", type=str, - help='checkpoint name or dir to load.') - parser.add_argument('--resume', default=False, type=str2bool, - help='whether resume training.') - parser.add_argument('--train_dataset', default='', type=str, - help='set train dataset.') - parser.add_argument('--eval_dataset', default='', type=str, - help='set eval dataset.') - parser.add_argument('--predict_data', default='', type=str, - help='input predict data.') - parser.add_argument('--predict_length', default=512, type=int, - help='max length for predict output.') - parser.add_argument('--optimizer_parallel', default=True, type=str2bool, - help='whether use optimizer parallel. Default: None') - parser.add_argument('--remote_save_url', default="", type=str, - help='whether use optimizer parallel. 
Default: None') - args = parser.parse_args() - - main(task=args.task, - config=args.config, - run_mode=args.run_mode, - use_parallel=args.use_parallel, - ckpt=args.load_checkpoint, - resume=args.resume, - train_dataset=args.train_dataset, - eval_dataset=args.eval_dataset, - predict_data=args.predict_data, - max_length=args.predict_length, - op=args.optimizer_parallel, - remote_save_url=args.remote_save_url) diff --git a/research/baichuan/run_baichuan_13b_chat.py b/research/baichuan/run_baichuan_13b_chat.py deleted file mode 100644 index 172d77f2..00000000 --- a/research/baichuan/run_baichuan_13b_chat.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Baichuan13b Train/Finetune/Eval/Predict scripts.""" - -import argparse - -import mindspore as ms - -from mindformers import LlamaConfig, LlamaTokenizer, TextStreamer - -from baichuan_13b import Baichuan13BForCausalLM - - -USER_TOKEN_ID = 195 -ASSISTANT_TOKEN_ID = 196 - -def main(tk_config='./', ckpt=None, max_new_tokens=512): - """main function.""" - - # initialize Graph Mode - ms.set_context(mode=0) - - tokenizer = LlamaTokenizer.from_pretrained(tk_config) - - config = LlamaConfig(batch_size=1, # add for increase predict - seq_length=1024, - hidden_size=5120, - num_layers=40, - num_heads=40, - vocab_size=64000, - multiple_of=107, - rms_norm_eps=1.0e-6, - bos_token_id=1, - eos_token_id=2, - pad_token_id=0, - ignore_token_id=-100, - use_past=True, - repetition_penalty=1.1, - temperature=0.3, - max_decode_length=1024, - top_k=5, - top_p=0.85, - do_sample=True, - checkpoint_name_or_path=ckpt) - - baichuan_13b = Baichuan13BForCausalLM(config) - - streamer = TextStreamer(tokenizer, skip_prompt=True) - - messages = [] - while True: - messages.append({"role": "user", "content": input("请输入:")}) - input_ids = build_chat_input(config, tokenizer, messages, max_new_tokens) - outputs = baichuan_13b.generate(input_ids, - streamer=streamer, - temperature=0.3, - top_k=5, - top_p=0.85, - repetition_penalty=1.1, - do_sample=True) - - response = tokenizer.decode(outputs[0][len(input_ids):], skip_speical_tokens=True) - messages.append({"role": "assistant", "content": response}) - - -def build_chat_input(config, tokenizer, messages, max_new_tokens=None): - """add prompt for baichuan input, and truncate input if too long.""" - def _parse_messages(messages, split_role="user"): - system, rounds = "", [] - r = [] - for i, message in enumerate(messages): - if message["role"] == "system": - assert i == 0 - system = message["content"] - continue - if message["role"] == split_role and r: - rounds.append(r) - r = [] - r.append(message) - if r: - rounds.append(r) - return system, rounds - - max_new_tokens = max_new_tokens or config.max_decode_length // 2 - max_input_tokens = config.max_decode_length - max_new_tokens - system, rounds = _parse_messages(messages, split_role="user") - system_tokens = 
tokenizer.encode(system) - max_history_tokens = max_input_tokens - len(system_tokens) - - history_tokens = [] - for r in rounds[::-1]: - round_tokens = [] - for message in r: - if message["role"] == "user": - round_tokens.append(USER_TOKEN_ID) - else: - round_tokens.append(ASSISTANT_TOKEN_ID) - round_tokens.extend(tokenizer.encode(message["content"])) - if not history_tokens or len(history_tokens) + len(round_tokens) <= max_history_tokens: - history_tokens = round_tokens + history_tokens # concat left - if len(history_tokens) < max_history_tokens: - continue - break - - input_tokens = system_tokens + history_tokens - if messages[-1]["role"] != "assistant": - input_tokens.append(ASSISTANT_TOKEN_ID) - input_tokens = input_tokens[-max_input_tokens:] # truncate left - return input_tokens - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument('--config', default=None, type=str, - help='config used to init tokenizer.') - parser.add_argument('--load_checkpoint', default=None, type=str, - help='checkpoint name or dir to load.') - parser.add_argument('--max_new_tokens', default=None, type=int, - help='max new tokens will be generated.') - args = parser.parse_args() - - main(tk_config=args.config, ckpt=args.load_checkpoint, max_new_tokens=args.max_new_tokens) diff --git a/research/baichuan/run_baichuan_7b.yaml b/research/baichuan/run_baichuan_7b.yaml deleted file mode 100644 index e3e396ae..00000000 --- a/research/baichuan/run_baichuan_7b.yaml +++ /dev/null @@ -1,205 +0,0 @@ -seed: 0 -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: '' -auto_trans_ckpt: False # If true, auto transform load_checkpoint to load in distributed model -only_save_strategy: False -resume_training: False -use_parallel: True -run_mode: 'train' - -# trainer config -trainer: - type: CausalLanguageModelingTrainer - model_name: 'baichuan_7b' - -# runner config -runner_config: - epochs: 2 - batch_size: 4 - sink_mode: True - sink_size: 2 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.95 - eps: 1.e-8 # 1e-8 - learning_rate: 3.e-4 - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 3.e-4 - lr_end: 3.e-5 - warmup_ratio: 0.03 - total_steps: -1 # -1 means it will load the total steps of the dataset - -# dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: True - input_columns: ["input_ids"] # "input_ids", "labels" , labels are used in instruction finetune. - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 4 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False -eval_step_interval: -1 # num of step intervals between each eval, -1 means no step end eval. -eval_epoch_interval: 50 # num of epoch intervals between each eval, 1 means eval on every epoch end. 
- -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -# default parallel of device num = 8 for Atlas 800 -parallel_config: - data_parallel: 2 - model_parallel: 1 - pipeline_stage: 4 - micro_batch_num: 16 - vocab_emb_dp: True - gradient_aggregation_group: 4 -# when model parallel is greater than 1, we can set micro_batch_interleave_num=2, that may accelerate the train process. -micro_batch_interleave_num: 1 - -# recompute config -recompute_config: - recompute: True - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: True - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "baichuan" - save_checkpoint_steps: 100 - integrated_save: False - async_save: False - - type: ObsMonitor - -# mindspore context init config -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" - max_call_depth: 10000 - max_device_memory: "31GB" - save_graphs: False - save_graphs_path: "./graph" - device_id: 0 - -# parallel context config -parallel: - parallel_mode: 1 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: False - enable_alltoall: False - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" - parallel_optimizer_config: - gradient_accumulation_shard: False - parallel_optimizer_threshold: 64 - -# model config -model: - model_config: - type: LlamaConfig - batch_size: 1 # add for increase predict - seq_length: 2048 - hidden_size: 4096 - num_layers: 32 - num_heads: 32 - vocab_size: 64000 - multiple_of: 256 - rms_norm_eps: 1.0e-6 - bos_token_id: 1 - eos_token_id: 2 - pad_token_id: 0 - ignore_token_id: -100 - compute_dtype: "float16" - layernorm_compute_type: "float32" - softmax_compute_type: "float32" - rotary_dtype: "float16" - param_init_type: "float16" - use_past: False - offset: 0 - checkpoint_name_or_path: "baichuan_7b" - repetition_penalty: 1 - max_decode_length: 512 - top_k: 3 - top_p: 1 - do_sample: False - arch: - type: LlamaForCausalLM - -processor: - return_tensors: ms - tokenizer: - unk_token: '' - bos_token: '' - eos_token: '' - pad_token: '' - type: LlamaTokenizer - type: LlamaProcessor - -# metric -metric: - type: PerplexityMetric - -# wrapper cell config -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 65536 - scale_factor: 2 - scale_window: 1000 - use_clip_grad: True - -eval_callbacks: - - type: ObsMonitor - -auto_tune: False -filepath_prefix: './autotune' -autotune_per_step: 10 - -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: False -profile_communication: False -profile_memory: True -layer_scale: False -layer_decay: 0.65 -lr_scale_factor: 256 - -# aicc -remote_save_url: "Please input obs url on AICC platform." 
diff --git a/research/codegeex/code_tokenizer.py b/research/codegeex/code_tokenizer.py deleted file mode 100644 index 8e8b8b87..00000000 --- a/research/codegeex/code_tokenizer.py +++ /dev/null @@ -1,86 +0,0 @@ -# coding=utf-8 -# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. -# -# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX -# and OPT implementations in this library. It has been modified from its -# original forms to accommodate minor architectural differences compared -# to GPT-NeoX and OPT used by the Meta AI team that trained the model. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tokenizer classes for CodeGeex.""" -import numpy as np -from mindformers import AutoTokenizer -from mindformers.models.tokenization_utils import AddedToken - - -def encode_whitespaces(text: str, start_extra_id: int, max_len: int): - """ Encode whitespaces to extra tokens. - - >>> encode_whitespaces('a\\n b\\n c', 10, 10) - 'a\\n<|extratoken_10|>b\\n<|extratoken_11|>c' - """ - for i in np.arange(max_len, 1, -1): - text = text.replace( - " " * i, f"<|extratoken_{start_extra_id + i - 2}|>") - return text - - -def decode_whitespaces(text: str, start_extra_id: int, max_len: int): - """ Decode the whitespace-encoded strings produced by encode_whitespace. 
- - >>> text = 'a\\n b\\n c' - >>> s, l = 10, 10 - >>> text == decode_whitespaces(encode_whitespaces(text, s, l), s, l) - True - """ - for l in range(2, max_len + 1): - token_id = start_extra_id - 2 + l - token = f'<|extratoken_{token_id}|>' - text = text.replace(token, ' ' * l) - text = text.replace(f'<|endoftext|>', '') - return text if not text.startswith("vocab_pad_token") else '' - - -class CodeTokenizer(): - """Tokenizer classes for CodeGeex""" - def __init__( - self, - vocab_size, - start_extra_id: int = 10, - max_len: int = 10, - ): - self.tokenizer = AutoTokenizer.from_pretrained('gpt2') - sp_tokens = [AddedToken( - f'<|extratoken_{token_id-50256}|>', lstrip=False, rstrip=False) for token_id in range(50257, 50400)] - self.tokenizer.add_special_tokens( - {"additional_special_tokens": sp_tokens}) - num_pad = vocab_size - self.tokenizer.vocab_size - vocab_pad_tokens = ["vocab_pad_token{}".format( - i) for i in range(1, num_pad + 1)] - self.tokenizer.add_tokens(vocab_pad_tokens) - - self.start_extra_id = start_extra_id - self.max_len = max_len - self.eos_token_id = self.tokenizer.eos_token_id - - def encode_code(self, code: str): - code = encode_whitespaces(code, self.start_extra_id, self.max_len) - input_ids = self.tokenizer(code).input_ids - return input_ids - - def decode_code(self, input_ids): - texts = self.tokenizer.batch_decode(input_ids) - output_code = [decode_whitespaces( - text, self.start_extra_id, self.max_len) for text in texts] - return output_code diff --git a/research/codegeex/codegeex.md b/research/codegeex/codegeex.md deleted file mode 100644 index 7b579109..00000000 --- a/research/codegeex/codegeex.md +++ /dev/null @@ -1,142 +0,0 @@ -# CodeGeex - -CodeGeeX是一个具有130亿参数的多编程语言代码生成预训练模型。CodeGeeX采用华为MindSpore框架实现,在鹏城实验室“鹏城云脑II”中的192个节点(共1536个国产昇腾910 AI处理器)上训练而成。 - -## 快速使用 - -### CodeGeex-13B 预训练权重转换 - -通过[该链接](https://models.aminer.cn/codegeex/download/request)申请权重,您将收到一个包含临时下载链接文件```urls.txt```的邮件。推荐使用[aria2](https://aria2.github.io/)通过以下命令快速下载(请保证有足够的硬盘空间存放权重(~26GB)): - -```bash -aria2c -x 16 -s 16 -j 4 --continue=true -i urls.txt -``` - -使用以下命令合并得到完整的权重: - -```bash -cat codegeex_13b.tar.gz.* > codegeex_13b.tar.gz -tar xvf codegeex_13b.tar.gz -``` - -执行权重转换脚本 - -```shell -python research/codegeex/convert_weight.py --torch_path TORCH_CKPT_DIR --mindspore_path MS_CKPT_NAME -``` - -```text -# 参数说明 -TORCH_CKPT_DIR: torch权重保存目录路径 -mindspore_path: 权重保存文件名,保存为TORCH_CKPT_DIR/OUTPUT_NAME, 也可以指定为自定义保存路径 -``` - -### 基于API接口推理 - -使用MindSpore API进行推理. 
- -Atlas 800T A2需要配置环境变量 - -```shell -# node 1 -export MS_ENABLE_GE=1 -export MS_GE_TRAIN=1 -export MS_ENABLE_REF_MODE=1 -export MS_GE_ATOMIC_CLEAN_POLICY=1 -``` - -```python -# >>> `chat.py`文件 -import numpy as np -from typing import * -from mindspore.parallel import set_algo_parameters -from mindformers import PanguAlphaConfig, init_context -from code_tokenizer import CodeTokenizer -from codegeex import CodeGeexHeadModel - - -# set context -context_config = {"device_target": "Ascend", "mode": 0, "max_device_memory": "31GB", "device_id": 2} -parallel_context_config = {"parallel_mode": 1, "gradients_mean": False, "full_batch": True} -rank_id, device_num = init_context(use_parallel=False, context_config=context_config, parallel_config=parallel_context_config) -set_algo_parameters(elementwise_op_strategy_follow=True, fully_use_devices=True) - -config = PanguAlphaConfig( - checkpoint_name_or_path=CKPT_PATH, - batch_size = 1, - seq_length = 2048, - vocab_size = 52224, - hidden_size = 5120, - ffn_hidden_size = 20480, - num_layers = 40, - num_heads = 40, - pad_token_id = 50256, - eos_token_id = 50256, - post_layernorm_residual = False, - param_init_type = 'float16', - compute_dtype = 'float16', - softmax_compute_type = 'float32', - dropout_rate = 0.1, - hidden_act = 'fast_gelu', - use_past = True, - use_moe = False, - expert_num = 1, - per_token_num_experts_chosen = 1, - repetition_penalty = 1, - max_decode_length = 1024, - top_k = 100, - top_p = 0.95, - temperature = 0.8, - do_sample = True, - eod_mask_loss = False, - ) - -def chat(): - model = CodeGeexHeadModel(config) - model.set_train(False) - question_list = [ - "def add(a, b):\n '''\n Find the sum of a and b.\n '''\n", - "bool prime(int n) {\n // Find whether n is a prime number\n", - ] - - # Define tokenizer - tokenizer = CodeTokenizer(config.vocab_size) - i = 0 - for question in question_list: - inputs = tokenizer.encode_code(question) - inputs = np.array([inputs]).astype(np.int32) # add batch dim - outputs = model.generate(inputs, max_length=1024, top_p=0.95, temperature=0.8, eos_token_id=50256) - output_samples = tokenizer.decode_code(outputs) - output_samples_str = "".join(output_samples) - print(f"=================== prompt {i} ====================") - print(question, flush=True) - print(f"=================== generation {i} ====================") - print(output_samples_str, flush=True) - i = i + 1 - - -if __name__ == "__main__": - chat() - - -``` - -### 单机多卡运行训练 - -```shell -# node 1 -export MS_ENABLE_GE=1 -export MS_GE_TRAIN=1 -export MS_ENABLE_REF_MODE=1 -export MS_GE_ATOMIC_CLEAN_POLICY=1 -cd mindformers/research -bash run_singlenode.sh "python codegeex/run_codegeex.py --config codegeex/run_codegeex_910b.yaml --run_mode=train --train_data path/to/mindrecord_dir" path/to/rank_table_file [0,8] 8 -``` - -**参数说明** - `config`: code_geex相关配置文件 - `run_mode`:运行模式,包括train,finetune,eval,predict - `train_data`:train数据,训练时需要填入。 - - 更多输入可参考`run_codegeex.py - `脚本内入参 diff --git a/research/codegeex/codegeex.py b/research/codegeex/codegeex.py deleted file mode 100644 index 1d5ee1ee..00000000 --- a/research/codegeex/codegeex.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""CodeGeex training wrapper""" - -import numpy as np -import mindspore.common.dtype as mstype -from mindspore.common.tensor import Tensor -from mindspore.ops import functional as F -from mindspore.ops import operations as P - - -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType -from mindformers.core.loss import CrossEntropyLoss -from mindformers.models.pangualpha import PanguAlphaHeadModel - - -__all__ = ['CodeGeexHeadModel'] - - -@MindFormerRegister.register(MindFormerModuleType.MODELS) -class CodeGeexHeadModel(PanguAlphaHeadModel): - """ - CodeGeex training loss for generation. - Args: - config(CodeGeexConfig) - Returns: - output: Tensor, the loss of the network - """ - - def __init__(self, config): - super(CodeGeexHeadModel, self).__init__(config) - self.pad_token = Tensor(config.pad_token_id) - dp = config.parallel_config.data_parallel - self.eod_token = config.eod_token - self.loss = CrossEntropyLoss(config.parallel_config.dp_mp_config) - - self.slice = P.StridedSlice().shard(((dp, 1),)) - self.not_equal = P.NotEqual().shard(((dp, 1), ())) - self.slice2 = P.StridedSlice().shard(((dp, 1, 1),)) - self.eod_mask_loss = config.eod_mask_loss - if config.single_checkpoint_name_or_path != "": - config.checkpoint_name_or_path = config.single_checkpoint_name_or_path - self.load_checkpoint(config) - - # pylint: disable=W0613 - def construct(self, input_ids, input_position=None, attention_mask=None, position_ids=None, - input_embeds=None, labels=None, init_reset=True, batch_valid_length=None): - r"""Forward process of the codegeex model""" - batch_size, seq_length = input_ids.shape - - if self.training: - seq_length = seq_length - 1 - tokens = self.slice(input_ids, (0, 0), - (batch_size, seq_length), (1, 1)) - input_position = self.slice( - input_position, (0, 0), (batch_size, seq_length), (1, 1)) - attention_mask = self.cast(attention_mask, mstype.float16) - input_mask = F.ones_like(tokens) - if self.eod_mask_loss: - input_mask = F.cast(self.not_equal( - tokens, self.eod_token), mstype.float32) - else: - tokens = input_ids - input_position = F.tuple_to_array(F.make_range(seq_length)) - input_position = P.Tile()(input_position, (batch_size, 1)) - input_mask = F.cast(F.not_equal( - tokens, self.pad_token), mstype.float32) - if self.is_first_iteration is False: - attention_mask = P.Tile()( - Tensor(np.ones((1, 1, 2048)), mstype.float32), (batch_size, 1, 1)) - else: - attention_mask = self.get_attention_mask(input_mask) - batch_valid_length -= 1 - logits, vocab_table = self.backbone( - tokens, input_position, attention_mask, init_reset, batch_valid_length) - logits = self.head(logits, vocab_table) - if not self.training: - return (logits,) - # Get label corresponding to input tokens - labels = self.slice(input_ids, (0, 1), - (batch_size, seq_length + 1), (1, 1)) - labels = P.Reshape()(labels, (-1,)) - input_mask = P.Reshape()(input_mask, (-1,)) - output = self.loss(logits, labels, input_mask) - return output diff --git a/research/codegeex/convert_weight.py 
b/research/codegeex/convert_weight.py deleted file mode 100644 index 0580fd7f..00000000 --- a/research/codegeex/convert_weight.py +++ /dev/null @@ -1,212 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Convert checkpoint from torch""" -import argparse -import torch -from mindspore import save_checkpoint, Tensor - - -def generate_params_dict(total_layers, - mindspore_params_per_layer, - torch_params_per_layer, - mindspore_top_layer, - torch_top_layer): - """ - Generate the total parameter mapping of mindspore and pytorch. - - Args: - total_layers(int): The total layers of the net. - mindspore_params_per_layer(list): The list of params per layer for the net of mindspore. - torch_params_per_layer(list): The list of params per layer for the net of pytorch. - mindspore_additional_params(list): The list of params outside the layer for the net of mindspore - torch_additional_params(list): The list of params outside the layer for the net of pytorch. - - Returns: - A list of tuple. The first element is the parameter name of mindspore, - the another is the parameter name of pytorch. - """ - map_params = list( - zip(mindspore_params_per_layer, torch_params_per_layer)) - output_dict = {} - for i in range(total_layers): - for ms_para, torch_para in map_params: - src = ms_para.format(i) - tgt = torch_para.format(i) - output_dict[tgt] = src - for ms_para, torch_para in zip(mindspore_top_layer, torch_top_layer): - output_dict[torch_para] = ms_para - return output_dict - - -def print_dict(input_dict): - """ - Print the keys and values of input dict - - Args: - input_dict(dict): input dict with key and value. 
- - Returns: - None - """ - for k, v in input_dict.items(): - print(f"Param: {k} with shape {v}") - - -def walk_dict(state_dict, mapped_param: dict): - """Transfer params""" - new_ckpt_list = [] - print("Converting Embedding layers...") - word_embeddings = state_dict['module']['language_model']['embedding']['word_embeddings']['weight'] - new_ckpt_list.append({"data": Tensor(word_embeddings.cpu().numpy( - )), "name": "backbone.embedding.word_embedding.embedding_table"}) - position_embeddings = state_dict['module']['language_model']['embedding']['position_embeddings']['weight'] - new_ckpt_list.append({"data": Tensor(position_embeddings.cpu().numpy( - )), "name": "backbone.embedding.position_embedding.embedding_table"}) - - print("Converting QueryEmbedding layers...") - query_embeddings = state_dict['module']['language_model']['topQueryEmbedding']['top_query_embeddings']['weight'] - new_ckpt_list.append({"data": Tensor(query_embeddings.cpu().numpy( - )), "name": "backbone.top_query_embedding.embedding_table"}) - - print("Converting FinalLayerNorm layers...") - final_layernorm_weight = state_dict['module']['language_model']['transformer']['final_layernorm.weight'] - new_ckpt_list.append({"data": Tensor( - final_layernorm_weight.cpu().numpy()), "name": "backbone.layernorm.gamma"}) - final_layernorm_bias = state_dict['module']['language_model']['transformer']['final_layernorm.bias'] - new_ckpt_list.append({"data": Tensor( - final_layernorm_bias.cpu().numpy()), "name": "backbone.layernorm.beta"}) - - print("Converting Transformer layers...") - for layer_name in state_dict['module']['language_model']['transformer'].keys(): - params = state_dict['module']['language_model']['transformer'][layer_name] - if layer_name in mapped_param.keys(): - if "h_to_4h.weight" in layer_name or "4h_to_h.weight" in layer_name \ - or "attention.dense.weight" in layer_name: - new_ckpt_list.append( - {"data": Tensor(params.cpu().numpy().T), "name": mapped_param[layer_name]}) - else: - new_ckpt_list.append( - {"data": Tensor(params.cpu().numpy()), "name": mapped_param[layer_name]}) - return new_ckpt_list - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="OPT convert script") - parser.add_argument('--layers', - type=int, - default=39, - help="The number of layers of the model to be converted.") - parser.add_argument("--torch_path", - type=str, - default=None, - required=True, - help="The torch checkpoint path.") - parser.add_argument("--mindspore_path", - type=str, - required=True, - default="The output mindspore checkpoint path.", - help="Use device nums, default is 128.") - - opt = parser.parse_args() - para_dict = torch.load(opt.torch_path, map_location='cpu') - - ms_name = [ - "backbone.blocks.{}.layernorm1.gamma", - "backbone.blocks.{}.layernorm1.beta", - "backbone.blocks.{}.layernorm2.gamma", - "backbone.blocks.{}.layernorm2.beta", - "backbone.blocks.{}.attention.projection.weight", - "backbone.blocks.{}.attention.projection.bias", - "backbone.blocks.{}.attention.dense1.weight", - "backbone.blocks.{}.attention.dense1.bias", - "backbone.blocks.{}.attention.dense2.weight", - "backbone.blocks.{}.attention.dense2.bias", - "backbone.blocks.{}.attention.dense3.weight", - "backbone.blocks.{}.attention.dense3.bias", - "backbone.blocks.{}.output.mapping.weight", - "backbone.blocks.{}.output.mapping.bias", - "backbone.blocks.{}.output.projection.weight", - "backbone.blocks.{}.output.projection.bias", - ] - - torch_name = [ - 'layers.{}.input_layernorm.weight', - 'layers.{}.input_layernorm.bias', - 
'layers.{}.post_attention_layernorm.weight', - 'layers.{}.post_attention_layernorm.bias', - 'layers.{}.attention.dense.weight', - 'layers.{}.attention.dense.bias', - 'layers.{}.attention.query.weight', - 'layers.{}.attention.query.bias', - 'layers.{}.attention.key.weight', - 'layers.{}.attention.key.bias', - 'layers.{}.attention.value.weight', - 'layers.{}.attention.value.bias', - 'layers.{}.mlp.dense_h_to_4h.weight', - 'layers.{}.mlp.dense_h_to_4h.bias', - 'layers.{}.mlp.dense_4h_to_h.weight', - 'layers.{}.mlp.dense_4h_to_h.bias' - ] - - ms_top_layer_name = [ - "backbone.top_query_layer.layernorm1.gamma", - "backbone.top_query_layer.layernorm1.beta", - "backbone.top_query_layer.layernorm2.gamma", - "backbone.top_query_layer.layernorm2.beta", - "backbone.top_query_layer.attention.projection.weight", - "backbone.top_query_layer.attention.projection.bias", - "backbone.top_query_layer.attention.dense1.weight", - "backbone.top_query_layer.attention.dense1.bias", - "backbone.top_query_layer.attention.dense2.weight", - "backbone.top_query_layer.attention.dense2.bias", - "backbone.top_query_layer.attention.dense3.weight", - "backbone.top_query_layer.attention.dense3.bias", - "backbone.top_query_layer.output.mapping.weight", - "backbone.top_query_layer.output.mapping.bias", - "backbone.top_query_layer.output.projection.weight", - "backbone.top_query_layer.output.projection.bias", - ] - - torch_top_layer_name = [ - 'topQueryLayer.input_layernorm.weight', - 'topQueryLayer.input_layernorm.bias', - 'topQueryLayer.post_attention_layernorm.weight', - 'topQueryLayer.post_attention_layernorm.bias', - 'topQueryLayer.attention.dense.weight', - 'topQueryLayer.attention.dense.bias', - 'topQueryLayer.attention.query.weight', - 'topQueryLayer.attention.query.bias', - 'topQueryLayer.attention.key.weight', - 'topQueryLayer.attention.key.bias', - 'topQueryLayer.attention.value.weight', - 'topQueryLayer.attention.value.bias', - 'topQueryLayer.mlp.dense_h_to_4h.weight', - 'topQueryLayer.mlp.dense_h_to_4h.bias', - 'topQueryLayer.mlp.dense_4h_to_h.weight', - 'topQueryLayer.mlp.dense_4h_to_h.bias' - ] - - mapped_params = generate_params_dict(opt.layers, - ms_name, - torch_name, - ms_top_layer_name, torch_top_layer_name) - - # new_ckpt = get_converted_ckpt(mapped_param, state_dict) - new_ckpt = walk_dict(para_dict, mapped_params) - for item in new_ckpt: - print(f"para_name:{item['name']}, shape:{item['data'].shape}") - save_checkpoint(new_ckpt, opt.mindspore_path) - print(f"Convert finished, the output is saved to {opt.mindspore_path}") diff --git a/research/codegeex/data_preprocess.py b/research/codegeex/data_preprocess.py deleted file mode 100644 index eef481de..00000000 --- a/research/codegeex/data_preprocess.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -""" -Dataset utils for training -""" - -import os -from abc import ABC, abstractmethod -import pickle -import numpy as np -from mindspore.mindrecord import FileWriter - - -class Dataset(ABC): - @abstractmethod - def __len__(self): - pass - - @abstractmethod - def __getitem__(self, idx): - pass - - -class LMDBDataset(Dataset): - """Read data from lmdb""" - def __init__(self, path, process_fn=None): - import lmdb - - self.path = path - self.env = lmdb.open( - path, - max_readers=32, - readonly=True, - lock=False, - readahead=False, - meminit=False, - ) - self.process_fn = process_fn - if not self.env: - raise IOError("Cannot open lmdb dataset", path) - - with self.env.begin(write=False) as txn: - self.length = int(txn.get("length".encode("utf-8")).decode("utf-8")) - - def __len__(self): - return self.length - - def __getitem__(self, idx): - # print(f"Get {self.path}: {idx}") - with self.env.begin(write=False) as txn: - key = str(idx).encode("utf-8") - try: - row = pickle.loads(txn.get(key)) - except TypeError: - raise IndexError("Index out of range") - if self.process_fn: - return self.process_fn(row) - return row - - -class PadDataset(Dataset): - """Pad data""" - def __init__(self, dataset, seq_len, eod_id): - self.dataset = dataset - self.seq_len = seq_len + 1 - self.eod_id = eod_id - - def __len__(self): - return len(self.dataset) - - def __getitem__(self, idx): - item = self.dataset[idx][0] - return (item[:self.seq_len],) if self.seq_len <= len(item) else ( - np.concatenate((item, np.ones(self.seq_len - len(item)) * self.eod_id), axis=0),) - -def get_code_data_train(code_data_path, args, process_fn=None): - """Get train data""" - if os.path.exists(os.path.join(code_data_path, 'data.mdb')): - full_path = os.path.join(code_data_path) - print(f"Loading code data {full_path}") - data = LMDBDataset( - full_path, - process_fn=process_fn, - ) - data = PadDataset( - data, - args.seq_length, - args.eod_id, - ) - return data - -def generate_mindrecord(args, file_name="codegeex.mindrecord"): - """Generate mindrecord format data.""" - data_schema = {"input_ids": {"type": "int32", "shape": [-1]}} - writer = FileWriter(file_name, shard_num=1, overwrite=True) - writer.add_schema(data_schema, "it is a code dataset") - - data = [] - train_data = get_code_data_train(args.code_data, args) - for i, input_id in enumerate(train_data): - print(i) - sample = {"input_ids": np.array(input_id).squeeze().astype(np.int32)} - data.append(sample) - if i > 100: - writer.write_raw_data(data) - data = [] - - if data: - print(data) - writer.write_raw_data(data) - - writer.commit() - - -if __name__ == "__main__": - import argparse - args_opt = argparse.ArgumentParser(description="PanguAlpha training") - args_opt.add_argument("--seq_length", - type=int, - default=2048, - help="sequence length, default is 2048.") - args_opt.add_argument("--eod_id", - type=int, default=50256, - help="The id of end of document") - args_opt.add_argument("--eod_reset", - type=int, - default=1, - help="Enable eod mask, default is 1.") - args_opt.add_argument('--code_data', - type=str, - help='Location of code data.') - args_opt = args_opt.parse_args() - print(args_opt) - generate_mindrecord(args_opt) diff --git a/research/codegeex/run_codegeex.py b/research/codegeex/run_codegeex.py deleted file mode 100644 index bac9e7a9..00000000 --- a/research/codegeex/run_codegeex.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed 
under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Codegeex Train/Finetune/Eval/Predict scripts.""" - -import argparse - -from mindformers import Trainer -from mindformers import init_context, ContextConfig, ParallelContextConfig -from mindformers.tools.utils import str2bool - -# pylint: disable=W0611 -import codegeex - -def context_init(use_parallel=False, optimizer_parallel=False): - """init context for mindspore.""" - context_config = ContextConfig(mode=0, device_target="Ascend", device_id=0, max_device_memory='57GB') - parallel_config = None - if use_parallel: - parallel_config = ParallelContextConfig(parallel_mode='SEMI_AUTO_PARALLEL', - gradients_mean=False, - enable_parallel_optimizer=optimizer_parallel, - full_batch=True) - init_context(use_parallel=use_parallel, - context_config=context_config, - parallel_config=parallel_config) - - -def main(task='text_generation', - config='run_codegeex.yaml', - run_mode='train', - use_parallel=False, - ckpt=None, - resume=False, - train_dataset='', - eval_dataset='', - predict_data='', - max_length=512, - op=True): - """main function.""" - - # 环境初始化 - context_init(use_parallel, op) - - # 定义任务,预先准备好相应数据集 - - if run_mode == 'train': - trainer = Trainer(args=config, - task=task, - train_dataset=train_dataset) - trainer.train(train_checkpoint=ckpt, resume=resume) - elif run_mode == 'finetune': - trainer = Trainer(args=config, - task=task, - train_dataset=train_dataset) - trainer.finetune(finetune_checkpoint=ckpt, resume=resume) - elif run_mode == 'eval': - trainer = Trainer(args=config, - task=task, - eval_dataset=eval_dataset) - trainer.evaluate(eval_checkpoint=ckpt) - elif run_mode == 'predict': - trainer = Trainer(args=config, - task=task) - result = trainer.predict(input_data=predict_data, - predict_checkpoint=ckpt, max_length=int(max_length)) - print(result) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--task', default='text_generation', type=str, - help='set task type.') - parser.add_argument('--config', default='run_codegeex.yaml', type=str, - help='set task type.') - parser.add_argument('--run_mode', default='train', type=str, - help='set run mode for model.') - parser.add_argument('--use_parallel', default=True, type=str2bool, - help='open parallel for model.') - parser.add_argument('--load_checkpoint', default="", type=str, - help='checkpoint name or dir to load.') - parser.add_argument('--resume', default=False, type=str2bool, - help='whether resume training.') - parser.add_argument('--train_dataset', default='', type=str, - help='set train dataset.') - parser.add_argument('--eval_dataset', default='', type=str, - help='set eval dataset.') - parser.add_argument('--predict_data', default='', type=str, - help='input predict data.') - parser.add_argument('--predict_length', default=512, type=int, - help='max length for predict output.') - parser.add_argument('--optimizer_parallel', default=True, type=str2bool, - help='whether use 
optimizer parallel. Default: None') - args = parser.parse_args() - - main(task=args.task, - config=args.config, - run_mode=args.run_mode, - use_parallel=args.use_parallel, - ckpt=args.load_checkpoint, - resume=args.resume, - train_dataset=args.train_dataset, - eval_dataset=args.eval_dataset, - predict_data=args.predict_data, - max_length=args.predict_length, - op=args.optimizer_parallel) diff --git a/research/codegeex/run_codegeex_910b.yaml b/research/codegeex/run_codegeex_910b.yaml deleted file mode 100644 index 434f6e80..00000000 --- a/research/codegeex/run_codegeex_910b.yaml +++ /dev/null @@ -1,170 +0,0 @@ -seed: 0 -run_mode: 'train' -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: "" -auto_trans_ckpt: False # If true, auto transform load_checkpoint to load in distributed model -only_save_strategy: False -resume_training: False - - -# context -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" - max_call_depth: 10000 - max_device_memory: "57GB" - save_graphs: False - save_graphs_path: "./graph" - device_id: 0 - -# runner -runner_config: - epochs: 2 - batch_size: 2 - sink_mode: True - sink_size: 2 - -runner_wrapper: - type: MFTrainOneStepCell - enable_global_norm: True - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 4294967296 - scale_factor: 2 - scale_window: 1000 - - -# parallel -use_parallel: True -parallel: - parallel_mode: 1 # 0-dataset, 1-semi, 2-auto, 3-hybrid - gradients_mean: False - loss_repeated_mean: True - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" -parallel_config: - data_parallel: 8 - model_parallel: 1 - pipeline_stage: 1 - micro_batch_num: 1 - vocab_emb_dp: False - gradient_aggregation_group: 4 -micro_batch_interleave_num: 1 - -# moe -moe_config: - expert_num: 1 - capacity_factor: 1.05 - aux_loss_factor: 0.05 - num_experts_chosen: 1 - -# recompute -recompute_config: - recompute: True - parallel_optimizer_comm_recompute: True - mp_comm_recompute: False - recompute_slice_activation: True - - -# Trainer -trainer: - type: CausalLanguageModelingTrainer - model_name: 'codegeex_13b' -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False -eval_step_interval: -1 # num of step intervals between each eval, -1 means no step end eval. -eval_epoch_interval: 40 # num of epoch intervals between each eval, 1 means eval on every epoch end. 
- -# train dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: True - input_columns: ["input_ids"] - output_columns: ["input_ids", "position_id", "attention_mask"] - eod_reset: True - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 16 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset - -# model -model: - model_config: - type: PanguAlphaConfig - batch_size: 16 - seq_length: 2048 - vocab_size: 52224 - hidden_size: 5120 - ffn_hidden_size: 20480 - num_layers: 40 - num_heads: 40 - pad_token_id: 50256 - eod_token_id: 50256 - eos_token_id: 50256 - post_layernorm_residual: False - dropout_rate: 0.1 - embedding_dropout_prob: 0.1 - hidden_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - param_init_type: 'float16' - compute_dtype: 'float16' - softmax_compute_type: 'float16' - hidden_act: 'fast_gelu' - use_past: False - use_moe: False - expert_num: 1 - per_token_num_experts_chosen: 1 - checkpoint_name_or_path: "codegeex_13b" - single_checkpoint_name_or_path: "" - repetition_penalty: 1 - max_decode_length: 1024 - top_k: 100 - top_p: 0.95 - temperature: 0.8 - do_sample: True - eod_mask_loss: False - arch: - type: CodeGeexHeadModel - -# lr schedule -lr_schedule: - type: polynomial - learning_rate: 0.00005 - lr_end: 0.000001 - warmup_steps: 2000 - total_steps: -1 # -1 means it will load the total steps of the dataset -layer_scale: False -layer_decay: 0.65 -lr_scale: False -lr_scale_factor: 256 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.95 - eps: 0.00000001 # 1e-8 - weight_decay: 0.1 - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "Codegeex-13b" - save_checkpoint_steps: 500 - integrated_save: False - async_save: False - - type: ObsMonitor \ No newline at end of file diff --git a/research/knowlm/convert_reversed.py b/research/knowlm/convert_reversed.py deleted file mode 100644 index 3cb1ea8d..00000000 --- a/research/knowlm/convert_reversed.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Convert llama weight. -Support mindspore format. 
-""" -import argparse -import torch -import mindspore as ms - -from mindformers.utils.convert_utils import ms2pt - - -def name_replace(name: str): - """replace hf param name to ms.""" - name = name.replace('.norm_out.', '.norm.') - name = name.replace('.ffn_norm.', '.post_attention_layernorm.') - name = name.replace('.attention_norm.', '.input_layernorm.') - name = name.replace('.feed_forward.w3.', '.mlp.up_proj.') - name = name.replace('.feed_forward.w2.', '.mlp.down_proj.') - name = name.replace('.feed_forward.w1.', '.mlp.gate_proj.') - name = name.replace('.attention.wo.', '.self_attn.o_proj.') - name = name.replace('.attention.wv.', '.self_attn.v_proj.') - name = name.replace('.attention.wk.', '.self_attn.k_proj.') - name = name.replace('.attention.wq.', '.self_attn.q_proj.') - name = name.replace('tok_embeddings.embedding_weight', 'embed_tokens.weight') - - return name - - -# pylint: disable=W0613 -def convert_ms_to_pt(input_path, output_path, dtype=None, **kwargs): - """convert hf weight to ms.""" - print(f"Trying to convert mindspore checkpoint in '{input_path}'.", flush=True) - - param_dict = ms.load_checkpoint(input_path) - - state_dict = {} - for name, value in param_dict.items(): - print(f'\rprocessing parameter: {name} {value.shape} ', end='', flush=True) - value = ms2pt(value, dtype) - name = name_replace(name) - state_dict[name] = value - - torch.save(state_dict, output_path) - print(f"\rConvert mindspore checkpoint finished, the huggingface checkpoint is saved in '{output_path}'.", - flush=True) - return True - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--mindspore_ckpt_path', default='knowlm.ckpt') - parser.add_argument('--torch_bin_path', default='knowlm.bin') - args = parser.parse_args() - convert_ms_to_pt(args.mindspore_ckpt_path, args.torch_bin_path) diff --git a/research/knowlm/convert_weight.py b/research/knowlm/convert_weight.py deleted file mode 100644 index c25a787c..00000000 --- a/research/knowlm/convert_weight.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Convert llama weight. -Support huggingface format and Meta format. 
-""" - -import os -import json -import argparse -import mindspore as ms - -from mindformers.utils.convert_utils import pt2ms - - -def read_json(path): - with open(path, "r") as f: - return json.load(f) - - -def name_replace(name: str): - """replace hf param name to ms.""" - name = name.replace('embed_tokens.weight', 'tok_embeddings.embedding_weight') - name = name.replace('.self_attn.q_proj.', '.attention.wq.') - name = name.replace('.self_attn.k_proj.', '.attention.wk.') - name = name.replace('.self_attn.v_proj.', '.attention.wv.') - name = name.replace('.self_attn.o_proj.', '.attention.wo.') - name = name.replace('.mlp.gate_proj.', '.feed_forward.w1.') - name = name.replace('.mlp.down_proj.', '.feed_forward.w2.') - name = name.replace('.mlp.up_proj.', '.feed_forward.w3.') - name = name.replace('.input_layernorm.', '.attention_norm.') - name = name.replace('.post_attention_layernorm.', '.ffn_norm.') - name = name.replace('.norm.', '.norm_out.') - return name - - -# pylint: disable=W0613 -def convert_pt_to_ms(input_path, output_path, dtype=None, **kwargs): - """convert hf weight to ms.""" - ckpt_dir = os.path.dirname(input_path) - print(f"Trying to convert huggingface checkpoint in '{ckpt_dir}'.", flush=True) - try: - from transformers import LlamaForCausalLM - except: - raise ImportError(f"Failed to load huggingface checkpoint. Please make sure transformers is available.") - - try: - model_hf = LlamaForCausalLM.from_pretrained(ckpt_dir) - args_hf = read_json(os.path.join(ckpt_dir, "config.json")) - print(args_hf) - # pylint: disable=W0703 - except Exception as e: - print(f"Error {e}.", flush=True) - return False - ckpt_list = [] - for name, value in model_hf.state_dict().items(): - name = name_replace(name) - print(f'\rprocessing parameter: {name} {value.shape} ', end='', flush=True) - ckpt_list.append({'name': name, 'data': pt2ms(value, dtype)}) - ms.save_checkpoint(ckpt_list, output_path) - print(f"\rConvert huggingface checkpoint finished, the mindspore checkpoint is saved in '{output_path}'.", - flush=True) - return True - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--torch_bin_path', default='knowlm.bin') - parser.add_argument('--mindspore_ckpt_path', default='knowlm.ckpt') - args = parser.parse_args() - convert_pt_to_ms(args.torch_bin_path, args.mindspore_ckpt_path) diff --git a/research/knowlm/generate.py b/research/knowlm/generate.py deleted file mode 100644 index 1512a350..00000000 --- a/research/knowlm/generate.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -'''Knowlm weight convert''' -import yaml -from mindspore import context -from mindformers.pipeline import pipeline -from mindformers import LlamaForCausalLM, LlamaConfig, LlamaTokenizer - -context.set_context(device_target="Ascend") -with open("./knowlm.yaml", 'r') as file: - knowlm_data = yaml.load(file, Loader=yaml.FullLoader) - -# init knowlm-13b-zhixi model -knowlm_model_path = "/path/to/your/weight.ckpt" # knowlm-13B-zhixi ckpt path -knowlm_config = LlamaConfig( - seq_length=knowlm_data['model_config']['seq_length'], - vocab_size=knowlm_data['model_config']['vocab_size'], - pad_token_id=knowlm_data['model_config']['pad_token_id'], - checkpoint_name_or_path=knowlm_data['model_config']['checkpoint_name_or_path'], - hidden_size=knowlm_data['model_config']['hidden_size'], - num_layers=knowlm_data['model_config']['num_layers'], - num_heads=knowlm_data['model_config']['num_heads'], - rms_norm_eps=knowlm_data['model_config']['rms_norm_eps'] -) -knowlm_model = LlamaForCausalLM( - config=knowlm_config -) -# init knowlm-13b-zhixi tokenizer -tokenizer_path = "/path/to/your/tokenizer" # knowlm-13B-zhixi tokenizer.model path -tokenizer = LlamaTokenizer( - vocab_file=tokenizer_path -) -pipeline_task = pipeline("text_generation", model=knowlm_model, tokenizer=tokenizer, max_length=32) -peline_result = pipeline_task( - "你非常了解一些健康生活的习惯,请列举几个健康生活的建议", - top_k=3, - do_sample=True, - top_p=0.95, - repetition_penalty=1.3, - max_length=256) - -print(peline_result) diff --git a/research/knowlm/knowlm.md b/research/knowlm/knowlm.md deleted file mode 100644 index 5a8a5db6..00000000 --- a/research/knowlm/knowlm.md +++ /dev/null @@ -1,133 +0,0 @@ -# KnowLM - -KnowLM是一个知识增强的开源语言大模型框架,旨在提供灵活且可定制的工具集和并发布相应的模型,帮助研究人员和开发者更好地处理大模型知识更新和知识谬误等问题,具体包括: - -1.**知识提示**:基于知识提示技术从知识图谱生成和优化指令数据以解决知识抽取问题 - -2.**知识编辑**:基于知识编辑技术对齐大模型内过时及价值观不正确的知识以解决知识谬误问题 - -3.**知识交互**:基于知识交互技术实现工具组合学习及多智能体协作以解决语言模型具身认知问题 - -现阶段KnowLM已发布基于LLaMA1的13B基础模型一个(KnowLM-13B-Base),知识抽取大模型一个(KnowLM-13B-ZhiXi,KnowLM-13B-IE2个版本)。 - -项目主页:[KnowLM](https://github.com/zjunlp/KnowLM) - -## KnowLM-13B-ZhiXi - -KnowLM-13B-Base以 LlaMA-13B 为基础,使用中英文双语数据进行了二次预训练,提高了模型对中文的理解能力。KnowLM-13B-ZhiXi在 Knowlm-13B-Base 的基础上,利用知识图谱转换指令技术生成数据对该模型进行了微调。详情请参考[KnowLM](https://github.com/zjunlp/KnowLM)项目 - -```text -@misc{knowlm, - author = {Ningyu Zhang and Jintian Zhang and Xiaohan Wang and Honghao Gui and Kangwei Liu and Yinuo Jiang and Xiang Chen and Shengyu Mao and Shuofei Qiao and Yuqi Zhu and Zhen Bi and Jing Chen and Xiaozhuan Liang and Yixin Ou and Runnan Fang and Zekun Xi and Xin Xu and Lei Li and Peng Wang and Mengru Wang and Yunzhi Yao and Bozhong Tian and Yin Fang and Guozhou Zheng and Huajun Chen}, - title = {KnowLM: An Open-sourced Knowledgeable Large Language Model Framework}, - year = {2023}, - url = {http://knowlm.zjukg.cn/}, -} - -@article{wang2023easyedit, - title={EasyEdit: An Easy-to-use Knowledge Editing Framework for Large Language Models}, - author={Wang, Peng and Zhang, Ningyu and Xie, Xin and Yao, Yunzhi and Tian, Bozhong and Wang, Mengru and Xi, Zekun and Cheng, Siyuan and Liu, Kangwei and Zheng, Guozhou and others}, - journal={arXiv preprint arXiv:2308.07269}, - year={2023} -} - -@article{yao2023editing, - title={Editing Large Language Models: Problems, Methods, and Opportunities}, - author={Yao, Yunzhi and Wang, Peng and Tian, Bozhong and Cheng, Siyuan and Li, Zhoubo and Deng, Shumin and Chen, Huajun and Zhang, Ningyu}, - journal={arXiv preprint arXiv:2305.13172}, - 
year={2023} -} -``` - -## 快速使用 - -### KnowLM-13B-ZhiXi 预训练权重转换 - -从huggingface下载[KnowLM-13B-ZhiXi](https://huggingface.co/zjunlp/knowlm-13b-zhixi/tree/main);把文件全部下载下来 - -执行权重转换脚本 - -```shell -python research/knowlm/convert_weight.py --torch_bin_path TORCH_BIN_PATH --mindspore_ckpt_path MS_CKPT_PATH -``` - -```text -# 参数说明 -TORCH_BIN_PATH: huggingface权重保存目录下任意权重bin文件,根据该文件路径读取目录下所有权重 -MS_CKPT_PATH: mindspore权重保存文件路径 -``` - -### API方式调用 - -> 需开发者提前pip安装。具体接口说明请参考[API接口](https://gitee.com/mindspore/transformer/wikis/API/) -> 遵从Knowlm-13B-zhixi的license,本模型需要用户自行下载权重进行处理 - -- pipeline接口开启快速推理 - -```python -from mindspore import context -from mindformers.pipeline import pipeline -from mindformers import LlamaForCausalLM, LlamaConfig, LlamaTokenizer - -context.set_context(device_target="Ascend") -# init knowlm-13b-zhixi model -knowlm_model_path = "/path/to/your/weight.ckpt" # knowlm-13B-zhixi ckpt path -knowlm_config = LlamaConfig( - seq_length=2048, - vocab_size=32000, - pad_token_id=0, - checkpoint_name_or_path=knowlm_model_path, - hidden_size=5120, - num_layers=40, - num_heads=40, - rms_norm_eps=1e-6 -) -knowlm_model = LlamaForCausalLM( - config=knowlm_config -) -# init knowlm-13b-zhixi tokenizer -tokenizer_path = "/path/to/your/tokenizer" # knowlm-13B-zhixi tokenizer.model path -tokenizer = LlamaTokenizer( - vocab_file=tokenizer_path -) -pipeline_task = pipeline("text_generation", model=knowlm_model, tokenizer=tokenizer, max_length=32) -peline_result = pipeline_task("你非常了解一些健康生活的习惯,请列举几个健康生活的建议", top_k=3, do_sample=True, top_p=0.95, repetition_penalty=1.3, max_length=256) - -print(peline_result) -#你非常了解一些健康生活的习惯,请列举几个健康生活的建议:1.每天坚持锻炼30分钟以上。 2.不吸烟,不酗酒。 3.少吃高脂肪食物。 4.多吃蔬菜和水果。 5.保证充足的睡眠。 6.保持良好的心情。 7.定期体检。 8.养成良好的卫生习惯 -``` - -### KnowLM-13B-ZhiXi Lora微调训练 - -#### 前期准备 - -环境要求和微调准备参考[llama-7b-lora的前期准备](https://gitee.com/rolnan_f/mindformers/blob/dev/docs/model_cards/llama.md#%E5%89%8D%E6%9C%9F%E5%87%86%E5%A4%87) - -#### 数据集准备 - -微调训练采用的数据集为alpaca数据集,数据处理部分可以参考[llama-7b的数据处理过程](https://gitee.com/rolnan_f/mindformers/blob/dev/docs/model_cards/llama.md#%E6%95%B0%E6%8D%AE%E9%9B%86%E5%87%86%E5%A4%87-%E5%BE%AE%E8%B0%83) - -给出了knowlm-13b-zhixi适配的lora配置文件-run_knowlm_13b.yaml - -#### 脚本启动 - -```sh -cd scripts -# 单卡启动 -bash run_standalone.sh run_knowlm_13b.yaml [DEVICE_ID] finetune -# 多卡启动(以单机八卡为例) -bash run_distribute.sh [RANK_TABLE_FILE] run_knowlm_13b.yaml [0,8] finetune -``` - -### 训练速度和精度 - -我们在华为昇腾NPUAscend 910 32GB显存上进行训练,采用了fp32(单精度浮点数)的数据格式进行计算。在每个step中,Lora所需的时间约为2480ms,同时,每秒处理的样本数是0.81samples s/p - -在不同数据集上的精度如下 -|f1|A800-3epoch|v100-1.6epoch|Ascend-3epoch| -|-|-|-|-| -|GIDS|68.64|74.04|76.23| -|NYT11|72.43|75.51|75.14| -|SciERC|25.15|37.28|36.49| -|kbp37|93.44|95.48|95.73| - diff --git a/research/knowlm/knowlm.yaml b/research/knowlm/knowlm.yaml deleted file mode 100644 index a7a0df78..00000000 --- a/research/knowlm/knowlm.yaml +++ /dev/null @@ -1,8 +0,0 @@ -model_config: - seq_length: 2048 - vocab_size: 32000 - pad_token_id: 0 - hidden_size: 5120 - num_layers: 40 - num_heads: 40 - rms_norm_eps: 1.e-6 \ No newline at end of file diff --git a/research/knowlm/run_knowlm_13b.yaml b/research/knowlm/run_knowlm_13b.yaml deleted file mode 100644 index 7561dc9d..00000000 --- a/research/knowlm/run_knowlm_13b.yaml +++ /dev/null @@ -1,205 +0,0 @@ -seed: 0 -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: '' -src_strategy_path_or_dir: '' -auto_trans_ckpt: False # If true, auto transform load_checkpoint to load in distributed model -only_save_strategy: False -resume_training: False 
-use_parallel: False -run_mode: 'finetune' - -# trainer config -trainer: - type: CausalLanguageModelingTrainer - model_name: 'llama_13b' - -# runner config -runner_config: - epochs: 1 - batch_size: 4 - sink_mode: True - sink_size: 2 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.95 - eps: 1.e-8 # 1e-8 - learning_rate: 3.e-4 - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 3.e-4 - warmup_ratio: 0.03 - total_steps: -1 # -1 means it will load the total steps of the dataset - -# dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: True - input_columns: ["input_ids",'labels'] # "input_ids", "labels" , labels are used in instruction finetune. - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 4 - repeat: 1 - numa_enable: False - prefetch_size: 1 - -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset - -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids",'labels'] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -# default parallel of device num = 16 for Atlas 800 -parallel_config: - data_parallel: 2 - model_parallel: 4 - pipeline_stage: 2 - micro_batch_num: 16 - vocab_emb_dp: False - gradient_aggregation_group: 4 -# when model parallel is greater than 1, we can set micro_batch_interleave_num=2, that may accelerate the train process. 
-micro_batch_interleave_num: 1 - -# recompute config -recompute_config: - recompute: True - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: True - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "knowlm_13b" - save_checkpoint_steps: 100 - integrated_save: False - async_save: False - - type: ObsMonitor - -# mindspore context init config -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" - max_call_depth: 10000 - max_device_memory: "31GB" - save_graphs: False - save_graphs_path: "./graph" - device_id: 5 - -# parallel context config -parallel: - parallel_mode: 1 # 0-dataset, 1-semi, 2-auto, 3-hybrid - gradients_mean: False - enable_alltoall: False - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" - parallel_optimizer_config: - gradient_accumulation_shard: False - parallel_optimizer_threshold: 64 - -# model config -model: - model_config: - type: LlamaConfig - batch_size: 1 # add for increase predict - seq_length: 2048 - hidden_size: 5120 - num_layers: 40 - num_heads: 40 - vocab_size: 32000 - multiple_of: 256 - rms_norm_eps: 1.0e-6 - bos_token_id: 1 - eos_token_id: 2 - pad_token_id: 0 - compute_dtype: "float16" - layernorm_compute_type: "float32" - softmax_compute_type: "float32" - rotary_dtype: "float16" - param_init_type: "float16" - use_past: True - offset: 0 - checkpoint_name_or_path: "knowlm_13b" - repetition_penalty: 1 - max_decode_length: 512 - top_k: 3 - top_p: 1 - do_sample: False - arch: - type: LlamaForCausalLM - -processor: - return_tensors: ms - tokenizer: - unk_token: '' - bos_token: '' - eos_token: '' - pad_token: '' - type: LlamaTokenizer - vocab_file: "/path/tokenizer.model" - type: LlamaProcessor - -# metric -metric: - type: PerplexityMetric - -# wrapper cell config -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 65536 - scale_factor: 2 - scale_window: 1000 - use_clip_grad: True - -eval_callbacks: - - type: ObsMonitor - -auto_tune: False -filepath_prefix: './autotune' -autotune_per_step: 10 - -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: False -profile_communication: False -profile_memory: True -layer_scale: False -layer_decay: 0.65 -lr_scale_factor: 256 - -# aicc -remote_save_url: "Please input obs url on AICC platform." diff --git a/research/rewardmodel/convert_weight_reward.py b/research/rewardmodel/convert_weight_reward.py deleted file mode 100644 index a2bedb58..00000000 --- a/research/rewardmodel/convert_weight_reward.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Convert checkpoint from huggingface""" -import os -import re -import argparse -import torch -import mindspore -from mindspore import Tensor, Parameter - - -def layer_name_mapping(key): - """Convert huggingface PP weights mapping in MindSpore. - - return: split, new_name - """ - prefix = '' - if 'transformer' in key: - prefix = 'transformer.' - key = key.replace('transformer.', '') - # Handle first and last layers - layer_rename_map = { - "word_embeddings.weight": "embedding.word_embedding.embedding_table", - "word_embeddings_layernorm.weight": "embedding.norm.gamma", - "word_embeddings_layernorm.bias": "embedding.norm.beta", - "ln_f.weight": "ln_f.gamma", - "ln_f.bias": "ln_f.beta", - "input_layernorm.weight": "layernorm1.gamma", - "input_layernorm.bias": "layernorm1.beta", - "self_attention.query_key_value.weight": "attention.dense{}.weight", - "self_attention.query_key_value.bias": "attention.dense{}.bias", - "self_attention.dense.weight": "attention.projection.weight", - "self_attention.dense.bias": "attention.projection.bias", - "post_attention_layernorm.weight": "layernorm2.gamma", - "post_attention_layernorm.bias": "layernorm2.beta", - "mlp.dense_h_to_4h.weight": "output.mapping.weight", - "mlp.dense_h_to_4h.bias": "output.mapping.bias", - "mlp.dense_4h_to_h.weight": "output.projection.weight", - "mlp.dense_4h_to_h.bias": "output.projection.bias", - "lm_head.weight": "head.weight", - "lm_head.bias": "head.bias", - "v_head.weight": "vhead.vhead.weight", - } - - split = False - if key in layer_rename_map: - return split, prefix + layer_rename_map[key] - - # Handle transformer blocks - match = re.match(r'^\w*\.(\d+)\.(\w+\.\w+\.\w+|\w+\.\w+)$', key) - layer_number = int(match.group(1)) - text = match.group(2) - if "self_attention.query_key_value" in key: - split = True - return split, f"{prefix}blocks.{layer_number}." 
+ layer_rename_map[text] - -def hf_to_ms(hf_weights, args, ms_dtype=mindspore.float32, for_save=False): - """Convert hf layers to ms.""" - ms_params = {} - for k, v in hf_weights.items(): - print(k, v.shape, v.dtype) - split, new_name = layer_name_mapping(k) - if split: - if 'weight' in new_name: - v = v.reshape(args.n_head, 3, args.hidden_size // args.n_head, v.shape[-1]) - v_list = v.tensor_split(3, dim=1) - for i in range(1, 4): - tmp_name = new_name.format(i) - print(v_list[i-1].shape) - tmp_tensor = Tensor(v_list[i-1].reshape(-1, v_list[i-1].shape[-1]).float().numpy(), ms_dtype) - ms_params[tmp_name] = Parameter(tmp_tensor, name=tmp_name) - else: - v = v.reshape(args.n_head, 3, args.hidden_size // args.n_head) - v_list = v.tensor_split(3, dim=1) - for i in range(1, 4): - tmp_name = new_name.format(i) - print(v_list[i-1].shape) - tmp_tensor = Tensor(v_list[i-1].reshape(-1).float().numpy(), ms_dtype) - ms_params[tmp_name] = Parameter(tmp_tensor, name=tmp_name) - else: - if ('projection' in new_name or 'mapping' in new_name) and 'weight' in new_name: - new_tensor = Tensor(v.transpose(0, 1).float().numpy(), ms_dtype) - else: - new_tensor = Tensor(v.float().numpy(), ms_dtype) - ms_params[new_name] = Parameter(new_tensor, name=new_name) - - if for_save: - return [{'name': k, 'data': v} for k, v in ms_params.items()] - - return ms_params - -def process_hf_shard_files(file_list, args, save_dir=None, combine=False, ms_dtype=mindspore.float32): - ''' torch ckpt files loop''' - if save_dir and not os.path.exists(save_dir): - os.makedirs(save_dir, exist_ok=True) - - combine_params = [] - file = None - for file in file_list: - pt_states = torch.load(file, map_location='cpu') - ms_params = hf_to_ms(pt_states, args, ms_dtype, True) - if combine: - combine_params.extend(ms_params) - else: - save_file = save_dir + '/' + file.split('/')[-1] if save_dir else file + '.ckpt' - mindspore.save_checkpoint(ms_params, save_file) - - del pt_states - del ms_params - - if combine: - path = save_dir + '/' + 'combine.ckpt' if save_dir else \ - '/'.join(file.split('/')[:-1]) + 'combine.ckpt' - mindspore.save_checkpoint(combine_params, path) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Bloom convert script") - parser.add_argument('--n_head', - type=int, - default=32, - required=False, - help="The number of head of the model to be converted.") - parser.add_argument('--hidden_size', - type=int, - default=4096, - required=False, - help="The number of hidden size of the model to be converted.") - parser.add_argument("--torch_path", - type=str, - default="/home/qianjiahong/ckpt/pytorch_model.bin", - required=False, - help="The input torch checkpoint path.") - parser.add_argument("--mindspore_path", - type=str, - required=False, - default="/home/qianjiahong/ckpt/pretrain", - help="The output mindspore checkpoint path.") - config = parser.parse_args() - - # convert hf ckpt to ms - process_hf_shard_files(file_list=[config.torch_path], args=config, - combine=True, save_dir=config.mindspore_path) diff --git a/research/rewardmodel/run_bloom_7.1b_reward.yaml b/research/rewardmodel/run_bloom_7.1b_reward.yaml deleted file mode 100644 index 1eeb49b7..00000000 --- a/research/rewardmodel/run_bloom_7.1b_reward.yaml +++ /dev/null @@ -1,188 +0,0 @@ -seed: 0 -run_mode: 'train' -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: '' -src_strategy_path_or_dir: '' -auto_trans_ckpt: False # If true, auto transform load_checkpoint to load in distributed model -only_save_strategy: 
False -resume_training: False -micro_batch_interleave_num: 1 - -auto_tune: True -filepath_prefix: './autotune' -autotune_per_step: 8 - -profile: False -profile_start_step: 10 -profile_stop_step: 20 -profile_memory: True - -model: - model_config: - type: BloomConfig - seq_length: 1024 - vocab_size: 250880 - hidden_size: 4096 - num_layers: 30 - num_heads: 32 - expand_ratio: 4 - hidden_act: "fast_gelu" - hidden_dropout_rate: 0.1 - attention_dropout_rate: 0.1 - initializer_range: 0.02 - param_init_type: "float16" - embedding_init_type: "float32" - layernorm_compute_type: "float32" - softmax_compute_type: "float16" - compute_dtype: "float16" - use_seq_parallel: True - use_select_recompute: False - checkpoint_name_or_path: "/home/qianjiahong/ckpt/pretrain/combine.ckpt" - eos_token_id: 2 - bos_token_id: 1 - repetition_penalty: 1 - max_decode_length: 1024 - top_k: 5 - top_p: 1 - arch: - type: BloomRewardModel - - -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: True - input_columns: ["chosen_input_ids", "chosen_attention_mask", - "rejected_input_ids", "rejected_attention_mask", - "position_id", "loss_mask", "end_ind"] - output_columns: ["input_ids", "position_id", "attention_mask", "loss_mask", "end_ind"] - num_parallel_workers: 16 - python_multiprocessing: False - drop_remainder: True - batch_size: 4 - repeat: 1 - numa_enable: False - prefetch_size: 1 - pad_token_id: 2 - -train_dataset_task: - type: RewardModelDataset - dataset_config: *train_dataset - -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["chosen_input_ids", "chosen_attention_mask", - "rejected_input_ids", "rejected_attention_mask", - "position_id", "loss_mask", "end_ind"] - output_columns: ["input_ids", "position_id", "attention_mask", "loss_mask", "end_ind"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - batch_size: 1 - repeat: 1 - numa_enable: False - prefetch_size: 1 - -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -processor: - return_tensors: ms - tokenizer: - type: BloomTokenizer - type: BloomProcessor - - -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_id: 0 - device_target: "Ascend" - max_device_memory: "31GB" - save_graphs: False - save_graphs_path: "./graph" - -moe_config: - expert_num: 1 - capacity_factor: 1.05 - aux_loss_factor: 0.05 - num_experts_chosen: 1 - -# parallel -use_parallel: True -parallel: - parallel_optimizer_config: {"gradient_accumulation_shard": False} - parallel_mode: 1 # 0-dataset, 1-semi, 2-auto, 3-hybrid - gradients_mean: False - loss_repeated_mean: True - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: False - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" - -parallel_config: - data_parallel: 1 - model_parallel: 8 - pipeline_stage: 1 - micro_batch_num: 1 - vocab_emb_dp: False - gradient_aggregation_group: 4 - -recompute_config: - recompute: False - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: True - -# aicc -remote_save_url: "Please input obs url on AICC platform." 
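-
-# the runner_config below controls the training loop; with sink_mode enabled, sink_size is the number of steps executed per data sink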
- -# runner config -runner_config: - epochs: 1 - batch_size: 4 - sink_mode: True - sink_size: 2 - per_epoch_size: 2 - -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 4294967296 - scale_factor: 2 - scale_window: 1000 - use_clip_grad: True - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.999 - eps: 0.00000001 - weight_decay: 0.0 # 0.1 - -lr_schedule: - type: constant_with_warmup - learning_rate: 0.000005 - warmup_steps: 100 - warmup_lr_init: 0.0 - -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "mindformers" - save_checkpoint_steps: 1000 - keep_checkpoint_max: 3 - integrated_save: False - async_save: False - - type: ObsMonitor - -trainer: - type: CausalLanguageModelingTrainer - model_name: 'bloom_7.1b' - diff --git a/research/skywork/convert_reversed.py b/research/skywork/convert_reversed.py deleted file mode 100644 index f7875287..00000000 --- a/research/skywork/convert_reversed.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Convert checkpoint from mindspore""" -import argparse -import torch -import mindspore as ms - -from mindformers.utils.convert_utils import ms2pt - - -def name_replace(weight_name: str): - """replace weight name""" - weight_name = weight_name.replace('.ffn_norm.', '.post_attention_layernorm.') - weight_name = weight_name.replace('.attention_norm.', '.input_layernorm.') - weight_name = weight_name.replace('.feed_forward.w3.', '.mlp.up_proj.') - weight_name = weight_name.replace('.feed_forward.w2.', '.mlp.down_proj.') - weight_name = weight_name.replace('.feed_forward.w1.', '.mlp.gate_proj.') - weight_name = weight_name.replace('.attention.wo.', '.self_attn.o_proj.') - weight_name = weight_name.replace('.attention.wv.', '.self_attn.v_proj.') - weight_name = weight_name.replace('.attention.wk.', '.self_attn.k_proj.') - weight_name = weight_name.replace('.attention.wq.', '.self_attn.q_proj.') - weight_name = weight_name.replace('output.', 'lm_head.') - weight_name = weight_name.replace('tok_embeddings.', 'embed_tokens.') - - return weight_name - -# pylint: disable=W0613 -def convert_ms_to_pt(input_path, output_path, dtype=None, **kwargs): - """ - convert ms to pt - """ - print(f"Trying to convert mindspore checkpoint in {input_path}.") - model_ms = ms.load_checkpoint(input_path) - - state_dict = {} - for name, value in model_ms.items(): - value = ms2pt(value, dtype) - if name == 'model.norm_out.weight': - name = 'model.norm.weight' - if name == 'lm_head.weight': - name = 'output.weight' - if name == 'model.tok_embeddings.embedding_weight': - name = 'model.tok_embeddings.weight' - name = name_replace(name) - state_dict[name] = value - print(name, value.shape) - - torch.save(state_dict, output_path) - print(f"Convert finished, the output is saved to {output_path}.") - - -if __name__ == 
"__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--mindspore_ckpt_path', default='transform.ckpt') - parser.add_argument('--torch_ckpt_path', default='./output.bin') - - args = parser.parse_args() - convert_ms_to_pt(args.mindspore_ckpt_path, args.torch_ckpt_path) diff --git a/research/skywork/convert_weight.py b/research/skywork/convert_weight.py deleted file mode 100644 index 44559e41..00000000 --- a/research/skywork/convert_weight.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -transform huggingface model to mindspore ckpt. -""" - -import argparse -import os - -import mindspore as ms -from transformers import LlamaForCausalLM - -from mindformers.utils.convert_utils import pt2ms - - -def name_replace(weight_name: str): - """replace weight name""" - weight_name = weight_name.replace('embed_tokens.', 'tok_embeddings.') - weight_name = weight_name.replace('lm_head.', 'output.') - weight_name = weight_name.replace('.self_attn.q_proj.', '.attention.wq.') - weight_name = weight_name.replace('.self_attn.k_proj.', '.attention.wk.') - weight_name = weight_name.replace('.self_attn.v_proj.', '.attention.wv.') - weight_name = weight_name.replace('.self_attn.o_proj.', '.attention.wo.') - weight_name = weight_name.replace('.mlp.gate_proj.', '.feed_forward.w1.') - weight_name = weight_name.replace('.mlp.down_proj.', '.feed_forward.w2.') - weight_name = weight_name.replace('.mlp.up_proj.', '.feed_forward.w3.') - weight_name = weight_name.replace('.input_layernorm.', '.attention_norm.') - weight_name = weight_name.replace('.post_attention_layernorm.', '.ffn_norm.') - return weight_name - -# pylint: disable=W0613 -def convert_pt_to_ms(input_path, output_path, dtype=None, **kwargs): - """ - convert pt tp ms - """ - print(f"Trying to convert mindspore checkpoint in {input_path}.") - model_hf = LlamaForCausalLM.from_pretrained(os.path.dirname(input_path)) - ckpt_list = [] - for name, value in model_hf.state_dict().items(): - name = name_replace(name) - if name == 'model.norm.weight': - name = 'model.norm_out.weight' - if name == 'output.weight': - name = 'lm_head.weight' - if name == 'model.tok_embeddings.weight': - name = 'model.tok_embeddings.embedding_weight' - value = pt2ms(value, dtype) - print(name, value.shape) - ckpt_list.append({'name': name, 'data': value}) - - ms.save_checkpoint(ckpt_list, output_path) - print(f"Convert finished, the output is saved to {output_path}.") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--torch_ckpt_path', default='./hf.bin') - parser.add_argument('--mindspore_ckpt_path', default='transform.ckpt') - args = parser.parse_args() - convert_pt_to_ms(args.torch_ckpt_path, args.mindspore_ckpt_path) diff --git a/research/skywork/run_skywork.py b/research/skywork/run_skywork.py deleted file mode 100644 index 10e7fcb5..00000000 --- 
a/research/skywork/run_skywork.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Skywork-13B Train/Finetune/Eval/Predict/Export scripts.""" - -import argparse - -from mindformers import Trainer, MindFormerConfig -from mindformers.tools.utils import check_in_modelarts, set_remote_save_url, str2bool -from mindformers.core.context import build_context - - -def main(task='text_generation', - config='run_skywork_13b.yaml', - run_mode='predict', - pet_method='', - use_parallel=False, - resume=False, - auto_trans_ckpt=False, - train_dataset="", - ckpt=None, - eval_dataset='', - predict_data='', - max_length=512, - op=True, - remote_save_url=None, - device_id=None, - use_past=False, - batch_size=None): - """main function.""" - # 适配aicc - if check_in_modelarts() and remote_save_url: - print("remote_save_url is %s, the output file will be uploaded to here.", remote_save_url) - set_remote_save_url(remote_save_url) - - config_args = MindFormerConfig(config) - - if ckpt: - config_args.load_checkpoint = ckpt - if device_id: - config_args.context.device_id = device_id - config_args.parallel.enable_parallel_optimizer = op - config_args.auto_trans_ckpt = auto_trans_ckpt - config_args.use_parallel = use_parallel - config_args.model.model_config.checkpoint_name_or_path = config_args.load_checkpoint - config_args.model.model_config.use_past = use_past - if batch_size: - config_args.model.model_config.batch_size = batch_size - - # 环境初始化 - build_context(config_args) - - # 定义任务,预先准备好相应数据集 - if run_mode == 'train': - trainer = Trainer(args=config_args, - task=task, - train_dataset=train_dataset, - pet_method=pet_method) - trainer.train(train_checkpoint=config_args.load_checkpoint, auto_trans_ckpt=config_args.auto_trans_ckpt, - resume_training=resume) - elif run_mode == 'finetune': - trainer = Trainer(args=config_args, - task=task, - train_dataset=train_dataset, - pet_method=pet_method) - print(trainer) - trainer.finetune(finetune_checkpoint=config_args.load_checkpoint, auto_trans_ckpt=config_args.auto_trans_ckpt, - resume_training=resume) - elif run_mode == 'eval': - trainer = Trainer(args=config_args, - task=task, - eval_dataset=eval_dataset) - trainer.evaluate(eval_checkpoint=config_args.load_checkpoint) - elif run_mode == 'predict': - trainer = Trainer(args=config_args, - task=task) - result = trainer.predict(predict_checkpoint=config_args.load_checkpoint, input_data=predict_data, - max_length=int(max_length)) - print(result) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--task', default='text_generation', type=str, - help='set task type.') - parser.add_argument('--config', default='run_skywork_13b.yaml', type=str, - help='set task type.') - parser.add_argument('--run_mode', default='predict', type=str, - help='set run mode for model.') - parser.add_argument('--pet_method', default='', type=str, - help='set 
pet method for low parameter finetune.') - parser.add_argument('--use_parallel', default=True, type=str2bool, - help='open parallel for model.') - parser.add_argument('--load_checkpoint', default=None, type=str, - help='checkpoint name or dir to load.') - parser.add_argument('--auto_trans_ckpt', default=False, type=str2bool, - help='whether auto trans ckpt.') - parser.add_argument('--resume', default=False, type=str2bool, - help='whether resume training.') - parser.add_argument('--train_dataset', default='', type=str, - help='set train dataset.') - parser.add_argument('--eval_dataset', default='', type=str, - help='set eval dataset.') - parser.add_argument('--predict_data', default='', type=str, - help='input predict data.') - parser.add_argument('--device_id', default=None, type=int, - help='set device id.') - parser.add_argument('--predict_length', default=512, type=int, - help='max length for predict output.') - parser.add_argument('--batch_size', default=None, type=int, - help='batch_size for export mindir.') - parser.add_argument('--optimizer_parallel', default=True, type=str2bool, - help='whether use optimizer parallel. Default: None') - parser.add_argument('--remote_save_url', default="", type=str, - help='whether use optimizer parallel. Default: None') - parser.add_argument('--use_past', default=False, type=str2bool, - help='whether use past. Default: False') - args = parser.parse_args() - print(args) - - main(task=args.task, - config=args.config, - run_mode=args.run_mode, - pet_method=args.pet_method, - use_parallel=args.use_parallel, - resume=args.resume, - auto_trans_ckpt=args.auto_trans_ckpt, - train_dataset=args.train_dataset, - ckpt=args.load_checkpoint, - eval_dataset=args.eval_dataset, - predict_data=args.predict_data, - max_length=args.predict_length, - op=args.optimizer_parallel, - remote_save_url=args.remote_save_url, - device_id=args.device_id, - use_past=args.use_past, - batch_size=args.batch_size) diff --git a/research/skywork/run_skywork_13b.yaml b/research/skywork/run_skywork_13b.yaml deleted file mode 100644 index 771913c4..00000000 --- a/research/skywork/run_skywork_13b.yaml +++ /dev/null @@ -1,210 +0,0 @@ -seed: 1234 -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: '' -src_strategy_path_or_dir: '' -auto_trans_ckpt: False # If true, auto transform load_checkpoint to load in distributed model -only_save_strategy: False -resume_training: False -run_mode: 'train' - -# trainer config -trainer: - type: CausalLanguageModelingTrainer - model_name: 'skywork_13b' -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False - -# runner config -runner_config: - epochs: 5 - batch_size: 1 - sink_mode: True - sink_size: 2 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.999 - eps: 1.e-8 - learning_rate: 5.e-6 - weight_decay: 0.1 - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 5.e-6 - lr_end: 5.e-6 - warmup_steps: 0 - total_steps: -1 # -1 means it will load the total steps of the dataset - -# dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "/{path}/AdvertiseGenTrain_text.mindrecord" - shuffle: True - input_columns: ["input_ids"] # "input_ids", "labels" , labels are used in instruction finetune. 
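-    # num_parallel_workers sets how many worker threads read the mindrecord files in parallel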
- num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 4 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -use_parallel: True -# parallel context config -parallel: - parallel_mode: 1 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: False - enable_alltoall: False - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" - parallel_optimizer_config: - gradient_accumulation_shard: False - parallel_optimizer_threshold: 64 -# default parallel of device num = 16 for Atlas 800T A2 -parallel_config: - data_parallel: 8 - model_parallel: 1 - pipeline_stage: 1 - use_seq_parallel: False - optimizer_shard: True - micro_batch_num: 16 - vocab_emb_dp: True - gradient_aggregation_group: 4 -# when model parallel is greater than 1, we can set micro_batch_interleave_num=2, that may accelerate the train process. -micro_batch_interleave_num: 1 - -# recompute config -recompute_config: - recompute: True - select_recompute: False - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: True - -# callbacks -callbacks: - - type: MFLossMonitor - per_print_times: 1 - - type: CheckpointMonitor - prefix: "skywork_13b" - save_checkpoint_steps: 500 - integrated_save: False - async_save: False - - type: ObsMonitor - -# mindspore context init config -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - max_call_depth: 10000 - max_device_memory: "60GB" - save_graphs: False - save_graphs_path: "./graph" - device_id: 0 - -# model config -model: - model_config: - type: LlamaConfig - batch_size: 1 # add for increase predict - seq_length: 4096 - hidden_size: 4608 - num_layers: 52 - num_heads: 36 - vocab_size: 65519 - multiple_of: 256 - rms_norm_eps: 1.0e-6 - bos_token_id: 1 - eos_token_id: 2 - pad_token_id: 0 - ignore_token_id: -100 - compute_dtype: "float16" - layernorm_compute_type: "float32" - softmax_compute_type: "float16" - rotary_dtype: "float16" - param_init_type: "float16" - use_past: False - extend_method: "None" # support "None", "PI", "NTK" - use_flash_attention: False - offset: 0 - checkpoint_name_or_path: "" - repetition_penalty: 1 - max_decode_length: 512 - top_k: 3 - top_p: 1 - do_sample: False - arch: - type: LlamaForCausalLM - -processor: - return_tensors: ms - tokenizer: - unk_token: '' - bos_token: '' - eos_token: '' - pad_token: '' - vocab_file: "/{path}/tokenizer.model" - type: LlamaTokenizer - type: LlamaProcessor - -# metric -metric: - type: PerplexityMetric - -# wrapper cell config -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 65536 - scale_factor: 2 - scale_window: 1000 - use_clip_grad: True - -eval_callbacks: - - type: ObsMonitor - -auto_tune: False -filepath_prefix: './autotune' -autotune_per_step: 10 - -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: False -profile_communication: False 
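-# profile_memory: True additionally records memory usage data during profiling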
-profile_memory: True -layer_scale: False -layer_decay: 0.65 -lr_scale_factor: 256 - -# aicc -remote_save_url: "Please input obs url on AICC platform." diff --git a/research/skywork/skywork.md b/research/skywork/skywork.md deleted file mode 100644 index 3e4cf437..00000000 --- a/research/skywork/skywork.md +++ /dev/null @@ -1,579 +0,0 @@ -# 天工 - -天工Skywork-13B 系列是由昆仑万维研究的大规模语言预训练模型,目前开源的有Skywork-13B-Base,Skywork-13B-Chat,Skywork-13B-Math,Skywork-13B-MM,MindFormers已支持Skywork-13B-Base。 - -## 前期准备 - -### 安装mindformers - -参考[README](../../README.md#二、mindformers安装)安装mindformers。 -本文操作的相对路径均为安装mindformers后的代码仓根路径。 - -### 环境要求 - -- 硬件: Ascend 910 64GB -- MindSpore: 2.2.0 -- MindSpore Lite: 2.2.0 -- MindFormers: dev -- Mindpet: 1.0.2 - -**注** skywork-13b推理可以在单卡上完成部署,全量微调至少需要8卡。 - -### 生成RANK_TABLE_FILE(多卡运行必须环节) - -运行mindformers/tools/hccl_tools.py生成RANK_TABLE_FILE的json文件 - -```bash -# 运行如下命令,在当前路径生成该机器的RANK_TABLE_FILE的json文件,生成的文件名形如hccl_8p_01234567_127.0.0.1.json -python mindformers/tools/hccl_tools.py --device_num "[0,8)" -``` - -**注** 若使用ModelArts的notebook环境,可从 `/user/config/jobstart_hccl.json` 路径下直接获取rank table,无需手动生成 - -### Skywork-13B-Base 预训练权重下载和转换 - -- 下载已转换的ckpt文件 - -本仓库提供已经转换完成的预训练权重用于训练/微调/推理,用户可自行从下方链接拉取后直接使用。 - -下载链接: - -权重:https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/MindFormers/sky_work/skywork_13b.ckpt - -词表:https://huggingface.co/Skywork/Skywork-13B-base/blob/main/tokenizer.model - -linux可用如下命令下载。 - -```shell -mkdir -p ckpt/rank_0 -cd ./ckpt/rank_0 -wget https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/MindFormers/sky_work/skywork_13b.ckpt -wget https://huggingface.co/Skywork/Skywork-13B-base/blob/main/tokenizer.model -cd ../.. -``` - -- 从huggingface下载原始权重后转换 - -需要将整个工程下载下来。 - -[Skywork-13B-Base](https://huggingface.co/Skywork/Skywork-13B-base) - -如果使用git命令下载,下载前请先确保已安装git-lfs。 - -```shell -git lfs install -git clone https://huggingface.co/Skywork/Skywork-13B-base -``` - -执行权重转换脚本 - -```shell -cd research -python skywork/convert_weight.py --torch_ckpt_path TORCH_CKPT_PATH --mindspore_ckpt_path MS_CKPT_NAME -``` - -```text -# 参数说明 -torch_ckpt_path: huggingface Skywork-13B-Base权重保存目录路径下任意权重bin文件,根据该文件路径读取目录下全部权重 -mindspore_ckpt_path: mindspore权重文件保存路径 -``` - -**注**: 请安装torch=1.13.1和transformers=4.30.2版本。如果执行报错,请检查并安装requests、decorator、pandas、sympy。 - -### 模型权重切分与合并 - -从huggingface或官方github仓库转换而来的权重通常是单卡权重,基于该权重进行多卡微调,评测,推理,涉及ckpt从单机策略到分布式策略的切换。 - -通常训练采用分布式训练,基于该权重进行评测,推理多采用单卡,涉及ckpt从分布式策略到单机策略的切换。 - -以上涉及到ckpt的单卡,多卡转换,详细教程请参考特性文档[模型权重切分与合并](../../docs/feature_cards/Transform_Ckpt.md) - -# Skywork-13B-Base - -Skywork-13B-Base 是在经过高质量清洗过滤的3.2万亿个多语言(主要是中文和英文)和代码数据上进行训练的,它在多种评测和各种基准测试上都展现了同等规模模型的最佳效果。网络结构与llama相同。 - -## 训练与微调 - -目前提供了模型的基础配置文件`research/skywork/run_skywork_13b.yaml`。使用前请将配置文件中路径相关参数修改为实际路径。 - -## 模型性能 - -| config | task | Datasets | SeqLength | metric | phase | score | performance(tokens/s/p) | -| ------------------------------------------------------------ | --------------------- | --------- | --------- | ------ | ----------------- | --------- | ------------ | -| [skywork_13b](./run_skywork_13b.yaml) | text_generation | ADGEN | 4096 | - | [train](#预训练) | - | 1105.92 | -| [skywork_13b](./run_skywork_13b.yaml) | text_generation | ADGEN | 4096 | - | [finetune](#微调) | - | 1105.92 | - -### 数据集准备 - -使用Skywork-13B-Base进行训练或者微调时,需要使用Skywork-13B-Base配套的tokenizer.model处理数据集,以及选用Skywork-13B-Base的yaml配置文件进行任务启动。 - -目前提供[ADGEN](https://cloud.tsinghua.edu.cn/f/b3f119a008264b1cabd1/?dl=1) (广告生成)数据集的预处理脚本用于全参微调任务。 - -ADGEN数据集样式 - -```text -{ - "content": 
"类型#裤*版型#宽松*风格#性感*图案#线条*裤型#阔腿裤", - "summary": "宽松的阔腿裤这两年真的吸粉不少,明星时尚达人的心头爱。毕竟好穿时尚,谁都能穿出腿长2米的效果宽松的裤腿,当然是遮肉小能手啊。上身随性自然不拘束,面料亲肤舒适贴身体验感棒棒哒。系带部分增加设计看点,还让单品的设计感更强。腿部线条若隐若现的,性感撩人。颜色敲温柔的,与裤子本身所呈现的风格有点反差萌。" -} -{ - "content": "类型#裙*风格#简约*图案#条纹*图案#线条*图案#撞色*裙型#鱼尾裙*裙袖长#无袖", - "summary": "圆形领口修饰脖颈线条,适合各种脸型,耐看有气质。无袖设计,尤显清凉,简约横条纹装饰,使得整身人鱼造型更为生动立体。加之撞色的鱼尾下摆,深邃富有诗意。收腰包臀,修饰女性身体曲线,结合别出心裁的鱼尾裙摆设计,勾勒出自然流畅的身体轮廓,展现了婀娜多姿的迷人姿态。" -} -``` - -- 转换成mindrecord格式 - -```shell -cd research -python skywork/skywork_dataprocess.py --input_file_path /{path}/AdvertiseGenTrain_text.jsonl --output_file /{path}/AdvertiseGenTrain_text.mindrecord --model_file /{path}/tokenizer.model --seq_length 4096 -``` - -参数说明 - -```text -input_file_path:ADGEN数据集输入文件路径 -output_file:生成的mindrecord目标文件路径 -dataset_type:数据集类型,目前仅支持"text" -model_file:tokenizer.model文件路径 -seq_length:数据长度 -``` - -### 预训练 - -- 单机多卡预训练示例 - -```shell -cd research -# Usage Help: bash run_singlenode.sh [START_CMD] [RANK_TABLE_FILE] [DEVICE_RANGE] [DEVICE_NUM] -bash run_singlenode.sh \ -"python skywork/run_skywork.py \ ---config skywork/run_skywork_13b.yaml \ ---run_mode finetune \ ---train_dataset /{path}/AdvertiseGenTrain_text.mindrecord \ ---auto_trans_ckpt True \ ---use_parallel True" \ -../hccl_8p_01234567_127.0.0.1.json [0,8] 8 -``` - -**参数说明** - -```text -START_CMD:Python启动命令,其中 - config:为research/skywork文件夹下面的run_skywork_13b.yaml配置文件,配置文件参数请按需修改 - run_mode:任务运行状态,支持关键字train/finetune/eval/predict/export - train_dataset:训练数据集路径 - auto_trans_ckpt:是否自动转换ckpt - use_parallel:是否使用并行模式 -RANK_TABLE_FILE:由 mindformers/tools/hccl_tools.py 生成的分布式json文件 -DEVICE_RANGE:为单机分布式卡的范围,如 '[0,8]' 为8卡分布式,不包含8本身 -DEVICE_NUM:使用的卡的个数 -``` - -**注**:由于模型较大,未切分的模型当seq_length为4096时,仅能进行batch_size为1的单机8卡训练。如果要使用其他并行策略训练,请参考 [多卡权重切分](../../docs/feature_cards/Transform_Ckpt.md) - -### 微调 - -- 单机多卡微调示例 - -```shell -cd research -# Usage Help: bash run_singlenode.sh [START_CMD] [RANK_TABLE_FILE] [DEVICE_RANGE] [DEVICE_NUM] -bash run_singlenode.sh \ -"python skywork/run_skywork.py \ ---config skywork/run_skywork_13b.yaml \ ---run_mode finetune \ ---load_checkpoint /{path}/ \ ---train_dataset /{path}/AdvertiseGenTrain_text.mindrecord \ ---auto_trans_ckpt True \ ---use_parallel True" \ -../hccl_8p_01234567_127.0.0.1.json [0,8] 8 -``` - -**参数说明** - -```text -START_CMD:Python启动命令,其中 - config:为research/skywork文件夹下面的run_skywork_13b.yaml配置文件,配置文件参数请按需修改 - run_mode:任务运行状态,支持关键字train/finetune/eval/predict/export - load_checkpoint:权重路径。例如路径形式为/path/ckpt/rank_0/skywork_13b.ckpt,则参数填写为/path/ckpt - train_dataset:训练数据集路径 - auto_trans_ckpt:是否自动转换ckpt - use_parallel:是否使用并行模式 -RANK_TABLE_FILE:由 mindformers/tools/hccl_tools.py 生成的分布式json文件 -DEVICE_RANGE:为单机分布式卡的范围,如 '[0,8]' 为8卡分布式,不包含8本身 -DEVICE_NUM:使用的卡的个数 -``` - -**注**:由于模型较大,未切分的模型当seq_length为4096时,仅能进行batch_size为1的单机8卡训练。如果要使用其他并行策略训练,请参考 [多卡权重切分](../../docs/feature_cards/Transform_Ckpt.md) - -## MindSpore推理 - -> 接口说明请参考[API接口](https://gitee.com/mindspore/transformer/wikis/API/) -> -> 遵从Skywork-13B的license,本模型需要用户自行下载权重进行处理,故使用时和llama存在一定区别,具体如下: - -在启动前,请先行在配置文件run_skywork_13b.yaml中将processor.tokenizer.vocab_file的路径配置为实际路径;如果使用增量推理,需要在配置文件中将model.model_config.use_past值设置为True。例如: - -```yaml -processor: - return_tensors: ms - tokenizer: - ... - vocab_file: '/path/Skywork-13B/tokenizer.model' # 修改为实际路径 - ... -model: - model_config: - ... - use_past: True - ... 
-``` - -- generate接口推理: - -```python -from mindspore import context -from mindformers.generation import GenerationConfig -from mindformers.tools.register import MindFormerConfig -from mindformers import LlamaForCausalLM, LlamaConfig, LlamaTokenizer - -context.set_context(device_id=0, mode=0) - -# init skywork-13b-Base model -skywork_model_path = "/path/Skywork-13B/skywork_13b.ckpt" # 填写实际路径 -config_path = 'skywork/run_skywork_13b.yaml' # 填写实际路径 - -config = MindFormerConfig(config_path) -config.model.model_config.checkpoint_name_or_path = skywork_model_path -skywork_config = LlamaConfig(**config.model.model_config) - -skywork_model = LlamaForCausalLM(config=skywork_config) - -# init skywork-13b-Base tokenizer -tokenizer_path = "/path/Skywork-13B/tokenizer.model" # 填写实际路径 -tokenizer = LlamaTokenizer(vocab_file=tokenizer_path) -generation_config = GenerationConfig( - temperature=1, - top_p=1.0, - top_k=1, - num_beams=1, - eos_token_id=tokenizer.eos_token_id, - pad_token_id=tokenizer.pad_token_id, - do_sample=True, - max_length=128, -) - -inputs = tokenizer("陕西的省会是西安")["input_ids"] -outputs = skywork_model.generate(inputs, generation_config=generation_config) -print(tokenizer.decode(outputs)) - -# 运行结果 -# ['陕西的省会是西安,西安是陕西的政治、经济、文化中心,也是陕西的交通枢纽。西安的交通非常发达,有很多的交通工具,可以方便的到达陕西的各个地方。\n西安的交通工具有:\n1、飞机:西安咸阳国际机场是中国重要的航空港,也是中国大陆第四大航空港。西安咸阳国际机场位于西安市西北方向,距市中心约30公里,有高速公路与市区相连。\n2、火车:西安火车站位于西安市解放路,是西安最大的火车站,也是中国西部地区最大'] -``` - -- Trainer高阶接口推理 - -skywork的高阶接口使用脚本已集成在run_skywork.py脚本中,运行此脚本命令示例: - -```shell -cd research -python skywork/run_skywork.py --config skywork/run_skywork_13b.yaml --load_checkpoint /path/Skywork-13B/skywork_13b.ckpt --run_mode=predict --predict_data "陕西的省会是西安" --predict_length 100 --use_parallel False --device_id 0 -#运行结果:[{'text_generation_text': ['陕西的省会是西安,西安是陕西的政治、经济、文化中心,也是陕西的交通枢纽。西安的交通非常发达,有很多的交通工具,可以方便的到达陕西的各个地方。\n西安的交通工具有:\n1、飞机:西安咸阳国际机场是中国重要的航空港,也是中国大陆第四大航空港。西安咸阳国际机场位于西安市西北方向,距市中心约30公里,有高速公路与']}] -``` - -- pipeline接口推理 - -```python -from mindspore import context -from mindformers.pipeline import pipeline -from mindformers.tools.register import MindFormerConfig -from mindformers import LlamaForCausalLM, LlamaConfig, LlamaTokenizer - -context.set_context(device_id=0, mode=0) - -skywork_model_path = "/path/Skywork-13B/skywork_13b.ckpt" # 填写实际路径 -config_path = 'skywork/run_skywork_13b.yaml' # 填写实际路径 -config = MindFormerConfig(config_path) -config.model.model_config.checkpoint_name_or_path = skywork_model_path -use_past = True # 按需设置 -config.model.model_config.use_past = use_past -skywork_config = LlamaConfig(**config.model.model_config) - -skywork_model = LlamaForCausalLM(skywork_config) - -# init skywork-13b-Base tokenizer -tokenizer_path = "/path/Skywork-13B/tokenizer.model" # 填写实际路径 -tokenizer = LlamaTokenizer(tokenizer_path, add_bos_token=True, add_eos_token=False) -pipeline_task = pipeline("text_generation", model=skywork_model, tokenizer=tokenizer, max_length=32) -peline_result = pipeline_task("陕西的省会是西安", - top_k=1, - do_sample=False, - top_p=1.0, - repetition_penalty=1, - max_length=128, - eos_token_id=tokenizer.eos_token_id, - pad_token_id=tokenizer.pad_token_id, - use_past=use_past) - -print(peline_result) - -# 运行结果 -[{'text_generation_text': ['陕西的省会是西安,西安是陕西的政治、经济、文化中心,也是陕西的交通枢纽。西安的交通非常发达,有很多的交通工具,可以方便的到达陕西的各个地方。\n西安的交通工具有:\n1、飞机:西安咸阳国际机场是中国重要的航空港,也是中国大陆第四大航空港。西安咸阳国际机场位于西安市西北方向,距市中心约30公里,有高速公路与市区相连。\n2、火车:西安火车站位于西安市解放路,是西安最大的火车站,也是中国西部地区最大']}] -``` - -## MindSpore Lite推理 - -### ckpt转换为mindir - -```shell -# 如果需要使用增量推理,use_past设置为True;如果 -cd research -python 
skywork/run_skywork.py --config skywork/run_skywork_13b.yaml --load_checkpoint /path/Skywork-13B/skywork_13b.ckpt --run_mode=export --use_parallel False --use_past True --batch_size 1 --device_id 0 -``` - -**注** - -1. 如果需要使用增量推理,use_past设置为True。设置use_past=True后生成的mindir有两个,分别在output/mindir_full_checkpoint和output/mindir_inc_checkpoint目录中。如果不设置use_past或者use_past=False,则只生成mindir_full_checkpoint目录,后续无法使用增量推理。 - -2. 不同batch_size的推理需求需要对应生成不同的mindir,由参数--batch_size指定。 - -### lite推理 - -- step1. 新建context.cfg配置文件 - -```text -[ascend_context] -plugin_custom_ops=All -provider=ge -[ge_session_options] -ge.exec.formatMode=1 -ge.exec.precision_mode=must_keep_origin_dtype -ge.externalWeight=1 -ge.exec.atomicCleanPolicy=1 -``` - -- step2. 新建Python脚本 - -```python -# run_skywork_infer_lite.py -import argparse - -from mindformers import LlamaTokenizer -from mindformers.inference import InferTask -from mindformers.inference.infer_config import InferConfig - - -def infer_main(args): - lite_config = InferConfig( - prefill_model_path=args.full_model_path, - increment_model_path=args.inc_model_path, - model_name='llama', - model_type="mindir", - infer_seq_length=args.seq_length, - ge_config_path=args.config_path, - device_id=args.device_id, - add_special_tokens=False, - ) - tokenizer_path = args.token_path - tokenizer = LlamaTokenizer(tokenizer_path, add_bos_token=True, add_eos_token=False) - - batch_input = [ - ["陕西的省会是西安,甘肃的省会是兰州,河南的省会是郑州" for i in range(args.batch_size)] - ] - input_list = batch_input * args.loop - - infer_model = InferTask.get_infer_task("text_generation", lite_config, tokenizer=tokenizer) - - for user_input in input_list: - output = infer_model.infer(user_input, - pad_token_id=tokenizer.pad_token_id, - eos_token_id=tokenizer.eos_token_id, - max_length=args.max_length, - add_special_tokens=True) - for out in output: - print(out) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--device_id', default=0, type=int, help='ID of the target device') - parser.add_argument('--full_model_path', default=None, type=str, help="load mindir full checkpoint") - parser.add_argument('--inc_model_path', default=None, type=str, help="load mindir inc checkpoint") - parser.add_argument('--config_path', default=None, type=str, help="ge config path") - parser.add_argument('--seq_length', default=4096, type=int) - parser.add_argument('--batch_size', default=1, type=int) - parser.add_argument('--max_length', default=128, type=int) - parser.add_argument('--loop', default=2, type=int) - parser.add_argument('--token_path', default=None, type=str) - args = parser.parse_args() - print(args) - infer_main(args) -``` - -- step3. 
使用shell命令启动推理 - -```shell -# 如果需要增量推理,使用inc_model_path指定路径,否则不需要。token_path参数需指定为tokenizer.model实际路径。 -cd research -python skywork/run_skywork_infer_lite.py --full_model_path output/mindir_full_checkpoint/rank_0_graph.mindir \ ---inc_model_path output/mindir_inc_checkpoint/rank_0_graph.mindir --config_path skywork/context.cfg \ ---token_path {path}/tokenizer.model \ ---seq_length 4096 --max_length 128 --batch_size 1 --loop 2 --device_id 0 - -# 运行结果: -# 陕西的省会是西安,甘肃的省会是兰州,河南的省会是郑州,湖北的省会是武汉,湖南的省会是长沙,江西的省会是南昌,安徽的省会是合肥,四川的省会是成都,贵州的省会是贵阳,云南的省会是昆明,西藏的省会是拉萨,青海的省会是西宁,宁夏的省会是银川,新疆的省会是乌鲁木齐。 -``` - -## 推理性能评测 - -### 评测结果 - -|batch_size|seq_length|Atlas 800T A2(400T)tokens/s|A100(首次) tokens/s|对比 -|----------|----------|----------|----------|----------| -|2|1024|45.16967126|36.73233689|1.229697729 -|2|512|43.1641737|38.4874702|1.121512364 -|2|256|39.14945113|38.0915182|1.027773452 -|2|128|32.82671155|35.46970082|0.925486 -|2|64|23.67107342|29.16003315|0.811764284 -|2|32|10.86891748|16.52500627|0.657725468 -|平均|-|32.47499976|32.41101092|1.001974293 - -### 评测流程 - -推理性能评测基于[MindSpore Lite推理](#mindspore-lite)进行。 - -- step1 生成增量推理的mindir文件 - -```shell -cd research -python skywork/run_skywork.py --config skywork/run_skywork_13b.yaml --load_checkpoint /path/Skywork-13B/skywork_13b.ckpt --run_mode=export --use_parallel False --use_past True --batch_size 1 --device_id 0 -``` - -## ms方式开源数据集评测 - -### 评测结果 - -| |ceval|mmlu|cmmlu| -|---|-----|----|-----| -|官网|60.6|62.1|61.8 | -|ms|60.63|62.14|61.83 | - -### 评测流程 - -所用数据集为ceval、mmlu、cmmlu评测集。 - -评测代码参考目录[evaluate](../../scripts/examples/evaluate) - -参数说明 - -```text --d:数据集路径。 --c:模型文件路径,pytorch需指定到目录,mindformers需指定到ckpt文件。 --t:tokenizer.model文件路径。 ---config:配置文件路径。 -``` - -1. CEVAL - -```shell -cd research/skywork -wget https://huggingface.co/datasets/ceval/ceval-exam/resolve/main/ceval-exam.zip -mkdir -p data/ceval -mv ceval-exam.zip data/ceval -cd data/ceval; unzip ceval-exam.zip -cd ../../ - -python ../../scripts/examples/evaluate/ceval/evaluate_ceval.py -d data/ceval/ -c /{path}/skywork_13b.ckpt -t /{path}/tokenizer.model --config run_skywork_13b.yaml --device_id 0 -``` - -2. MMLU - -```Shell -cd research/skywork -wget https://people.eecs.berkeley.edu/~hendrycks/data.tar -mkdir -p data/mmlu -mv data.tar data/mmlu -cd data/mmlu; tar xf data.tar -cd ../../ - -python ../../scripts/examples/evaluate/mmlu/evaluate_mmlu.py -d data/mmlu/data -c /{path}/skywork_13b.ckpt -t /{path}/tokenizer.model --config run_skywork_13b.yaml --device_id 1 -``` - -3. CMMLU - -```Shell -cd research/skywork -wget https://huggingface.co/datasets/haonan-li/cmmlu/resolve/main/cmmlu_v1_0_1.zip -mkdir data/cmmlu -mv cmmlu_v1_0_1.zip data/cmmlu -cd data/cmmlu; unzip cmmlu_v1_0_1.zip -cd ../../ - -python ../../scripts/examples/evaluate/cmmlu/evaluate_cmmlu.py -d data/cmmlu/ -c /{path}/skywork_13b.ckpt -t /{path}/tokenizer.model --config run_skywork_13b.yaml --device_id 7 -``` - -## mslite方式开源数据集评测 - -### 评测结果 - -| |ceval|mmlu|cmmlu| -|---|-----|----|-----| -|官网|60.6|62.1|61.8 | -|mslite|60.61|62.13|61.83 | - -### 评测流程 - -所用数据集为ceval、mmlu、cmmlu评测集。 - -评测代码参考目录[evaluate](../../scripts/examples/evaluate) - -参数说明 - -```text --d:数据集路径。 --c:模型文件路径,pytorch需指定到目录,mindformers需指定到ckpt文件。 --t:tokenizer.model文件路径。 ---config_path:GE配置文件context.cfg路径。 ---full_model_path:导出的mindir路径。 -``` - -**注** context.cfg文件生成和mindir的导出方式参考[MindSpore Lite推理](#MindSpore Lite推理) - -1. 
CEVAL - -```shell -cd research/skywork -wget https://huggingface.co/datasets/ceval/ceval-exam/resolve/main/ceval-exam.zip -mkdir -p data/ceval -mv ceval-exam.zip data/ceval -cd data/ceval; unzip ceval-exam.zip -cd ../../ - -python ../../scripts/examples/evaluate/ceval/evaluate_ceval_lite.py -d data/ceval --config_path context.cfg --token_path /{path}/tokenizer.model --full_model_path output/mindir_full_checkpoint/rank_0_graph.mindir --device_id 6 -``` - -2. MMLU - -```shell -cd research/skywork -wget https://people.eecs.berkeley.edu/~hendrycks/data.tar -mkdir -p data/mmlu -mv data.tar data/mmlu -cd data/mmlu; tar xf data.tar -cd ../../ - -python ../../scripts/examples/evaluate/mmlu/evaluate_mmlu_lite.py -d data/mmlu/data --config_path context.cfg --token_path /{path}/tokenizer.model --full_model_path output/mindir_full_checkpoint/rank_0_graph.mindir --device_id 6 -``` - -3. CMMLU - -```shell -cd research/skywork -wget https://huggingface.co/datasets/haonan-li/cmmlu/resolve/main/cmmlu_v1_0_1.zip -mkdir data/cmmlu -mv cmmlu_v1_0_1.zip data/cmmlu -cd data/cmmlu; unzip cmmlu_v1_0_1.zip -cd ../../ - -python ../../scripts/examples/evaluate/cmmlu/evaluate_cmmlu_lite.py -d data/cmmlu/ --config_path context.cfg --token_path /{path}/tokenizer.model --full_model_path output/mindir_full_checkpoint/rank_0_graph.mindir --device_id 6 -``` diff --git a/research/skywork/skywork_dataprocess.py b/research/skywork/skywork_dataprocess.py deleted file mode 100644 index fbb6352f..00000000 --- a/research/skywork/skywork_dataprocess.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -transform skywork text dataset to mindrecord. 
-""" - -import math -import argparse -import json -import os -import collections -import pathlib -import numpy as np - -from mindspore.mindrecord import FileWriter -from mindformers.models.llama.llama_tokenizer import LlamaTokenizer - - -def create_instance(tokenizer, ids, max_length=None): - """A single sample instance for LM task.""" - pair_ids = None - - output = tokenizer.prepare_for_model(ids=ids, - pair_ids=pair_ids, - add_special_tokens=False, - max_length=max_length, - padding='max_length', - truncate_direction="LEFT", - return_overflowing_tokens=False, - return_attention_mask=True) - return output - - -def write_instance_to_file(instance_writer, instance): - """write the instance to file""" - input_ids = instance["input_ids"] - labels = instance["input_ids"] - - features = collections.OrderedDict() - features["input_ids"] = np.asarray(input_ids).astype(np.int32) - features["labels"] = np.asarray(labels).astype(np.int32) - instance_writer.write_raw_data([features]) - - return features - - -def tokenize_text(tokenizer, text_list, seq_length, instance_writer, batch_size): - """tokenize text dataset""" - dataset_all = [] - for data in text_list: - dataset_all.append(data['text']) - - batch_num = math.ceil(len(dataset_all) / batch_size) - print("dataset size ", len(dataset_all)) - print("batch_size ", batch_size) - total_written = 0 - for i in range(batch_num): - dataset_valid = dataset_all[i * batch_size:(i + 1) * batch_size] - data_tokens = tokenizer(dataset_valid) - input_ids = data_tokens["input_ids"] - total_ids = [item for sublist in input_ids for item in sublist] - - block_size = seq_length + 1 - total_length = len(total_ids) - total_length = (total_length // seq_length) * seq_length - for j in range(total_length // seq_length): - ids = total_ids[seq_length * j:seq_length * (j + 1)] - ids.append(tokenizer.pad_token_id) - - output = create_instance(tokenizer, ids, block_size) - - write_instance_to_file(instance_writer, instance=output) - total_written += 1 - - print("Wrote {} total instances".format(total_written)) - - -def get_text(args_param): - data_path = pathlib.Path(args_param.input_file_path) - - text_list = [] - - with open(data_path, 'r', encoding="utf-8") as input_file: - for line in input_file: - data = json.loads(line) - text_list.append({"text": data["content"] + data["summary"]}) - return text_list - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--input_file_path", type=str, default="AdvertiseGenTrain.jsonl") - parser.add_argument('--dataset_type', type=str, default='text') - parser.add_argument('--output_file', type=str, default='AdvertiseGenTrain_text.mindrecord') - parser.add_argument('--tokenizer', type=str, default='llama', choices=['llama']) - parser.add_argument('--model_file', type=str, default='./tokenizer.model') - parser.add_argument('--file_partition', type=int, default=1) - parser.add_argument('--repeat', type=int, default=1) - parser.add_argument('--seq_length', type=int, default=4096) - parser.add_argument('--batch_size', type=int, default=1000) - args = parser.parse_args() - - text_dataset = get_text(args) - - out_dir, out_file = os.path.split(os.path.abspath(args.output_file)) - if not os.path.exists(out_dir): - os.mkdir(out_dir) - schema = {'input_ids': {"type": "int32", "shape": [-1]}, 'labels': {"type": "int32", "shape": [-1]}} - writer = FileWriter(file_name=args.output_file, - shard_num=args.file_partition) - writer.add_schema(schema, args.dataset_type) - # Start to load tokenizer - if not 
os.path.exists(args.model_file): - raise FileNotFoundError(f"file {args.model_file} do not exists.") - - transforms_count = 0 - word_tokenizer = LlamaTokenizer(vocab_file=args.model_file) - word_tokenizer.add_bos_token = True - word_tokenizer.add_eos_token = False - tokenize_text(word_tokenizer, text_dataset, args.seq_length, writer, args.batch_size) - writer.commit() - out_file = args.output_file - if args.file_partition > 1: - out_file += '0' - print("Transform finished, output files refer: {}".format(out_file)) diff --git a/research/telechat/convert_reversed.py b/research/telechat/convert_reversed.py deleted file mode 100644 index dfe760f0..00000000 --- a/research/telechat/convert_reversed.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Convert checkpoint from huggingface""" -import re -import argparse -import torch -import mindspore as ms - -from mindformers.utils.convert_utils import ms2pt - -layer_rename_map = { - 'model.tok_embeddings.embedding_weight': 'word_embeddings.weight', - 'attention_norm.weight': 'input_layernorm.weight', - 'attention.wo.weight': 'self_attention.dense.weight', - 'attention.wo.bias': 'self_attention.dense.bias', - 'attention.wq.weight': 'self_attention.query.weight', - 'attention.wk_v.weight': 'self_attention.key_value.weight', - 'feed_forward.w1.weight': 'mlp.gate_proj.weight', - 'feed_forward.w2.weight': 'mlp.down_proj.weight', - 'feed_forward.w2.bias': 'mlp.down_proj.bias', - 'feed_forward.w3.weight': 'mlp.up_proj.weight', - 'ffn_norm.weight': 'post_attention_layernorm.weight', - 'model.norm_out.weight': 'ln_f.weight' -} - - -def convert_ms_to_pt(input_path, output_path, dtype=None, **kwargs): - """convert ms weight to hf.""" - telechat_type = kwargs.pop("telechat_type", "telechat_12b") - if telechat_type == "telechat_12b": - layer_rename_map["lm_head.weight"] = "lm_head.weight" - layer_rename_map["model.tok_embeddings.embedding_weight"] = "transformer.word_embeddings.weight" - layer_rename_map["model.norm_out.weight"] = "transformer.ln_f.weight" - param_dict = ms.load_checkpoint(input_path) - state_dict = {} - for name, value in param_dict.items(): - value = ms2pt(value, dtype) - if name in layer_rename_map: - name = layer_rename_map[name] - else: - match = re.match(r"model\.layers\.(\d+).(\w+\.\w+\.\w+|\w+\.\w+)$", name) - layer_number = int(match.group(1)) - text = match.group(2) - name = f"h.{layer_number}.{layer_rename_map[text]}" - state_dict[name] = value - - torch.save(state_dict, output_path) - print(f"*** finish ms convert torch model, torch_ckpt save in {output_path} ***") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Telechat convert script") - parser.add_argument("--mindspore_path", - type=str, - default="", - help="The output mindspore checkpoint path.") - parser.add_argument("--torch_path", - type=str, - default="", - help="The input torch 
checkpoint path.") - parser.add_argument("--telechat_type", - type=str, - default="telechat_12b", - help="Telechat version.") - args = parser.parse_args() - - # convert hf ckpt to ms - convert_ms_to_pt(args.mindspore_path, args.torch_path, telechat_type=args.telechat_type) diff --git a/research/telechat/convert_weight.py b/research/telechat/convert_weight.py deleted file mode 100644 index da252490..00000000 --- a/research/telechat/convert_weight.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Convert checkpoint from huggingface""" -import os.path -import re -import argparse -import torch -import mindspore as ms - -from mindformers.utils.convert_utils import pt2ms - - -def layer_name_mapping(telechat_type, key): - """Convert huggingface PP weights mapping in MindSpore. - - return: new_name - """ - # Handle first and last layers - layer_rename_map = { - "word_embeddings.weight": "model.tok_embeddings.embedding_weight", - "input_layernorm.weight": "attention_norm.weight", - "self_attention.dense.weight": "attention.wo.weight", - "self_attention.dense.bias": "attention.wo.bias", - "self_attention.query.weight": "attention.wq.weight", - "self_attention.key_value.weight": "attention.wk_v.weight", - "mlp.gate_proj.weight": "feed_forward.w1.weight", - "mlp.down_proj.weight": "feed_forward.w2.weight", - "mlp.down_proj.bias": "feed_forward.w2.bias", - "mlp.up_proj.weight": "feed_forward.w3.weight", - "post_attention_layernorm.weight": "ffn_norm.weight", - "ln_f.weight": "model.norm_out.weight" - } - if telechat_type == "telechat_12b": - del layer_rename_map["word_embeddings.weight"] - del layer_rename_map["ln_f.weight"] - layer_rename_map["lm_head.weight"] = "lm_head.weight" - layer_rename_map["transformer.word_embeddings.weight"] = "model.tok_embeddings.embedding_weight" - layer_rename_map["transformer.ln_f.weight"] = "model.norm_out.weight" - if key in layer_rename_map: - return layer_rename_map[key] - - # Handle transformer blocks - match = re.match(r'^\w*\.(\d+)\.(\w+\.\w+\.\w+|\w+\.\w+)$', key) - layer_number = int(match.group(1)) - text = match.group(2) - return f"model.layers.{layer_number}." 
+ layer_rename_map[text] - - -def convert_pt_to_ms(input_path, output_path, dtype=None, **kwargs): - """convert hf weight to ms""" - telechat_type = kwargs.pop("telechat_type", "telechat_12b") - state_dict = {} - torch_dir = os.path.dirname(input_path) - for file_name in os.listdir(torch_dir): - if file_name.startswith("pytorch_model") and file_name.endswith(".bin"): - file_name = os.path.join(torch_dir, file_name) - state_dict.update(torch.load(file_name, map_location='cpu')) - - ms_params = [] - for k, v in state_dict.items(): - ms_params.append({'name': layer_name_mapping(telechat_type, k), 'data': pt2ms(v, dtype)}) - - ms.save_checkpoint(ms_params, output_path) - print(f"*** finish torch convert ms model, ms_ckpt save in {output_path} ***") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Telechat convert script") - parser.add_argument("--torch_path", - type=str, - default="", - help="The input torch checkpoint path.") - parser.add_argument("--mindspore_path", - type=str, - default="", - help="The output mindspore checkpoint path.") - parser.add_argument("--telechat_type", - type=str, - default="telechat_12b", - help="Telechat version.") - args = parser.parse_args() - - # convert hf ckpt to ms - convert_pt_to_ms(args.torch_path, args.mindspore_path, telechat_type=args.telechat_type) diff --git a/research/telechat/convert_weight_ms_to_torch.py b/research/telechat/convert_weight_ms_to_torch.py deleted file mode 100644 index b2822166..00000000 --- a/research/telechat/convert_weight_ms_to_torch.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""Convert MindSpore checkpoint to Torch""" -import os -import re -import argparse -import torch -from mindspore import load_checkpoint - - -def layer_name_mapping(model_name, key): - """Convert huggingface PP weights mapping in MindSpore. 
- - return: new_name - """ - prefix = '' - # Handle first and last layers - layer_rename_map = { - "model.tok_embeddings.embedding_weight": "word_embeddings.weight", - "attention_norm.weight": "input_layernorm.weight", - "attention.wo.weight": "self_attention.dense.weight", - "attention.wo.bias": "self_attention.dense.bias", - "attention.wq.weight": "self_attention.query.weight", - "attention.wk_v.weight": "self_attention.key_value.weight", - "feed_forward.w1.weight": "mlp.gate_proj.weight", - "feed_forward.w2.weight": "mlp.down_proj.weight", - "feed_forward.w2.bias": "mlp.down_proj.bias", - "feed_forward.w3.weight": "mlp.up_proj.weight", - "ffn_norm.weight": "post_attention_layernorm.weight", - "model.norm_out.weight": "ln_f.weight" - } - if model_name == "telechat_12b": - layer_rename_map["lm_head.weight"] = "lm_head.weight" - layer_rename_map["model.tok_embeddings.embedding_weight"] = "transformer.word_embeddings.weight" - layer_rename_map["model.norm_out.weight"] = "transformer.ln_f.weight" - if key in layer_rename_map: - return prefix + layer_rename_map[key] - - match = re.compile(r'\w+\.\w+.(\d+)\.(.*)') - layer_number = match.findall(key)[0][0] - text = match.findall(key)[0][1] - # Handle transformer blocks - return f"{prefix}h.{layer_number}." + layer_rename_map[text] - -def ms_to_torch(model_name, ms_weights): - """Convert ms layers to torch.""" - torch_params = {} - for k, v in ms_weights.items(): - new_name = layer_name_mapping(model_name, k) - torch_params[new_name] = torch.from_numpy(v.asnumpy()) - return torch_params - -def process_shard_files(config): - if config.torch_path and not os.path.exists(config.torch_path): - os.makedirs(config.torch_path, exist_ok=True) - - file_name = "torch" - ms_params = load_checkpoint(config.mindspore_path) - torch_params = ms_to_torch(config.model_name, ms_params) - save_file = config.torch_path + '/' + file_name + '.pth' - torch.save(torch_params, save_file) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Telechat convert script") - parser.add_argument("--mindspore_path", - type=str, - default="", - help="The output mindspore checkpoint path.") - parser.add_argument("--torch_path", - type=str, - default="", - help="The input torch checkpoint path.") - parser.add_argument("--model_name", - type=str, - default="telechat_12b", - help="The name of model, supports name in {'telechat_7b', 'telechat_12b'}") - args = parser.parse_args() - - # convert hf ckpt to ms - process_shard_files(config=args) - current_path = os.getcwd() - torch_ckpt_path = os.path.join(current_path, args.torch_path) - print("*** finish ms convert torch model, torch_ckpt save in {} ***".format(torch_ckpt_path)) diff --git a/research/telechat/convert_weight_torch_to_ms.py b/research/telechat/convert_weight_torch_to_ms.py deleted file mode 100644 index 1df19341..00000000 --- a/research/telechat/convert_weight_torch_to_ms.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""Convert checkpoint from huggingface""" -import os -import re -import argparse -import torch -import mindspore -from mindspore import Tensor, Parameter - -def layer_name_mapping(model_name, key): - """Convert huggingface PP weights mapping in MindSpore. - - return: new_name - """ - prefix = '' - # Handle first and last layers - layer_rename_map = { - "word_embeddings.weight": "model.tok_embeddings.embedding_weight", - "input_layernorm.weight": "attention_norm.weight", - "self_attention.dense.weight": "attention.wo.weight", - "self_attention.dense.bias": "attention.wo.bias", - "self_attention.query.weight": "attention.wq.weight", - "self_attention.key_value.weight": "attention.wk_v.weight", - "mlp.gate_proj.weight": "feed_forward.w1.weight", - "mlp.down_proj.weight": "feed_forward.w2.weight", - "mlp.down_proj.bias": "feed_forward.w2.bias", - "mlp.up_proj.weight": "feed_forward.w3.weight", - "post_attention_layernorm.weight": "ffn_norm.weight", - "ln_f.weight": "model.norm_out.weight" - } - if model_name == "telechat_12b": - del layer_rename_map["word_embeddings.weight"] - del layer_rename_map["ln_f.weight"] - layer_rename_map["lm_head.weight"] = "lm_head.weight" - layer_rename_map["transformer.word_embeddings.weight"] = "model.tok_embeddings.embedding_weight" - layer_rename_map["transformer.ln_f.weight"] = "model.norm_out.weight" - if key in layer_rename_map: - return prefix + layer_rename_map[key] - - # Handle transformer blocks - match = re.match(r'^\w+\.\w*\.(\d+)\.(\w+\.\w+\.\w+|\w+\.\w+)$', key) - layer_number = int(match.group(1)) - text = match.group(2) - return f"{prefix}model.layers.{layer_number}." + layer_rename_map[text] - -def hf_to_ms(hf_weights, model_name, ms_dtype=mindspore.float16, for_save=False): - """Convert hf layers to ms.""" - ms_params = {} - for k, v in hf_weights.items(): - new_name = layer_name_mapping(model_name, k) - new_tensor = Tensor(v.float().detach().numpy(), ms_dtype) - ms_params[new_name] = Parameter(new_tensor, name=new_name) - if for_save: - return [{'name': k, 'data': v} for k, v in ms_params.items()] - return ms_params - -def process_shard_files(files, config, ms_dtype=mindspore.float16): - ''' torch ckpt files loop''' - if config.mindspore_path and not os.path.exists(args.mindspore_path): - os.makedirs(config.mindspore_path, exist_ok=True) - - ms_file_name = "mindspore" - combine_params = [] - for per_file in files: - pt_states = torch.load(per_file, map_location='cpu') - ms_params = hf_to_ms(pt_states, config.model_name, ms_dtype, True) - combine_params.extend(ms_params) - del ms_params - save_file = config.mindspore_path + '/' + ms_file_name + '.ckpt' - mindspore.save_checkpoint(combine_params, save_file) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Telechat convert script") - parser.add_argument("--torch_path", - type=str, - default="", - help="The input torch checkpoint path.") - parser.add_argument("--mindspore_path", - type=str, - default="", - help="The output mindspore checkpoint path.") - parser.add_argument("--model_name", - type=str, - default="telechat_12b", - help="The name of model, supports name in {'telechat_7b', 'telechat_12b'}") - args = parser.parse_args() - - # convert hf ckpt to ms - files_list = [] - for file_name in os.listdir(args.torch_path): - if file_name.startswith("pytorch_model") and file_name.endswith(".bin"): - files_list.append(os.path.join(args.torch_path, file_name)) - 
process_shard_files(files=files_list, config=args) - current_path = os.getcwd() - mindspore_ckpt_path = os.path.join(current_path, args.mindspore_path) - print("*** finish torch convert ms model, ms_ckpt save in {} ***".format(mindspore_ckpt_path)) diff --git a/research/telechat/run_telechat.py b/research/telechat/run_telechat.py deleted file mode 100644 index ad7399b5..00000000 --- a/research/telechat/run_telechat.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Telechat Train/Finetune scripts.""" -import os -import sys -import shutil -import argparse - -# pylint: disable=W0611 -from mindformers import Trainer, MindFormerConfig -from mindformers import init_context, ContextConfig, ParallelContextConfig -from mindformers.tools.utils import check_in_modelarts, set_remote_save_url, str2bool, check_shared_disk -from mindformers.tools.logger import logger -from mindformers.tools.cloud_adapter import cloud_monitor -from mindformers.core.context import build_context -from mindformers.tools import get_output_root_path -from mindformers.tools.register.register import MindFormerModuleType, MindFormerRegister -from telechat_config import TelechatConfig -from telechat import TelechatForCausalLM -MindFormerRegister.register_cls(TelechatConfig, MindFormerModuleType.CONFIG) -MindFormerRegister.register_cls(TelechatForCausalLM, MindFormerModuleType.MODELS) - - -if check_in_modelarts(): - import moxing as mox - -sys.path.insert(0, os.getcwd().split('research')[0]) - -def clear_auto_trans_output(config): - """clear transformed_checkpoint and strategy""" - if check_in_modelarts(): - obs_strategy_dir = os.path.join(config.remote_save_url, "strategy") - if mox.file.exists(obs_strategy_dir) and config.local_rank == 0: - mox.file.remove(obs_strategy_dir, recursive=True) - mox.file.make_dirs(obs_strategy_dir) - obs_transformed_ckpt_dir = os.path.join(config.remote_save_url, "transformed_checkpoint") - if mox.file.exists(obs_transformed_ckpt_dir) and config.local_rank == 0: - mox.file.remove(obs_transformed_ckpt_dir, recursive=True) - mox.file.make_dirs(obs_transformed_ckpt_dir) - else: - strategy_dir = os.path.join(get_output_root_path(), "strategy") - if os.path.exists(strategy_dir) and config.local_rank == 0: - shutil.rmtree(strategy_dir) - os.makedirs(strategy_dir, exist_ok=True) - transformed_ckpt_dir = os.path.join(get_output_root_path(), "transformed_checkpoint") - if os.path.exists(transformed_ckpt_dir) and config.local_rank == 0: - shutil.rmtree(transformed_ckpt_dir) - os.makedirs(transformed_ckpt_dir, exist_ok=True) - - -def context_init(use_parallel=False, optimizer_parallel=False, device_id=0): - """init context for mindspore.""" - context_config = ContextConfig(mode=0, device_target="Ascend", device_id=device_id) - parallel_config = None - if use_parallel: - parallel_config = ParallelContextConfig(parallel_mode='SEMI_AUTO_PARALLEL', - 
gradients_mean=False, - enable_parallel_optimizer=optimizer_parallel, - full_batch=True) - init_context(use_parallel=use_parallel, - context_config=context_config, - parallel_config=parallel_config) - - -@cloud_monitor() -def main(task='text_generation', - config='run_telechat_7b_finetune_910b.yaml', - run_mode='train', - seq_length=None, - mode=None, - use_parallel=None, - device_id=None, - ckpt=None, - strategy=None, - auto_trans_ckpt=None, - resume=False, - train_dataset='', - remote_save_url=None, - vocab_file=None): - """main function.""" - - assert os.path.exists(config) and config.endswith(('.yaml', '.yml')) - - # init config - config = MindFormerConfig(os.path.realpath(config)) - if seq_length is not None: - config.model.model_config.seq_length = seq_length - if mode is not None: - config.context.mode = mode - if mode: - config.recompute_config.recompute = False - if use_parallel is not None: - config.use_parallel = use_parallel - if device_id is not None: - config.context.device_id = device_id - if ckpt is None: - ckpt = config.load_checkpoint - if strategy is not None and os.path.exists(strategy): - config.src_strategy_path_or_dir = strategy - if auto_trans_ckpt is not None: - config.auto_trans_ckpt = auto_trans_ckpt - if vocab_file is not None: - config.processor.tokenizer.vocab_file = vocab_file - - if config.output_dir != './output': - raise ValueError("output_dir must be set to './output' and cannot be customized.") - - # init context - build_context(config) - - if check_in_modelarts() and remote_save_url: - logger.info("remote_save_url is %s, the output file will be uploaded to here.", remote_save_url) - set_remote_save_url(remote_save_url) - config.remote_save_url = remote_save_url - - if run_mode in ['train', 'finetune']: - config.model.model_config.use_past = False - - if config.auto_trans_ckpt: - if config.device_num <= 8 or check_shared_disk(config.output_dir) or check_in_modelarts(): - clear_auto_trans_output(config) - else: - raise ValueError("When device num > 8 and auto_trans_ckpt is set to True," - "the output_dir should be a shared directory that can be accessed by all nodes." - f"but {os.path.abspath(config.output_dir)} is not a shared directory.") - - # start task - if run_mode == 'train': - trainer = Trainer(args=config, - task=task, - train_dataset=train_dataset) - trainer.train(train_checkpoint=ckpt, auto_trans_ckpt=config.auto_trans_ckpt, resume_training=resume) - elif run_mode == 'finetune': - trainer = Trainer(args=config, - task=task, - train_dataset=train_dataset) - trainer.finetune(finetune_checkpoint=ckpt, auto_trans_ckpt=config.auto_trans_ckpt, resume_training=resume) - else: - raise ValueError("run_mode only support train and finetune.") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--task', default='text_generation', type=str, - help='set task type.') - parser.add_argument('--config', default='telechat/run_telechat_7b_910b_finetune.yaml', type=str, - help='set task type.') - parser.add_argument('--run_mode', default='train', type=str, - help='set run mode for model.') - parser.add_argument('--seq_length', default=None, type=int, - help='seq_length') - parser.add_argument('--use_parallel', default=True, type=str2bool, - help='open parallel for model.') - parser.add_argument('--device_id', default=0, type=int, - help='device id set when run on single card. 
Default: 0') - parser.add_argument('--mode', default=0, type=int, - help='0--Graph Mode; 1--Pynative Mode') - parser.add_argument('--load_checkpoint', default=None, type=str, - help='checkpoint name or dir to load.') - parser.add_argument('--src_strategy', default=None, type=str, - help='strategy of load_checkpoint') - parser.add_argument('--auto_trans_ckpt', default=None, type=str2bool, - help='whether to transform checkpoint to the checkpoint matching current distribute strategy.') - parser.add_argument('--resume', default=None, type=str2bool, - help='whether resume training.') - parser.add_argument('--train_dataset', default='', type=str, - help='set train dataset.') - parser.add_argument('--remote_save_url', default='', type=str, - help='whether use optimizer parallel. Default: None') - parser.add_argument('--vocab_file', default=None, type=str, - help='tokenizer model') - args = parser.parse_args() - - main(task=args.task, - config=args.config, - run_mode=args.run_mode, - seq_length=args.seq_length, - mode=args.mode, - use_parallel=args.use_parallel, - device_id=args.device_id, - ckpt=args.load_checkpoint, - strategy=args.src_strategy, - auto_trans_ckpt=args.auto_trans_ckpt, - resume=args.resume, - train_dataset=args.train_dataset, - remote_save_url=args.remote_save_url, - vocab_file=args.vocab_file) diff --git a/research/telechat/run_telechat_12b_910b.yaml b/research/telechat/run_telechat_12b_910b.yaml deleted file mode 100644 index 7bd47b6a..00000000 --- a/research/telechat/run_telechat_12b_910b.yaml +++ /dev/null @@ -1,210 +0,0 @@ -seed: 0 -output_dir: './output' -load_checkpoint: '' -src_strategy_path_or_dir: '' -auto_trans_ckpt: False # If true, auto transform load_checkpoint to load in distributed model -only_save_strategy: False -resume_training: False -run_mode: 'train' - -# trainer config -trainer: - type: CausalLanguageModelingTrainer - model_name: 'telechat_12b' - -# runner config -runner_config: - epochs: 5 - batch_size: 4 - sink_mode: True - sink_size: 2 - gradient_accumulation_steps: 1 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.95 - eps: 1.e-5 - learning_rate: 2.e-5 - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 2.e-5 - lr_end: 0 - warmup_ratio: 0.03 - total_steps: -1 # -1 means it will load the total steps of the dataset - -# dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: True - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 6 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. 
-do_eval: False - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -use_parallel: True -# parallel context config -parallel: - parallel_mode: 1 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: False - enable_alltoall: False - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" - parallel_optimizer_config: - gradient_accumulation_shard: False - parallel_optimizer_threshold: 64 -# default parallel of device num = 8 910B -parallel_config: - data_parallel: 8 - model_parallel: 1 - pipeline_stage: 1 - use_seq_parallel: False - micro_batch_num: 4 - vocab_emb_dp: True - gradient_aggregation_group: 4 -# when model parallel is greater than 1, we can set micro_batch_interleave_num=2, that may accelerate the train process. -micro_batch_interleave_num: 1 - -# recompute config -recompute_config: - recompute: True - select_recompute: False - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: True - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "telechat_12b" - save_checkpoint_steps: 100 - integrated_save: False - async_save: False - - type: ObsMonitor - -# mindspore context init config -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - max_call_depth: 10000 - max_device_memory: "58GB" - save_graphs: False - save_graphs_path: "./graph" - device_id: 0 - -# model config -model: - model_config: - type: TelechatConfig - model_name: 'telechat_12b' - batch_size: 1 # add for increase predict - seq_length: 1024 - hidden_size: 5120 - num_layers: 38 - num_heads: 32 - vocab_size: 120000 - rms_norm_eps: 1.0e-5 - bos_token_id: 1 - eos_token_id: 2 - pad_token_id: 3 - ignore_token_id: -100 - hidden_dropout_prob: 0.1 - attention_dropout_prob: 0.1 - ffn_dim_multiplier: 12288 - compute_dtype: "float16" - layernorm_compute_type: "float32" - softmax_compute_type: "float16" - rotary_dtype: "float16" - param_init_type: "float16" - use_past: False - extend_method: "None" # support "None", "PI", "NTK" - use_flash_attention: True # FA can accelerate training or finetune - offset: 0 - checkpoint_name_or_path: "telechat_12" - repetition_penalty: 1 - max_decode_length: 512 - top_k: 3 - top_p: 1 - do_sample: False - arch: - type: TelechatForCausalLM - -processor: - return_tensors: ms - tokenizer: - unk_token: '' - bos_token: '<_start>' - eos_token: '<_end>' - pad_token: '<_pad>' - type: TelechatTokenizer - type: TelechatProcessor - -# metric -metric: - type: PerplexityMetric - -# wrapper cell config -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 4096 - scale_factor: 2 - scale_window: 500 - use_clip_grad: True - -eval_callbacks: - - type: ObsMonitor - -auto_tune: False -filepath_prefix: './autotune' -autotune_per_step: 10 - -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: False -profile_communication: False -profile_memory: True -layer_scale: False -layer_decay: 0.65 -lr_scale_factor: 256 - -# aicc -remote_save_url: "Please input obs url 
on AICC platform." diff --git a/research/telechat/run_telechat_12b_finetune_910b.yaml b/research/telechat/run_telechat_12b_finetune_910b.yaml deleted file mode 100644 index 06392754..00000000 --- a/research/telechat/run_telechat_12b_finetune_910b.yaml +++ /dev/null @@ -1,210 +0,0 @@ -seed: 0 -output_dir: './output' -load_checkpoint: '' -src_strategy_path_or_dir: '' -auto_trans_ckpt: False # If true, auto transform load_checkpoint to load in distributed model -only_save_strategy: False -resume_training: False -run_mode: 'finetune' - -# trainer config -trainer: - type: CausalLanguageModelingTrainer - model_name: 'telechat_12b' - -# runner config -runner_config: - epochs: 5 - batch_size: 4 - sink_mode: True - sink_size: 2 - gradient_accumulation_steps: 1 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.95 - eps: 1.e-5 - learning_rate: 2.e-5 - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 2.e-5 - lr_end: 0 - warmup_ratio: 0.03 - total_steps: -1 # -1 means it will load the total steps of the dataset - -# dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 2 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -use_parallel: True -# parallel context config -parallel: - parallel_mode: 1 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: False - enable_alltoall: False - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" - parallel_optimizer_config: - gradient_accumulation_shard: False - parallel_optimizer_threshold: 64 -# default parallel of device num = 8 910B -parallel_config: - data_parallel: 8 - model_parallel: 1 - pipeline_stage: 1 - use_seq_parallel: False - micro_batch_num: 64 - vocab_emb_dp: True - gradient_aggregation_group: 4 -# when model parallel is greater than 1, we can set micro_batch_interleave_num=2, that may accelerate the train process. 
-micro_batch_interleave_num: 1 - -# recompute config -recompute_config: - recompute: True - select_recompute: False - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: True - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "telechat_12b" - save_checkpoint_steps: 100 - integrated_save: False - async_save: False - - type: ObsMonitor - -# mindspore context init config -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - max_call_depth: 10000 - max_device_memory: "58GB" - save_graphs: False - save_graphs_path: "./graph" - device_id: 0 - -# model config -model: - model_config: - type: TelechatConfig - model_name: 'telechat_12b' - batch_size: 1 # add for increase predict - seq_length: 1024 - hidden_size: 5120 - num_layers: 38 - num_heads: 32 - vocab_size: 120000 - rms_norm_eps: 1.0e-5 - bos_token_id: 1 - eos_token_id: 2 - pad_token_id: 3 - ignore_token_id: -100 - hidden_dropout_prob: 0.1 - attention_dropout_prob: 0.1 - ffn_dim_multiplier: 12288 - compute_dtype: "float16" - layernorm_compute_type: "float32" - softmax_compute_type: "float16" - rotary_dtype: "float16" - param_init_type: "float16" - use_past: False - extend_method: "None" # support "None", "PI", "NTK" - use_flash_attention: True # FA can accelerate training or finetune - offset: 0 - checkpoint_name_or_path: "telechat_12b" - repetition_penalty: 1 - max_decode_length: 512 - top_k: 3 - top_p: 1 - do_sample: False - arch: - type: TelechatForCausalLM - -processor: - return_tensors: ms - tokenizer: - unk_token: '' - bos_token: '<_start>' - eos_token: '<_end>' - pad_token: '<_pad>' - type: TelechatTokenizer - type: TelechatProcessor - -# metric -metric: - type: PerplexityMetric - -# wrapper cell config -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 4096 - scale_factor: 2 - scale_window: 500 - use_clip_grad: True - -eval_callbacks: - - type: ObsMonitor - -auto_tune: False -filepath_prefix: './autotune' -autotune_per_step: 10 - -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: False -profile_communication: False -profile_memory: True -layer_scale: False -layer_decay: 0.65 -lr_scale_factor: 256 - -# aicc -remote_save_url: "Please input obs url on AICC platform." 
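The YAML files removed above are consumed through mindformers' `MindFormerConfig`, and the removed `run_telechat.py` launcher overrides individual fields on the loaded config before building the trainer. Purely as an illustrative sketch of that pattern — the local YAML path and override values below are assumptions for the example, not something defined in this patch:

```python
# Illustration only: load one of the removed Telechat YAML configs and override
# a few fields in place, mirroring the pattern used by the removed run_telechat.py.
import os
from mindformers import MindFormerConfig

yaml_path = "research/telechat/run_telechat_12b_finetune_910b.yaml"  # hypothetical local path
config = MindFormerConfig(os.path.realpath(yaml_path))

# The removed launcher mutates the loaded config the same way before training.
config.model.model_config.seq_length = 1024  # keep in sync with the packed dataset length
config.context.device_id = 0                 # device used for single-card debugging
config.use_parallel = True                   # keep the semi-auto parallel settings from the YAML

print(config.trainer.model_name, config.parallel_config.data_parallel)
```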
diff --git a/research/telechat/run_telechat_7b_910b.yaml b/research/telechat/run_telechat_7b_910b.yaml deleted file mode 100644 index 67a72785..00000000 --- a/research/telechat/run_telechat_7b_910b.yaml +++ /dev/null @@ -1,210 +0,0 @@ -seed: 0 -output_dir: './output' -load_checkpoint: '' -src_strategy_path_or_dir: '' -auto_trans_ckpt: False # If true, auto transform load_checkpoint to load in distributed model -only_save_strategy: False -resume_training: False -run_mode: 'train' - -# trainer config -trainer: - type: CausalLanguageModelingTrainer - model_name: 'telechat_7b' - -# runner config -runner_config: - epochs: 5 - batch_size: 4 - sink_mode: True - sink_size: 2 - gradient_accumulation_steps: 1 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.95 - eps: 1.e-5 - learning_rate: 1.e-5 - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 1.e-5 - lr_end: 0 - warmup_ratio: 0.03 - total_steps: -1 # -1 means it will load the total steps of the dataset - -# dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: True - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 6 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -use_parallel: True -# parallel context config -parallel: - parallel_mode: 1 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: False - enable_alltoall: False - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" - parallel_optimizer_config: - gradient_accumulation_shard: False - parallel_optimizer_threshold: 64 -# default parallel of device num = 8 910B -parallel_config: - data_parallel: 8 - model_parallel: 1 - pipeline_stage: 1 - use_seq_parallel: False - micro_batch_num: 4 - vocab_emb_dp: True - gradient_aggregation_group: 4 -# when model parallel is greater than 1, we can set micro_batch_interleave_num=2, that may accelerate the train process. 
-micro_batch_interleave_num: 1 - -# recompute config -recompute_config: - recompute: True - select_recompute: False - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: True - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "telechat_7b" - save_checkpoint_steps: 100 - integrated_save: False - async_save: False - - type: ObsMonitor - -# mindspore context init config -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - max_call_depth: 10000 - max_device_memory: "58GB" - save_graphs: False - save_graphs_path: "./graph" - device_id: 0 - -# model config -model: - model_config: - type: TelechatConfig - model_name: 'telechat_7b' - batch_size: 1 # add for increase predict - seq_length: 2048 - hidden_size: 4096 - num_layers: 30 - num_heads: 32 - vocab_size: 160256 - rms_norm_eps: 1.0e-5 - bos_token_id: 160132 - eos_token_id: 160133 - pad_token_id: 3 - ignore_token_id: -100 - hidden_dropout_prob: 0.1 - attention_dropout_prob: 0.1 - ffn_dim_multiplier: 12288 - compute_dtype: "float16" - layernorm_compute_type: "float32" - softmax_compute_type: "float16" - rotary_dtype: "float16" - param_init_type: "float16" - use_past: False - extend_method: "None" # support "None", "PI", "NTK" - use_flash_attention: True # FA can accelerate training or finetune - offset: 0 - checkpoint_name_or_path: "telechat_7b" - repetition_penalty: 1 - max_decode_length: 512 - top_k: 3 - top_p: 1 - do_sample: False - arch: - type: TelechatForCausalLM - -processor: - return_tensors: ms - tokenizer: - unk_token: '' - bos_token: '<_start>' - eos_token: '<_end>' - pad_token: '' - type: TelechatTokenizer - type: TelechatProcessor - -# metric -metric: - type: PerplexityMetric - -# wrapper cell config -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 4096 - scale_factor: 2 - scale_window: 500 - use_clip_grad: True - -eval_callbacks: - - type: ObsMonitor - -auto_tune: False -filepath_prefix: './autotune' -autotune_per_step: 10 - -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: False -profile_communication: False -profile_memory: True -layer_scale: False -layer_decay: 0.65 -lr_scale_factor: 256 - -# aicc -remote_save_url: "Please input obs url on AICC platform." 
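The training YAMLs above drive the learning rate through a `CosineWithWarmUpLR` schedule (`learning_rate: 1.e-5`, `lr_end: 0`, `warmup_ratio: 0.03`, `total_steps: -1`, i.e. derived from the dataset). The snippet below plots the shape of a generic cosine-with-warmup curve for intuition only; mindformers' own implementation may differ in detail, and the total step count used here is an assumed placeholder:

```python
# Generic cosine-with-warmup curve for illustration; not mindformers' exact implementation.
# Hyperparameters mirror the removed 7B YAML: learning_rate=1e-5, lr_end=0, warmup_ratio=0.03.
import math

def cosine_with_warmup(step, total_steps, base_lr=1e-5, lr_end=0.0, warmup_ratio=0.03):
    warmup_steps = max(1, int(total_steps * warmup_ratio))
    if step < warmup_steps:
        return base_lr * (step + 1) / warmup_steps            # linear warmup phase
    progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
    return lr_end + 0.5 * (base_lr - lr_end) * (1.0 + math.cos(math.pi * progress))

total = 10000  # hypothetical; the YAML's total_steps=-1 means "derive from the dataset"
for s in (0, 150, 300, 5000, 9999):
    print(s, f"{cosine_with_warmup(s, total):.2e}")
```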
diff --git a/research/telechat/run_telechat_7b_finetune_910b.yaml b/research/telechat/run_telechat_7b_finetune_910b.yaml deleted file mode 100644 index 3b85b853..00000000 --- a/research/telechat/run_telechat_7b_finetune_910b.yaml +++ /dev/null @@ -1,210 +0,0 @@ -seed: 0 -output_dir: './output' -load_checkpoint: '' -src_strategy_path_or_dir: '' -auto_trans_ckpt: False # If true, auto transform load_checkpoint to load in distributed model -only_save_strategy: False -resume_training: False -run_mode: 'finetune' - -# trainer config -trainer: - type: CausalLanguageModelingTrainer - model_name: 'telechat_7b' - -# runner config -runner_config: - epochs: 5 - batch_size: 4 - sink_mode: True - sink_size: 2 - gradient_accumulation_steps: 1 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.95 - eps: 1.e-5 - learning_rate: 1.e-5 - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 1.e-5 - lr_end: 0 - warmup_ratio: 0.03 - total_steps: -1 # -1 means it will load the total steps of the dataset - -# dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 2 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -use_parallel: True -# parallel context config -parallel: - parallel_mode: 1 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: False - enable_alltoall: False - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" - parallel_optimizer_config: - gradient_accumulation_shard: False - parallel_optimizer_threshold: 64 -# default parallel of device num = 8 910B -parallel_config: - data_parallel: 8 - model_parallel: 1 - pipeline_stage: 1 - use_seq_parallel: False - micro_batch_num: 64 - vocab_emb_dp: True - gradient_aggregation_group: 4 -# when model parallel is greater than 1, we can set micro_batch_interleave_num=2, that may accelerate the train process. 
-micro_batch_interleave_num: 1 - -# recompute config -recompute_config: - recompute: True - select_recompute: False - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: True - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "telechat_7b" - save_checkpoint_steps: 50 - integrated_save: False - async_save: False - - type: ObsMonitor - -# mindspore context init config -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - max_call_depth: 10000 - max_device_memory: "58GB" - save_graphs: False - save_graphs_path: "./graph" - device_id: 0 - -# model config -model: - model_config: - type: TelechatConfig - model_name: 'telechat_7b' - batch_size: 1 # add for increase predict - seq_length: 2048 - hidden_size: 4096 - num_layers: 30 - num_heads: 32 - vocab_size: 160256 - rms_norm_eps: 1.0e-5 - bos_token_id: 160132 - eos_token_id: 160133 - pad_token_id: 3 - ignore_token_id: -100 - hidden_dropout_prob: 0.1 - attention_dropout_prob: 0.1 - ffn_dim_multiplier: 12288 - compute_dtype: "float16" - layernorm_compute_type: "float32" - softmax_compute_type: "float16" - rotary_dtype: "float16" - param_init_type: "float16" - use_past: False - extend_method: "None" # support "None", "PI", "NTK" - use_flash_attention: True # FA can accelerate training or finetune - offset: 0 - checkpoint_name_or_path: "telechat_7b" - repetition_penalty: 1 - max_decode_length: 512 - top_k: 3 - top_p: 1 - do_sample: False - arch: - type: TelechatForCausalLM - -processor: - return_tensors: ms - tokenizer: - unk_token: '' - bos_token: '<_start>' - eos_token: '<_end>' - pad_token: '' - type: TelechatTokenizer - type: TelechatProcessor - -# metric -metric: - type: PerplexityMetric - -# wrapper cell config -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 4096 - scale_factor: 2 - scale_window: 500 - use_clip_grad: True - -eval_callbacks: - - type: ObsMonitor - -auto_tune: False -filepath_prefix: './autotune' -autotune_per_step: 10 - -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: False -profile_communication: False -profile_memory: True -layer_scale: False -layer_decay: 0.65 -lr_scale_factor: 256 - -# aicc -remote_save_url: "Please input obs url on AICC platform." diff --git a/research/telechat/run_telechat_predict.py b/research/telechat/run_telechat_predict.py deleted file mode 100644 index 1ce4da0d..00000000 --- a/research/telechat/run_telechat_predict.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""Telechat predict scripts.""" -import argparse -import json -import copy -from typing import Optional, Union, List, Dict -from telechat_tokenizer import TelechatTokenizer -from telechat_config import TelechatConfig -from research.telechat.telechat_predict_utils import History -from telechat import TelechatForCausalLM -from mindformers import MindFormerConfig, TransformerOpParallelConfig -from mindformers import init_context -from mindformers.tools.utils import str2bool -from mindformers.tools.logger import logger -from mindformers.generation import GenerationConfig - -USER_TOKEN_ID = 20 -BOT_TOKEN_ID = 21 - -def chat(model, tokenizer, question: str = '', history: Union[List[Dict], History] = None, - generation_config: Optional[GenerationConfig] = None): - """ - Args: - tokenizer: the tokenizer of telechat - question: question which the model reply in this turn - history: history which will format the input for telechat - stream: if return the full text at last or yield the text in token - generation_config: configuration for generation - **kwargs: args which will update the generation config or pass to model forward - """ - if not generation_config: - logger.error("generation_config is None") - raise ValueError("generation_config must not be None") - if not question: - logger.error("question is empty") - raise ValueError("question must not be empty") - if history is None: - history = [] - - generation_config = copy.deepcopy(generation_config) - user_id = generation_config.user_token_id - bot_id = generation_config.bot_token_id - - # transfer to History - if not isinstance(history, History): - history = History(tokenizer, history) - - inputs = build_inputs_for_chat(tokenizer, question, history, generation_config, user_id, bot_id) - history.append({"role": "user", "content": question}) - outputs = model.generate(inputs, - max_length=generation_config.max_decode_length, - do_sample=generation_config.do_sample, - top_k=generation_config.top_k, - top_p=generation_config.top_p, - max_new_tokens=generation_config.max_new_tokens) - response = tokenizer.decode(outputs[0][len(inputs):-1]) - history.append({"role": "bot", "content": response}) - return response, history - -def build_inputs_for_chat(tokenizer, question, history, generation_config, usr_id, bot_id): - """ - check history and build inputs here - """ - # first tokenize question - q_token = tokenizer(question) - qa_history = copy.deepcopy(history) - - # get the max length we should build our inputs in - model_max_length = generation_config.seq_length - build_max_length = max(0, model_max_length - generation_config.max_new_tokens) \ - if generation_config.max_new_tokens else max(0, generation_config.max_decode_length) - if build_max_length < 3: - raise ValueError("the model can not meet the requirements of input length,Please check config") - - # trunc left - input_tokens = [usr_id] + q_token["input_ids"][-build_max_length + 1:] + [bot_id] - length = len(input_tokens) - - while len(qa_history) >= 1: - message = qa_history.pop() - if message["role"] == "user": - tokens = [usr_id] + message["input_ids"] - elif message["role"] == "bot": - tokens = [bot_id] + message["input_ids"] + [generation_config.eos_token_id] - else: - tokens = [] - if len(tokens) + length >= build_max_length: - break - else: - input_tokens = tokens + input_tokens - return input_tokens - - -def main(): - """main function.""" - input_questions = [] - input_file = open(args.input_file, 'r', 
encoding='utf-8') - for line in input_file.readlines(): - dic = json.loads(line) - input_questions.append(dic["input"]) - input_file.close() - # set model config - config = MindFormerConfig(args.yaml_file) - config.use_parallel = False - # 初始化环境 - init_context(context_config=config.context) - - model_config = TelechatConfig(**config.model.model_config) - model_config.parallel_config = TransformerOpParallelConfig(**config.parallel_config) - model_config.batch_size = 1 - model_config.use_past = args.use_past - model_config.use_flash_attention = False - model_config.user_token_id = USER_TOKEN_ID - model_config.bot_token_id = BOT_TOKEN_ID - model_config.max_new_tokens = None - - if args.checkpoint_path and not config.use_parallel: - model_config.checkpoint_name_or_path = args.checkpoint_path - print(f"config is: {model_config}") - - # build tokenizer - tokenizer = TelechatTokenizer(args.vocab_file_path, fast_tokenizer=True, - trust_remote_code=True) - # build model from config - model = TelechatForCausalLM(model_config) - for question in input_questions: - print("question:", question) - answer, history = chat(model, tokenizer, question, generation_config=model_config) - print("answer:", answer) - print("截至目前的聊天记录是:", history) - print("\n") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--input_file', default='', type=str, - help='input to infer.') - parser.add_argument('--vocab_file_path', default='', type=str, - help='which model to use.') - parser.add_argument('--checkpoint_path', default='', type=str, - help='set checkpoint path.') - parser.add_argument('--use_past', default=True, type=str2bool, - help='whether use past.') - parser.add_argument('--yaml_file', default="", type=str, - help='predict yaml path') - args = parser.parse_args() - main() diff --git a/research/telechat/telechat.md b/research/telechat/telechat.md deleted file mode 100644 index 2e9a8e0f..00000000 --- a/research/telechat/telechat.md +++ /dev/null @@ -1,548 +0,0 @@ -# 星辰语义大模型 Telechat - -## 模型描述 - -星辰语义大模型Telechat是由中电信人工智能科技有限公司研发训练的大语言模型,采用3万亿Tokens中英文高质量语料进行训练。目前开源模型:Telechat-7B,Telechat-12B模型,本仓库已支持7B和12B模型的微调权重,权重文件来源于中电信人工智能科技有限公司。 - -基于GPU,Torch版本的Telechat链接: - -[Telechat](https://github.com/Tele-AI/Telechat) - -[TeleChat Technical Report](https://arxiv.org/abs/2401.03804) - -``` text -@article{wang2024telechat, - title={TeleChat Technical Report}, - author={Zihan Wang and Xinzhang Liu and Shixuan Liu and Yitong Yao and Yuyao Huang and Zhongjiang He and Xuelong Li and Yongxiang Li and Zhonghao Che and Zhaoxi Zhang and Yan Wang and Xin Wang and Luwen Pu and Huihan Xu and Ruiyu Fang and Yu Zhao and Jie Zhang and Xiaomeng Huang and Zhilong Lu and Jiaxin Peng and Wenjun Zheng and Shiquan Wang and Bingkai Yang and Xuewei he and Zhuoru Jiang and Qiyi Xie and Yanhan Zhang and Zhongqiu Li and Lingling Shi and Weiwei Fu and Yin Zhang and Zilu Huang and Sishi Xiong and Yuxiang Zhang and Chao Wang and Shuangyong Song}, - journal={arXiv preprint arXiv:2401.03804}, - year={2024} -} -``` - -## 模型性能 - -基于910B - -telechat_7b: - -| config | task | Datasets | SeqLength | phase | performance | -|-------------------------------------------------------| --------------------- |------------|-----------|-----------------|--------------| -| [telechat_7b](./run_telechat_7b_910b.yaml) | text_generation | example_dataset | 2048 | [train](#预训练) | 1940 tks/s/p | -| [telechat_7b](./run_telechat_7b_910b_finetune.yaml) | text_generation | example_dataset | 2048 | [finetune](#微调) | 1925 tks/s/p | -| 
[telechat_7b](./run_telechat_7b_910b_finetune.yaml) | text_generation | example_dataset | 2048 | [predict](#推理) | 27 tks/s/p | - -telechat_12b: - -| config | task | Datasets | SeqLength | phase | performance | -|-------------------------------------------------------| --------------------- |------------|-----------|-----------------|--------------| -| [telechat_12b](./run_telechat_12b_910b.yaml) | text_generation | example_dataset | 1024 | [train](#预训练) | 1433 tks/s/p | -| [telechat_12b](./run_telechat_12b_910b_finetune.yaml) | text_generation | example_dataset | 1024 | [finetune](#微调) | 1433 tks/s/p | -| [telechat_12b](./run_telechat_12b_910b_finetune.yaml) | text_generation | example_dataset | 1024 | [predict](#推理) | 20 tks/s/p | - -## 仓库介绍 - -`Telechat` 基于 `mindformers` 实现,主要涉及的文件有: - -1. 模型具体实现:`mindformers/research/telechat` - - ```bash - telechat - ├── convert_weight_ms_to_torch.py # ms->torch权重转换脚本 - ├── convert_weight_torch_to_ms.py # torch->ms权重转换脚本 - ├── telechat_preprocess.py # telechat模型的mindrecord数据处理脚本 - ├── telechat.py # 模型实现 - ├── telechat_config.py # 模型配置项 - ├── telechat_layer.py # telechat网络层定义 - ├── telechat_predict_utils.py # telechat推理模块 - ├── telechat_tokenizer.py # telechat tokenizer - └── telechat_transformer.py # transformer层实现 - ``` - -2. 模型配置:`mindformers/research/telechat` - - ```bash - telechat - ├── run_telechat_7b_910b.yaml # 7b模型预训练启动配置 - ├── run_telechat_7b_finetune_910b.yaml # 7b全量微调启动配置 - ├── run_telechat_12b_910b.yaml # 12b模型预训练启动配置 - └── run_telechat_12b_finetune_910b.yaml # 12b全量微调启动配置 - ``` - -3. 任务启动脚本:`mindformers/research/telechat` - - ```text - telechat - ├── run_telechat_predict.py # 推理脚本 - └── run_telechat.py # telechat高阶接口使用脚本 - ``` - -## 前期准备 - -### 环境要求 - -- 硬件:Atlas 800T A2 -- MindSpore:2.2.11 -- CANN: 7.1 -- MindFormers版本:dev - -注:Atlas 800T A2芯片:7b, 12b推理可在单机单卡上完成部署。 - -### [mindformers安装](../../README.md#二mindformers安装) - -### 生成RANK_TABLE_FILE(多卡运行必须环节) - -运行mindformers/tools/hccl_tools.py生成RANK_TABLE_FILE的json文件 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" -``` - -**注:若使用ModelArts的notebook环境,可从 `/user/config/jobstart_hccl.json` 路径下直接获取rank table,无需手动生成** - -RANK_TABLE_FILE 单机8卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "1", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - {"device_id": "0","device_ip": "192.1.27.6","rank_id": "0"}, - {"device_id": "1","device_ip": "192.2.27.6","rank_id": "1"}, - {"device_id": "2","device_ip": "192.3.27.6","rank_id": "2"}, - {"device_id": "3","device_ip": "192.4.27.6","rank_id": "3"}, - {"device_id": "4","device_ip": "192.1.27.7","rank_id": "4"}, - {"device_id": "5","device_ip": "192.2.27.7","rank_id": "5"}, - {"device_id": "6","device_ip": "192.3.27.7","rank_id": "6"}, - {"device_id": "7","device_ip": "192.4.27.7","rank_id": "7"}], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 多机RANK_TABLE_FILE合并(多机多卡必备环节) - -- step 1. 首先根据上章节内容,在每个机器上生成各自的`RANK_TABLE_FILE`文件,然后将不同机器上生成的`RANK_TABLE_FILE`文件全部拷贝到同一台机器上。 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python ./mindformers/tools/hccl_tools.py --device_num "[0,8)" --server_ip xx.xx.xx.xx -``` - -**注:需要根据机器的ip地址指定 --server_ip,避免由于不同机器server_ip不同,导致多节点间通信失败。** - -- step 2. 运行mindformers/tools/merge_hccl.py将不同机器上生成的`RANK_TABLE_FILE`文件合并 - -```bash -# 运行如下命令,合并每个机器上的RANK_TABLE_FILE的json文件。 -python ./mindformers/tools/merge_hccl.py hccl*.json -``` - -- step 3. 
将合并后的`RANK_TABLE_FILE`文件拷贝到所有机器中,保证不同机器上的`RANK_TABLE_FILE`相同。 - -RANK_TABLE_FILE 双机16卡参考样例: - -```json -{ - "version": "1.0", - "server_count": "2", - "server_list": [ - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.0", "rank_id": "0" - }, - { - "device_id": "1", "device_ip": "192.168.1.0", "rank_id": "1" - }, - { - "device_id": "2", "device_ip": "192.168.2.0", "rank_id": "2" - }, - { - "device_id": "3", "device_ip": "192.168.3.0", "rank_id": "3" - }, - { - "device_id": "4", "device_ip": "192.168.0.1", "rank_id": "4" - }, - { - "device_id": "5", "device_ip": "192.168.1.1", "rank_id": "5" - }, - { - "device_id": "6", "device_ip": "192.168.2.1", "rank_id": "6" - }, - { - "device_id": "7", "device_ip": "192.168.3.1", "rank_id": "7" - } - ], - "host_nic_ip": "reserve" - }, - { - "server_id": "xx.xx.xx.xx", - "device": [ - { - "device_id": "0", "device_ip": "192.168.0.1", "rank_id": "8" - }, - { - "device_id": "1", "device_ip": "192.168.1.1", "rank_id": "9" - }, - { - "device_id": "2", "device_ip": "192.168.2.1", "rank_id": "10" - }, - { - "device_id": "3", "device_ip": "192.168.3.1", "rank_id": "11" - }, - { - "device_id": "4", "device_ip": "192.168.0.2", "rank_id": "12" - }, - { - "device_id": "5", "device_ip": "192.168.1.2", "rank_id": "13" - }, - { - "device_id": "6", "device_ip": "192.168.2.2", "rank_id": "14" - }, - { - "device_id": "7", "device_ip": "192.168.3.2", "rank_id": "15" - } - ], - "host_nic_ip": "reserve" - } - ], - "status": "completed" -} -``` - -### 模型权重下载与转换(Telechat-7B为例) - -开发者可以下载获取官方权重后,通过下面提供的**权重转换脚本**,将官方权重转换为MindSpore权重;或直接使用MindFormers提供的**已转换权重** - -1.torch模型权重及词模型下载链接: - -- [telechat-7b](https://huggingface.co/Tele-AI/Telechat-7B/) -- [telechat-12b](https://huggingface.co/Tele-AI/TeleChat-12B) - -下载完成后,运行如下转换脚本,将全量微调的权重转换为完整的ckpt权重。 - -```shell -python mindformers/research/telechat/convert_weight_torch_to_ms.py \ ---torch_path TORCH_CKPT_DIR \ ---mindspore_path {path}/MS_CKPT_NAME \ ---model_name 'telechat_7b' \ -``` - -```text -# 参数说明 -torch_path: torch版本权重保存目录路径 -mindspore_path: 权重保存文件名,可以指定自定义保存路径 -model_name: 模型的名称 -``` - -2.获取MindFormers提供的已转换权重,可直接从下面的链接获取。 - -- [telechat-7b](https://telechat-docker.obs.cn-north-4.myhuaweicloud.com/model_weight/mindspore.ckpt) -- [telechat-12b](https://telechat-docker.obs.cn-north-4.myhuaweicloud.com/model_weight/mindspore_12B.ckpt) - -### [分布式训练/微调权重合并](../../docs/feature_cards/Transform_Ckpt.md) - -分布式训练/微调后所得到的权重文件为根据策略切分后的权重,需要手动将切分权重合一,以用于评估和推理。 - -涉及到ckpt的单卡,多卡转换,详细教程请参考特性文档模型[权重切分与合并](../../docs/feature_cards/Transform_Ckpt.md) - -- step 1. 获取模型切分策略文件: - -在执行微调脚本时,模型完成编译后,将会在`output/strategy`路径下生成各卡的切分策略文件,用于权重合并。 - -- step 2. 运行`mindformers/tools/transform_ckpt.py`脚本进行多卡权重合并: - -```shell -python transform_ckpt.py \ ---src_ckpt_strategy {path}/output/strategy/ \ ---src_ckpt_dir {path}/output/checkpoint/ \ ---dst_ckpt_dir {path}/target_checkpoint/ \ ---prefix telechat_7b -``` - -```text -# 参数说明 -src_ckpt_strategy: 步骤1中的切分策略文件路径 -src_ckpt_dir: 原切分权重文件夹 -dst_ckpt_dir: 目标路径 -prefix: ckpt文件前缀名 -``` - -> 注:`transform_checkpoints` 接口当前仅mindspore 2.0以上版本支持,如当前硬件环境只支持2.0以下版本,可以新建conda环境安装mindspore 2.0的cpu版本以执行该脚本 - -## 预训练(Telechat-7B为例) - -### 数据集准备 - -step 1. 获取数据集 - -[数据集](https://telechat-docker.obs.cn-north-4.myhuaweicloud.com/example_dataset.jsonl) - -数据集的格式: - -```text -# input_dataset examples: - {"input": "电信主卡和副卡的区别在哪里?", "output": "主卡和副卡的主要区别在于,主卡只能使用一张手机号码。<_end>"} -``` - -step 2. 
处理数据成mindrecord格式 - -```bash -# 使用mindformers/research/telechat/telechat_preprocess.py进行数据预处理+Mindrecord数据生成 -# 由于此工具依赖AutoTokenizer,所以需要提前下载transformers -python telechat_preprocess.py \ ---input_dataset_file /{path}/input_dataset.jsonl \ ---vocab_file_path /{path}/tokenizer.model \ ---max_length 2048 \ ---output_path /{path}/output_dataset.mindrecord -``` - -```text -# 参数说明 -input_dataset_file: 预训练的数据集 -vocab_file_path: 词模型文件路径(如使用上述链接下载,指定到对应路径下即可) -max_length: 数据集长度 -output_path: 生成数据集的路径 -``` - -### 脚本启动 - -多卡运行需要RANK_FILE_TABLE,请参考前期准备-[生成RANK_TABLE_FILE](#生成rank_table_file多卡运行必须环节) - -#### 多卡训练 - -##### 单机多卡 - -- step 1. 修改模型对应的配置文件。 - -在模型对应的配置文件`research/telechat/run_telechat_7b_910b.yaml`中,用户可自行修改模型、训练相关参数(推荐开启flash_attention,可加速训练),并通过`train_dataset`的`dataset_dir`参数,指定训练数据集的路径。 - -配置文件中各参数含义详见[Config配置说明文档](https://gitee.com/mindspore/mindformers/blob/master/configs/README.md)。auto_parallel说明详见[自动并行](../../docs/feature_cards/Auto_Parallel.md)。 - -- step2. 设置环境变量,变量配置如下: - -```bash -export MS_ASCEND_CHECK_OVERFLOW_MODE="INFNAN_MODE" # 推荐开启饱和模式 -``` - -- step3. 启动训练任务,在单机上拉起任务。 - -```shell -cd mindformers/research - -bash run_singlenode.sh \ -"python telechat/run_telechat.py \ ---config telechat/run_telechat_7b_910b.yaml \ ---run_mode train \ ---train_data dataset_dir" \ -RANK_TABLE_FILE [0,8] 8 -``` - -```text -# 参数说明 -config: 配置文件路径 -run_mode: 运行模式,预训练时设置为train -train_data: 训练数据集文件夹路径 -RANK_TABLE_FILE: 生成的rank_table文件 -``` - -##### 多机多卡 - -- step 1. 多机多卡运行需要合并不同机器的RANK_FILE_TABLE,参考前期准备-[多机RANK_TABLE_FILE合并](#多机rank_table_file合并多机多卡必备环节) - -> **注:需要保证执行的节点和RANK_TABLE_FIEL的节点顺序保持一致,即rank_id匹配。** - -- step 2. 根据服务器节点数等信息,修改相应的配置。 - -```yaml -# 以telechat-7b模型两机训练为例,默认配置2机16卡,如果节点数有变,需要修改相应的配置。 -# 配置文件路径:run_telechat_7b_910b.yaml -parallel_config: - data_parallel: 2 - model_parallel: 4 - pipeline_stage: 2 - micro_batch_num: 16 - vocab_emb_dp: True - gradient_aggregation_group: 4 -``` - -- step 3. 执行运行脚本。 - -在多机上同时拉起任务,每台机器拉起方式参考单机多卡启动方式。 - -```shell -cd mindformers/research - -# 第一台机器 -bash run_multinode.sh \ -"python telechat/run_telechat.py \ ---config telechat/run_telechat_7b_910b.yaml \ ---run_mode train \ ---train_data dataset_dir" \ -RANK_TABLE_FILE [0,8] 16 - -# 第二台机器 -bash run_multinode.sh \ -"python telechat/run_telechat.py \ ---config telechat/run_telechat_7b_910b.yaml \ ---run_mode train \ ---train_data dataset_dir" \ -RANK_TABLE_FILE [8,16] 16 -``` - -```text -# 参数说明 -config: 配置文件路径 -run_mode: 运行模式,预训练时设置为train -train_data: 训练数据集文件夹路径 -RANK_TABLE_FILE: 生成的rank_table文件 -``` - -## 微调(Telechat-7B为例) - -### 数据集准备 - -目前使用的数据集样例由中电信人工智能科技有限公司提供,该样例的预处理脚本可用于全参微调任务,详细数据集格式以及数据处理参考预训练格式样例。 - -### 全参微调 - -当前模型已支持使用**Flash Attention算法**进行全参微调,推荐开启flash_attention,可加速训练。详请参考 [Flash Attention使用文档](../../docs/feature_cards/Training_Algorithms.md#flash-attention) - -- step 1. 参考`research/telechat/run_telechat_7b_910b_finetune.yaml`中训练数据集路径为微调数据集路径。 - -```python -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "/{path}/" - shuffle: True -``` - -- step 2. 
修改微调时学习率, 优化器参数,`seq_length`, 新增 `context`中参数, 与预训练不同,微调配置如下: - -```python -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.95 - eps: 1.e-5 - learning_rate: 1.e-5 - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 1.e-5 - lr_end: 0 - warmup_ratio: 0.03 - total_steps: -1 # -1 means it will load the total steps of the dataset - -# model config -model: - model_config: - type: TelechatConfig - model_name: 'telechat_7b' - batch_size: 1 # add for increase predict - seq_length: 2048 -``` - -- step 3. 微调`telechat-7b`时修改并行策略配置,配置如下: - -```python -# parallel_config -parallel_config: - data_parallel: 8 - model_parallel: 1 - pipeline_stage: 1 -``` - -- step4. 设置环境变量,变量配置如下: - -```bash -export MS_DEV_SIDE_EFFECT_LOAD_ELIM=3 # 去除TensorMove -export MS_MEMORY_POOL_RECYCLE=1 # 内存优化 -export GE_NOT_CUT=1 # 内存优化 -export MS_ASCEND_CHECK_OVERFLOW_MODE="INFNAN_MODE" # telechat_7b 不用设置该项 -``` - -- step 5. 添加预训练权重路径,修改配置文件中的`load_checkpoint`,配置预训练权重路径。 -- step 6. 启动微调任务,telechat-7b模型以单机八卡为例进行微调,命令如下: - -```shell -cd mindformers/research - -bash run_singlenode.sh \ -"python telechat/run_telechat.py \ ---config telechat/run_telechat_7b_910b_finetune.yaml \ ---load_checkpoint model_dir \ ---run_mode finetune \ ---train_data dataset_dir" \ -RANK_TABLE_FILE [0,8] 8 - -# 参数说明 -config: 配置文件路径 -load_checkpoint: 权重文件夹路径 -load_checkpoint: 预训练模型权重文件 -run_mode: 运行模式,微调时设置为finetune -train_data: 训练数据集文件夹路径 -``` - -## 推理(Telechat-7B为例) - -推理时将配置文件中`param_init_type`修改为和全量微调一致的数据类型。 - -```python -# context_config 910B推理添加ascend_config -context: - ascend_config: - precision_mode: "must_keep_origin_dtype" -``` - -### 单卡generate推理 - -1. telechat用于在线推理,输入按照 "question"的模板格式输入,910B支持单卡推理。主要参数配置参考: - -```yaml -load_checkpoint: 'path/to/telechat.ckpt' # 填写权重路径 -use_past: True # 使用增量推理 -use_parallel: False # 关闭并行模式 -``` - -2. 启动推理 - -```shell -cd research -python telechat/run_telechat_predict.py --input_file /path/to/infer_file.jsonl --vocab_file path/to/tokenizer.model --yaml_file path/to/config_yaml - -# 参数说明 -input_file: 输入的问题文件 -yaml_file: 模型的配置文件 -vocab_file: 配置词表路径 -``` - -7B 模型推理结果如下: - -```text -生抽与老抽的区别? 生抽和老抽是两种不同的酱油,它们的区别如下: -1. 原料不同:生抽是用大豆、小麦等谷物为原料制成的;而老抽则是用豆酱、面酱等发酵后的调味品为原料制成的。 -2. 制作工艺不同:生抽是通过将大豆浸泡在水中,然后经过蒸煮、发酵等过程制成的;而老抽则是在生抽的基础上加入一定比例的盐、糖、味精等调料,再进行发酵制成的。 -3. 口感和风味不同:生抽具有咸鲜的味道,口感比较清爽;而老抽则具有特殊的香味和味道,口感相对较重。 -总的来说,生抽和老抽都是酱油的不同种类,它们在原料、制作工艺和口感等方面都有所不同。 -``` - -12B 模型推理结果如下: - -```text -生抽与老抽的区别? 生抽和老抽是两种不同的酱油,它们在风味、色泽和用途上都有所区别。 -1. 颜色:生抽的颜色比较淡,而老抽的颜色较深。生抽的颜色呈红褐色或棕红色,而老抽的颜色则更偏向棕黑色。 -2. 味道:生抽具有鲜美的咸味和微甜的味道,而老抽则具有浓郁的酱香味和深厚的味道。由于生抽的含盐量较低,所以它更适合用于调味和提鲜,而老抽则更适合用于炖煮和烧煮菜肴。 -3. 用途:生抽通常用于调味,如炒菜、拌菜、腌制等,而老抽则更适合用于烧肉、炖菜、烧鱼等需要突出酱香味的菜肴。 -总之,生抽和老抽在颜色、味道和用途上都有所不同,可以根据个人口味和烹饪需求选择适合的酱油品种。 -``` diff --git a/research/telechat/telechat.py b/research/telechat/telechat.py deleted file mode 100644 index a548b82f..00000000 --- a/research/telechat/telechat.py +++ /dev/null @@ -1,447 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""Telechat models' APIs.""" -import copy -import numpy as np -import mindspore.common.dtype as mstype - -from mindspore import Tensor, nn -from mindspore.context import ParallelMode -from mindspore.ops import operations as P -from mindspore.ops import functional as F -from mindspore.parallel._utils import _get_parallel_mode, _is_sharding_propagation - -from mindformers.core.loss.loss import CrossEntropyLoss -from mindformers.models.utils import lazy_inline -from mindformers.models.modeling_utils import PreTrainedModel -from mindformers.modules.layers import Linear, FreqsMgr -from mindformers.modules.transformer.transformer import LowerTriangularMaskWithDynamic -from mindformers.modules.transformer.op_parallel_config import _check_config -from mindformers.tools.logger import logger -from mindformers.models.llama.llama_layer import LlamaRMSNorm -from research.telechat.telechat_config import TelechatConfig -from research.telechat.telechat_layer import TelechatEmbedding -from research.telechat.telechat_transformer import TelechatDecodeLayer - -__all__ = ['TelechatModel', 'TelechatForCausalLM'] - - -class TelechatPreTrainedModel(PreTrainedModel): - """ - An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained - models. - """ - - config_class = TelechatConfig - base_model_prefix = "telechat" - - -def layer_compute_dtype(layer, layer_id, offset, parallel_config, n_layers, select_recompute=False): - r""" - Default setting for the pipeline is: `(layer_id + offset) // (layers / pipeline_stage)`. - - Args: - layer(Cell) - Represents the transformer block - parallel_config(dict) - Parallel Config - layer_id(int) - Means the layer index for the current module, counts from zero. - offset(Union[int, List[int]]) - Means the layer_index needs a offset, if there are other modules in the net. - n_layers(int) - The total layers used for the model. - """ - pp_dis = max(int((n_layers + 1) / parallel_config.pipeline_stage), 1) - if isinstance(offset, list): - if len(offset) != parallel_config.pipeline_stage: - raise ValueError(f"The length of `offset` {len(offset)} do not match " - "`pipeline stage` {parallel_config.pipeline_stage}.") - i = min(layer_id // pp_dis, parallel_config.pipeline_stage - 1) - offset_layer = offset[i] - elif isinstance(offset, int): - offset_layer = offset - else: - raise TypeError(f"`offset` must be `int` of list of `int`, but got {type(offset)}.") - - pp_id = min((layer_id + offset_layer) // pp_dis, parallel_config.pipeline_stage - 1) - layer.pipeline_stage = pp_id - - # Used for optimizer's fusion tag - dis = max(int((n_layers + 1) / parallel_config.gradient_aggregation_group), 1) - if parallel_config.pipeline_stage > 1: - layer.set_comm_fusion(2) - else: - layer.set_comm_fusion(int((layer_id + offset_layer) / dis) + 1) - if isinstance(parallel_config.recompute, bool): - if parallel_config.recompute and not select_recompute: - layer.recompute() - else: - if parallel_config.recompute.recompute and not select_recompute: - layer.recompute( - recompute_slice_activation=parallel_config.recompute.recompute_slice_activation) - - -class TelechatModel(TelechatPreTrainedModel): - r""" - Transformer decoder consisting of *config.num_hidden_layers* layers. 
Each layer is a [`TelechatDecoderLayer`] - Args: - config(TelechatConfig): the config of network - - Returns: - output: Tensor, the output of Telechat decoderlayer - """ - - def __init__(self, - config: TelechatConfig = None): - super().__init__(config, auto_prefix=True) - _check_config(config.parallel_config) - self.dtype = config.compute_dtype - self.hidden_size = config.hidden_size - self.num_layers = config.num_layers - self.n_head = config.num_heads - self.head_dim = self.hidden_size // self.n_head - self.pad_token_id = config.pad_token_id - self.is_first_iteration = True - self.use_past = config.use_past - self.is_dynamic = config.is_dynamic - self.use_flash_attention = config.use_flash_attention - if self.use_flash_attention: - logger.info("Enable flash attention.") - elif config.use_flash_attention: - logger.info("Current MindSpore do not support flash attention.") - - self.shape = P.Shape() - self.reshape = P.Reshape().add_prim_attr("skip_redistribution", True) - self.cast = P.Cast() - self.tile = P.Tile() - self.expand_dims = P.ExpandDims() - self.gather = P.Gather() - self.slice = P.StridedSlice() - - self.freqs_mgr = FreqsMgr(head_dim=self.head_dim, - seq_length=config.seq_length, - max_position_embedding=config.max_position_embedding, - rotary_dtype=config.rotary_dtype, - theta=config.theta, - scaling_factor=config.scaling_factor, - extend_method=config.extend_method) - self.casual_mask = LowerTriangularMaskWithDynamic(seq_length=config.seq_length, - compute_type=config.compute_dtype, - is_dynamic=config.is_dynamic, - pad_token_id=config.pad_token_id, - use_flash_attention=config.use_flash_attention) - self.tok_embeddings = TelechatEmbedding(vocab_table_size=config.vocab_size, - embedding_size=config.hidden_size, - param_init_type=config.param_init_type) - self.layers = nn.CellList() - for layer_id in range(config.num_layers): - layer = TelechatDecodeLayer(config.seq_length, - layer_id, - dim=config.hidden_size, - n_heads=config.num_heads, - n_kv_heads=config.n_kv_heads, - hidden_dropout_prob=config.hidden_dropout_prob, - attention_dropout_prob=config.attention_dropout_prob, - intermediate_size=config.intermediate_size, - ffn_dim_multiplier=config.ffn_dim_multiplier, - norm_eps=config.rms_norm_eps, - qkv_has_bias=config.qkv_has_bias, - compute_dtype=config.compute_dtype, - layernorm_compute_dtype=config.layernorm_compute_type, - softmax_compute_dtype=config.softmax_compute_type, - rotary_dtype=config.rotary_dtype, - param_init_type=config.param_init_type, - use_past=config.use_past, - use_flash_attention=config.use_flash_attention, - block_size=config.block_size, - num_blocks=config.num_blocks, - is_dynamic=config.is_dynamic, - use_rope_slice=config.use_rope_slice, - parallel_config=config.parallel_config) - layer_compute_dtype(layer, layer_id, config.offset, config.parallel_config, - config.num_layers, select_recompute=config.parallel_config.recompute.select_recompute) - self.layers.append(layer) - self.norm_out = LlamaRMSNorm(config.hidden_size, config.rms_norm_eps, - compute_type=config.layernorm_compute_type) - - dp = config.parallel_config.data_parallel - if not (_get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,) and _is_sharding_propagation()): - self.tok_embeddings.pipeline_stage = 0 - if config.parallel_config.pipeline_stage > 1: - self.norm_out.pipeline_stage = config.parallel_config.pipeline_stage - 1 - self.tok_embeddings.set_comm_fusion(2) - self.norm_out.set_comm_fusion(2) - else: - 
self.tok_embeddings.set_comm_fusion(config.parallel_config.gradient_aggregation_group) - self.norm_out.set_comm_fusion(config.parallel_config.gradient_aggregation_group) - - self.tok_embeddings.shard(config.parallel_config) - self.casual_mask.shard(config.parallel_config) - self.norm_out.shard((dp, 1, 1)) - - # pylint: disable=W0613 - def construct(self, tokens: Tensor, batch_valid_length=None, batch_index=None, zactivate_len=None, - block_tables=None, slot_mapping=None): - """ - Forward of telechat model. - - Args: - tokens: the tokenized inputs with datatype int32 - input_position(Tensor): current position, used by model.predict. - init_reset(bool, optional): A bool tensor with shape [1], used to clear the past key parameter and - past value parameter used in the incremental prediction. Default True. - batch_valid_length(Tensor): the past calculated the index with datatype int32, used for incremental - prediction. Tensor of shape :math:`(batch_size,)`. Default None. - block_tables (Tensor[int64]): Store mapping tables for each sequence. - slot_mapping (Tensor[int32]): Store token cache physical slot index. - Returns: - output: Tensor, the output of telechat decoderlayer - """ - # preprocess - bs, seq_len = self.shape(tokens) - mask = None - if self.use_past: - if self.is_first_iteration: - freqs_cis = self.freqs_mgr(seq_len) - if self.use_flash_attention: - if self.enable_asd_op: - mask = self.casual_mask(tokens) # mask: [bs, seq, seq] - mask = self.cast(mask, mstype.float16) - else: - mask = self.casual_mask(tokens) # mask: [bs, seq, seq] - else: - freqs_cis = self.freqs_mgr.increment(batch_valid_length) - else: - freqs_cis = self.freqs_mgr(seq_len) - mask = self.casual_mask(tokens) # mask: [bs, seq, seq] - - # tokens: [bs, seq/1] - h, embedding_weight = self.tok_embeddings(tokens) - h = self.reshape(h, (bs, seq_len, self.hidden_size)) - # h: [bs, seq/1, hidden_dim] - for i in range(self.num_layers): - h = self.layers[i](h, freqs_cis, mask, batch_valid_length=batch_valid_length, block_tables=block_tables, - slot_mapping=slot_mapping) - output = self.norm_out(h) - return output, embedding_weight - -class TelechatHead(nn.Cell): - """Head for Telechat to get the logits of each token in the vocab.""" - def __init__(self, - in_channels, - out_channels, - compute_dtype="float16", - parallel_config=None): - super(TelechatHead, self).__init__() - copied_parallel_config = copy.deepcopy(parallel_config) - self.in_channels = in_channels - self.out_channels = out_channels - self.dtype = compute_dtype - self.cast = P.Cast() - self.reshape = P.Reshape() - dp = copied_parallel_config.data_parallel - mp = copied_parallel_config.model_parallel - if parallel_config.vocab_emb_dp or (out_channels % mp != 0): - self.matmul = P.MatMul(transpose_b=True).shard(((dp, 1), (1, 1))) - else: - self.matmul = P.MatMul(transpose_b=True).shard(((dp, 1), (mp, 1))) - - def construct(self, x, embedding_weight=None): - out_shape = P.Shape()(x)[:-1] + (self.out_channels,) - x = self.reshape(x, (-1, self.in_channels)) - ori_dtype = F.dtype(x) - weight = self.cast(embedding_weight, self.dtype) - x = self.cast(x, self.dtype) - x = self.matmul(x, weight) - x = self.cast(x, ori_dtype) - output = self.reshape(x, out_shape) - return output - -class TelechatForCausalLM(TelechatPreTrainedModel): - r""" - Provide telechat training loss or logits through network. - - Args: - config (TelechatConfig): The config of telechat model. 
- - Returns: - output: Tensor, the output of telechat decoderlayer - """ - - @lazy_inline - def __init__(self, config: TelechatConfig = None): - super(TelechatForCausalLM, self).__init__(config, auto_prefix=True) - _check_config(config.parallel_config) - self.config = config - self.model_name = config.model_name - self.ignore_token_id = config.ignore_token_id - self.pad_token_id = config.pad_token_id - self.use_past = config.use_past - self.vocab_size = config.vocab_size - self.is_first_iteration = True - - self.shape = P.Shape() - self.reshape = P.Reshape() - if config.is_dynamic: - self.reshape.add_prim_attr("skip_redistribution", True) - self.cast = P.Cast() - self.slice = P.StridedSlice() - self.logits_slice = P.StridedSlice() - self.not_equal = P.NotEqual() - self.mul = P.Mul() - self.add = P.Add() - self.ones = P.Ones() - self.gather = P.Gather(1) - self.sub_batch_valid_len = P.Sub() - self.model = TelechatModel(config=config) - if self.model_name == 'telechat_12b': - self.lm_head = Linear(in_channels=config.hidden_size, - out_channels=config.vocab_size, - has_bias=False, - compute_dtype=config.compute_dtype, - param_init_type=config.param_init_type, - skip_redistribution=config.is_dynamic, - weight_init="normal") # meta default: xavier_normal - else: - self.lm_head = TelechatHead(in_channels=config.hidden_size, - out_channels=config.vocab_size, - compute_dtype=config.compute_dtype, - parallel_config=config.parallel_config) - - mp = config.parallel_config.model_parallel - vocab_size = config.vocab_size - loss_parallel_config = copy.deepcopy(config.parallel_config) - if vocab_size % mp != 0: - logger.warning("The vocab size of Loss is: %s, it is not divide by model_parallel: %s", - vocab_size, mp) - logger.warning("Now, the model_parallel num of Loss will be changed: mp = 1") - loss_parallel_config.model_parallel = 1 - self.loss = CrossEntropyLoss(parallel_config=loss_parallel_config) - self.seq_length = config.seq_length - - dp = config.parallel_config.data_parallel - if not (_get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,) and _is_sharding_propagation()): - self.slice.shard(((dp, 1),)) - self.logits_slice.shard(((dp, 1, 1),)) - self.not_equal.shard(((dp, 1), ())) - self.mul.shard(((dp, 1), (dp, 1))) - self.add.shard(((dp, 1), ())) - self.gather.shard(((dp, 1, 1), (dp,))) - self.sub_batch_valid_len.shard(((1,), ())) - if self.model_name == 'telechat_12b': - if config.parallel_config.vocab_emb_dp or (vocab_size % mp != 0): - self.lm_head.shard(strategy_matmul=((dp, 1), (1, 1))) - else: - self.lm_head.shard(strategy_matmul=((dp, 1), (mp, 1))) - if config.parallel_config.pipeline_stage > 1: - self.lm_head.pipeline_stage = config.parallel_config.pipeline_stage - 1 - - self.load_checkpoint(config) - self.set_model_predict_config() - - # pylint: disable=W0613 - def prepare_inputs_for_generation(self, input_ids, **kwargs): - return { - "input_ids": Tensor(input_ids, mstype.int32) - } - - # pylint: disable=W0613 - def prepare_inputs_for_predict_layout(self, input_ids, **kwargs): - """Get telechat model input tuple for transform ckpt.""" - input_ids = Tensor(input_ids, mstype.int32) - labels = Tensor(kwargs["labels"]) if "labels" in kwargs else None - bs = input_ids.shape[0] - slot_mapping = Tensor(np.ones(shape=tuple([bs])), mstype.int32) - return input_ids, labels, None, None, None, None, None, None, None, None, None, slot_mapping - - def set_dynamic_inputs(self, **kwargs): - dynamic_input_ids = Tensor(shape=[None, None], dtype=mstype.int32) - dynamic_input_position = 
Tensor(shape=[None], dtype=mstype.int32) - dynamic_init_reset = Tensor([False], mstype.bool_) - dynamic_batch_valid_length = Tensor(shape=[None, None], dtype=mstype.int32) - dynamic_block_tables = Tensor(shape=[None, None], dtype=mstype.int32) - dynamic_slot_mapping = Tensor(shape=[None], dtype=mstype.int32) - self.set_inputs(dynamic_input_ids, None, dynamic_input_position, None, None, None, dynamic_init_reset, - dynamic_batch_valid_length, None, None, dynamic_block_tables, dynamic_slot_mapping) - logger.info("Set dynamic input for telechat.") - - def add_flags_custom(self, is_first_iteration): - """Add customized attributes for specific cells in the model.""" - self.add_flags(is_first_iteration=is_first_iteration) - self.model.add_flags(is_first_iteration=is_first_iteration) - for layer in self.model.layers: - layer.add_flags(is_first_iteration=is_first_iteration) - layer.attention.infer_attention.add_flags(is_first_iteration=is_first_iteration) - layer.attention.infer_attention.rotary_embedding.add_flags(is_first_iteration=is_first_iteration) - - # pylint: disable=W0613 - def construct(self, input_ids, labels=None, input_position=None, position_ids=None, attention_mask=None, - input_embeds=None, init_reset=True, batch_valid_length=None, batch_index=None, zactivate_len=None, - block_tables=None, slot_mapping=None): - r""" - TelechatForCausalLM forward. - - Args: - input_ids(Tensor): the tokenized inputs with datatype int32, Tensor of shape :math:`(batch, seq\_length)`. - labels(Tensor): the tokenized labels with datatype int32, Tensor of shape :math:`(batch, seq\_length)`. - input_position(Tensor): current position, used by model.predict. - position_ids(Tensor): Reserved param, not used. - attention_mask(Tensor): Reserved param, not used. - input_embeds(Tensor): Reserved param, not used. - init_reset(bool, optional): A bool tensor with shape [1], used to clear the past key parameter and - past value parameter used in the incremental prediction. Default True. - batch_valid_length(Tensor): the past calculated the index with datatype int32, used for incremental - prediction. Tensor of shape :math:`(batch_size,)`. Default None. - block_tables (Tensor[int64]): Store mapping tables for each sequence. - slot_mapping (Tensor[int32]): Store token cache physical slot index. - Returns: - Tensor: The loss or (logits, tokens, input_mask) of the network. 
- """ - bsz, seqlen = self.shape(input_ids) - if self.use_past: - if not isinstance(batch_valid_length, Tensor): - batch_valid_length = self.ones((bsz,), mstype.int32) - - tokens = input_ids - if batch_valid_length is not None: - batch_valid_length = self.reshape(batch_valid_length, (-1,)) - if not self.is_first_iteration: - batch_valid_length = self.sub_batch_valid_len(batch_valid_length, 1) - output, embedding_weight = self.model(tokens, batch_valid_length, batch_index, zactivate_len, block_tables, - slot_mapping) - pre_gather = (not self.use_past or self.is_first_iteration) and batch_valid_length is not None - if pre_gather: - output = self.gather(output, self.sub_batch_valid_len(batch_valid_length, 1), 1) - if self.model_name == 'telechat_12b': - logits = self.lm_head(output) - else: - logits = self.lm_head(output, embedding_weight) - input_mask = self.cast(self.not_equal(tokens, self.pad_token_id), mstype.float32) - if labels is not None: - input_mask = labels - labels = input_ids - if not self.training: - if not pre_gather: - logits = self.reshape(logits, (bsz, seqlen, -1)) - logits = self.cast(logits, mstype.float32) - # makes cast effective to avoid allgather issue in Mindspore1.10 - input_mask = self.add(input_mask, 1) - return logits, tokens, input_mask - logits = self.logits_slice(logits, (0, 0, 0), (bsz, seqlen - 1, self.vocab_size), (1, 1, 1)) - labels = self.slice(labels, (0, 1), (bsz, seqlen), (1, 1)) - input_mask = self.slice(input_mask, (0, 1), (bsz, seqlen), (1, 1)) - if logits.ndim > 2: - logits = self.reshape(logits, (-1, logits.shape[-1])) - logits = self.cast(logits, mstype.float32) - labels = self.reshape(labels, (-1,)) - input_mask = self.reshape(input_mask, (-1,)) - loss = self.loss(logits, labels, input_mask) - return loss diff --git a/research/telechat/telechat_config.py b/research/telechat/telechat_config.py deleted file mode 100644 index 35f5e51c..00000000 --- a/research/telechat/telechat_config.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Telechat Config API.""" -from typing import Optional -from mindformers.models.utils import convert_mstype -from mindformers.models.configuration_utils import PretrainedConfig -from mindformers.modules.transformer.transformer import default_transformer_config, TransformerOpParallelConfig - -__all__ = ['TelechatConfig'] - - -class TelechatConfig(PretrainedConfig): - """ - Telechat config class which defines the model size. - - Args: - batch_size (Optional[int]): batch size for input data, use in predict. - seq_length (Optional[int]): The sequence length of input_ids, default is 1024. - vocab_size (`int`, *optional*, defaults to 50257): - Vocabulary size of the BERT model. - hidden_size (`int`, *optional*, defaults to 768): - Dimensionality of the encoder layers and the pooler layer. 
- num_layers (`int`, *optional*, defaults to 12): - Number of hidden layers in the Transformer encoder. - num_heads (`int`, *optional*, defaults to 12): - Number of attention heads for each attention layer in the Transformer encoder. - multiple_of (Optional[int]): Define SwiGLU hidden layer size multiples, default 256. - n_kv_heads (Optional[int]): Define multi group head attention heads number, default None. - ffn_dim_multiplier (Optional[int]): Define ffn layer dim multiples, default None. - rms_norm_eps (Optional[float]): The epsilon value of the denominator. Default 1e-5. - bos_token_id (Optional[int]): The id of the *beginning-of-sequence* token. - eos_token_id (Optional[int]): The id of the *end-of-sequence* token. - pad_token_id (Optional[int]): The id of the *padding* token. - ignore_token_id (Optional[int]): The id of the *ignoring* token. - compute_dtype (Optional[str]): - Linear layer compute dtype, default is "float16". - layernorm_compute_type (Optional[str]): - layernorm compute dtype, default is "float32". - softmax_compute_type (Optional[str]): - softmax compute dtype, default is "float32". - rotary_dtype (Optional[str]): - rope compute dtype, default is "float32". - param_init_type (Optional[str]): - parameter initial dtype, default is "float16". - qkv_has_bias (Optional[bool]): - Whether the Query, Key, and Value projection has bias. - use_past (`bool`, *optional*, defaults to `False`): - Whether the model should use the past last key/values attentions - (if applicable to the model) to speed up decoding. - parallel_config(TransformerOpParallelConfig): - The parallel configure. Default `default_transformer_config`, - an instance of `TransformerOpParallelConfig` with default args. - extend_method(str): The extend method of seq length of inferencem,default None. - use_flash_attention(bool): Whether enable flash attention ops, default False. - offset(int): Offset of transformer layer when set pipeline stage number. - checkpoint_name_or_path (Optional[str]): - checkpoint path or name used to load to the network. - repetition_penalty (`float`, *optional*, defaults to 1.0): - The parameter for repetition penalty. 1.0 means no penalty. See [this - paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. - max_decode_length (`int`, *optional*, defaults to 1024): - The maximum length the generated tokens can have. Corresponds to the length of the input prompt + - `max_new_tokens`. Its effect is overridden by `max_new_tokens`, if also set. - top_k (`int`, *optional*, defaults to 5): - The number of highest probability vocabulary tokens to keep for top-k-filtering. - top_p (`float`, *optional*, defaults to 1.0): - If set to float < 1, only the smallest set of most probable tokens with probabilities - that add up to `top_p` or higher are kept for generation. - do_sample (`bool`, *optional*, defaults to `False`): - Whether or not to use sampling ; use greedy decoding otherwise. - - Returns: - Class, TelechatConfig. 
- """ - - model_type = "telechat" - - def __init__(self, - batch_size: int = 1, - seq_length: int = 2048, - hidden_size: int = 4096, - num_layers: int = 32, - num_heads: int = 32, - hidden_dropout_prob: float = 1.0, - attention_dropout_prob: float = 1.0, - n_kv_heads: Optional[int] = None, - max_position_embedding: Optional[int] = None, - intermediate_size: Optional[int] = None, - vocab_size: int = 32000, # defined later by tokenizer - ffn_dim_multiplier: Optional[int] = None, - rms_norm_eps: float = 1e-5, - bos_token_id: int = 1, - eos_token_id: int = 2, - pad_token_id: int = 0, - ignore_token_id: int = -100, - theta: float = 10000.0, - compute_dtype: str = "float16", - layernorm_compute_type: str = "float32", - softmax_compute_type: str = "float32", - rotary_dtype: str = "float32", - param_init_type: str = "float16", - qkv_has_bias: bool = False, - parallel_config: TransformerOpParallelConfig = default_transformer_config, - use_past: bool = False, - extend_method: str = "None", - scaling_factor: float = 1.0, - is_dynamic: bool = False, - use_rope_slice: bool = False, - use_flash_attention: bool = False, - fine_grain_interleave: int = 1, - offset: int = 0, - checkpoint_name_or_path: str = "", - repetition_penalty: float = 1.0, - max_decode_length: int = 1024, - block_size: int = 16, - num_blocks: int = 512, - top_k: int = 5, - top_p: float = 1.0, - do_sample: bool = True, - **kwargs): - super(TelechatConfig, self).__init__(**kwargs) - self.batch_size = batch_size - self.seq_length = seq_length - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.num_layers = num_layers - self.num_heads = num_heads - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_dropout_prob = attention_dropout_prob - self.max_position_embedding = max_position_embedding if max_position_embedding else seq_length - self.intermediate_size = intermediate_size - self.n_kv_heads = n_kv_heads - self.ffn_dim_multiplier = ffn_dim_multiplier - self.rms_norm_eps = rms_norm_eps - self.param_init_type = convert_mstype(param_init_type) - self.qkv_has_bias = qkv_has_bias - self.layernorm_compute_type = convert_mstype(layernorm_compute_type) - self.softmax_compute_type = convert_mstype(softmax_compute_type) - self.rotary_dtype = convert_mstype(rotary_dtype) - self.compute_dtype = convert_mstype(compute_dtype) - self.parallel_config = parallel_config - self.checkpoint_name_or_path = checkpoint_name_or_path - self.bos_token_id = bos_token_id - self.eos_token_id = eos_token_id - self.pad_token_id = pad_token_id - self.ignore_token_id = ignore_token_id - self.use_past = use_past - self.extend_method = extend_method - self.scaling_factor = scaling_factor - self.is_dynamic = is_dynamic - self.use_rope_slice = use_rope_slice - self.use_flash_attention = use_flash_attention - self.fine_grain_interleave = fine_grain_interleave - self.offset = offset - self.repetition_penalty = repetition_penalty - self.max_decode_length = max_decode_length - self.top_k = top_k - self.top_p = top_p - self.do_sample = do_sample - self.theta = theta - self.block_size = block_size - self.num_blocks = num_blocks diff --git a/research/telechat/telechat_layer.py b/research/telechat/telechat_layer.py deleted file mode 100644 index 2eabf8af..00000000 --- a/research/telechat/telechat_layer.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Telechat Model Layers' APIs.""" -from mindspore.common.parameter import Parameter -from mindspore import nn -import mindspore.common.dtype as mstype -from mindspore.ops import operations as P -from mindspore.ops import functional as F -from mindspore.nn.cell import Cell - -try: - from mindspore._checkparam import Validator -except ImportError: - import mindspore._checkparam as Validator -from mindspore import log as logger -from mindspore.common.initializer import initializer -from mindspore.parallel._utils import _get_parallel_mode -from mindspore.context import ParallelMode -from mindformers.models.llama.llama_layer import LlamaSiLU -from mindformers.modules.layers import Linear, Dropout, _check_input_dtype, _args_type_validator_check, _valid_value_checks -from mindformers.tools.logger import _LogActionOnce - - -class TelechatEmbedding(Cell): - """ - Embedding Layer. - - Args: - - **vocab_size** (int): Size of the dictionary of embeddings. - - **embedding_size** (int): The size of each embedding vector. - - **param_init_type** (mstype): The param init type, default mstype.float32. - - **parallel_config** (TransformerOpParallelConfig): The parallel config of network. Default - `default_embedding_parallel_config`, an instance of `EmbeddingOpParallelConfig` with default args. - - **param_init** (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the embedding_table. - Refer to class `initializer` for the values of string when a string - is specified. Default: 'normal'. - Inputs: - - **input_ids** (Tensor) - The tokenized inputs with datatype int32 with shape (batch_size, seq_length) - - Outputs: - - **output** (Tensor) - The embedding vector for the input with shape (batch_size, - seq_length, embedding_size). 
- """ - - @_LogActionOnce(m_logger=logger, key='Embedding', - no_warning=_get_parallel_mode() in (ParallelMode.STAND_ALONE,)) - @_args_type_validator_check(vocab_table_size=Validator.check_positive_int, - embedding_size=Validator.check_positive_int) - def __init__(self, vocab_table_size, embedding_size, param_init_type=mstype.float32, param_init='normal', - parallel_optimizer=False): - super().__init__() - self.vocab_table_size = vocab_table_size - self.embedding_size = embedding_size - self.embedding_weight = Parameter( - initializer(param_init, [self.vocab_table_size, self.embedding_size], dtype=param_init_type), - name='embedding_weight', parallel_optimizer=parallel_optimizer) - self.gather = P.Gather() - - def construct(self, input_ids): - """Forward of vocab embedding.""" - _check_input_dtype(F.dtype(input_ids), "input_ids", [mstype.int32, mstype.int64], self.cls_name) - output = self.gather(self.embedding_weight, input_ids, 0) - return output, self.embedding_weight.value() - - def shard(self, parallel_config): - """sharding for embedding""" - dp = parallel_config.data_parallel - mp = parallel_config.model_parallel - if parallel_config.vocab_emb_dp: - self.gather.shard(((1, 1), (dp, 1))) - logger.info(f"Using {dp} data parallel for the embedding lookup.") - else: - if self.vocab_table_size % mp != 0: - logger.warning("The vocab size of Loss is: %s, it is not divide by model_parallel: %s", - self.vocab_table_size, mp) - logger.warning("Now, the model_parallel num of Loss will be changed: mp = 1") - self.gather.shard(((1, 1), (dp, 1))) - else: - self.gather.shard(((mp, 1), (dp, 1))) - logger.info(f"Using {dp} data parallel and {mp} " - f"model parallel for the embedding lookup.") - -class TelechatLinear(Linear): - # pylint: disable=W0212 - """ - Linear function for Telechat. - """ - - def __init__(self, - in_channels, - out_channels, - weight_init='normal', - bias_init='zeros', - has_bias=True, - activation=None, - transpose_b=True, - expert_num=1, - outer_batch=1, - param_init_type=mstype.float32, - compute_dtype=mstype.float16, - skip_redistribution=False, - keep_prob=1.0): - super(TelechatLinear, self).__init__( - in_channels, - out_channels, - weight_init=weight_init, - bias_init=bias_init, - has_bias=has_bias, - activation=activation, - transpose_b=transpose_b, - expert_num=expert_num, - outer_batch=outer_batch, - param_init_type=param_init_type, - skip_redistribution=skip_redistribution, - compute_dtype=compute_dtype) - - self.dropout = Dropout(keep_prob=keep_prob) - - def construct(self, x): - """construct of linear.""" - out_shape = P.Shape()(x)[:-1] + (self.out_channels,) - x = P.Reshape()(x, (-1, self.in_channels)) - if self.expert_flag: - x = P.Reshape()(x, (self.outer_batch, self.expert_num, -1, self.in_channels)) - weight = self.cast(self.weight, self.dtype) - x = self.matmul(x, weight) - x = self.dropout(x) - if self.has_bias: - x = self.bias_add(x, self.cast(self.bias, self.dtype)) - if self.activation_flag: - x = self.activation(x) - output = P.Reshape()(x, out_shape) - return output - - -class TelechatFeedForward(Cell): - r""" - Telechat FeedForward. - - .. math:: - (xW_1 * xW_3)W_2 - - Inputs: - - **x** (Tensor) - should be `[batch, seq_length, hidden_size] or [batch * seq_length, hidden_size]`. - Float tensor. - - Outputs: - Tensor, the output of this layer after mapping. The shape is `[batch, seq_length, hidden_size] or - [batch * seq_length, hidden_size]`. - - Raises: - ValueError: `hidden_dim` is not a multiple of the model parallel way. 
- ValueError: `dim` is not a multiple of the model parallel way. - """ - - @_LogActionOnce(m_logger=logger, key='FeedForward', - no_warning=_get_parallel_mode() in (ParallelMode.STAND_ALONE,)) - @_args_type_validator_check(dim=Validator.check_positive_int, - compute_dtype=_valid_value_checks([mstype.float32, mstype.float16, mstype.bfloat16], - "FeedForward"), - param_init_type=_valid_value_checks([mstype.float32, mstype.float16, mstype.bfloat16], - "FeedForward")) - def __init__(self, dim, - intermediate_size=None, - hidden_dim=None, - hidden_dropout_prob=1.0, - hidden_act=LlamaSiLU, - ffn_dim_multiplier=None, - compute_dtype=mstype.float16, - param_init_type=mstype.float32, - is_dynamic=False): - super().__init__() - - if hidden_act is None or not (isinstance(hidden_act, str) or issubclass(hidden_act, nn.Cell)): - raise TypeError(f"For FeedForward cell, the hidden_act should str type or nn.Cell type, " - f"but got {hidden_act}.") - - if intermediate_size is not None: - hidden_dim = intermediate_size - else: - if ffn_dim_multiplier is not None: - hidden_dim = ffn_dim_multiplier - self.hidden_dropout_prob = hidden_dropout_prob - self.dtype = compute_dtype - self.hidden_act = hidden_act - self.dim = dim - self.hidden_dim = hidden_dim - - self.mul = P.Mul() - self.cast = P.Cast() - self.w1 = TelechatLinear(in_channels=dim, - out_channels=hidden_dim, - activation=hidden_act, - has_bias=False, - compute_dtype=compute_dtype, - param_init_type=param_init_type, - skip_redistribution=is_dynamic) - - self.w2 = TelechatLinear(in_channels=hidden_dim, - out_channels=dim, - has_bias=True, - compute_dtype=compute_dtype, - param_init_type=param_init_type, - skip_redistribution=is_dynamic, - keep_prob=1-self.hidden_dropout_prob) - - self.w3 = TelechatLinear(in_channels=dim, - out_channels=hidden_dim, - has_bias=False, - compute_dtype=compute_dtype, - param_init_type=param_init_type, - skip_redistribution=is_dynamic) - - def construct(self, x): - """Forward process of the FeedForward""" - _check_input_dtype(F.dtype(x), "x", [mstype.float32, mstype.float16, mstype.bfloat16], self.cls_name) - x = self.cast(x, self.dtype) - # [bs, seq, hidden_dim] or [bs * seq, hidden_dim] - gate = self.w1(x) # dp,1 -> dp, mp - hidden = self.w3(x) # dp,1 -> dp, mp - hidden = self.mul(hidden, gate) # dp,mp -> dp, mp - output = self.w2(hidden) # dp,mp -> dp, 1 - return output - - def shard(self, parallel_config): - """sharding for feedforward""" - dp = parallel_config.data_parallel - mp = parallel_config.model_parallel - if self.hidden_dim % mp != 0: - raise ValueError("For 'FeedForward', the class variable 'hidden_dim' must be a multiple of the" - "num of model parallel, but got the hidden_dim is {} and the num of model " - "parallel is {}.".format(self.hidden_dim, mp)) - if self.dim % mp != 0: - raise ValueError("For 'FeedForward', the class variable 'dim' must be a multiple of the num of " - "model parallel, but got the dim is {} and the num of model parallel is {}." 
- .format(self.dim, mp)) - self.w1.shard(((dp, 1), (mp, 1)), strategy_activation=((dp, mp),)) - self.w1.activation.shard(((dp, mp),)) - self.w2.shard(((dp, mp), (1, mp))) - self.w3.shard(((dp, 1), (mp, 1))) - self.mul.shard(((dp, mp), (dp, mp))) diff --git a/research/telechat/telechat_predict_utils.py b/research/telechat/telechat_predict_utils.py deleted file mode 100644 index 1f0ac9ed..00000000 --- a/research/telechat/telechat_predict_utils.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Telechat predict utils.""" - -from collections import deque -import copy - -class History: - """Init from a list of dict, use deque to meet some special situation.""" - def __init__(self, tokenizer, history): - self.input_history = deque() - self.tokenizer = tokenizer - if history: - self._transfer_from_list(history) - - def _transfer_from_list(self, history): - for message in history: - content = message.get("content") - # the token result may not be equal to the result model gen - message.update(self.tokenizer(content)) - self.input_history.append(message) - - def append(self, message): - content = message.get("content") - if "input_ids" not in message or "attention_mask" not in message: - message.update(self.tokenizer(content)) - self.input_history.append(message) - - def append_left(self, message): - content = message.get("content") - if "input_ids" not in message or "attention_mask" not in message: - message.update(self.tokenizer(content)) - self.input_history.appendleft(message) - - def pop(self): - x = self.input_history.pop() - return x - - def pop_left(self): - x = self.pop_left() - return x - - def update(self, message): - self.input_history.pop() - self.append(message) - - def __len__(self): - return self.input_history.__len__() - - def __str__(self): - return self.input_history.__str__() - - def __copy__(self): - new_instance = type(self)(self.tokenizer, []) - new_instance.input_history = copy.copy(self.input_history) - return new_instance - - def __deepcopy__(self, memodict=None): - new_instance = type(self)(self.tokenizer, []) - new_instance.input_history = copy.deepcopy(self.input_history) - return new_instance diff --git a/research/telechat/telechat_preprocess.py b/research/telechat/telechat_preprocess.py deleted file mode 100644 index 619c4425..00000000 --- a/research/telechat/telechat_preprocess.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""generate mindrecord script""" -import os -import argparse -import collections -from random import shuffle -import datasets -import numpy as np -from tqdm import tqdm -from mindspore.mindrecord import FileWriter -from telechat_tokenizer import TelechatTokenizer - -class TelechatDataset: - """TelechatDataset""" - def __init__(self, output_path, seed, dataset_name): - self.output_path = output_path - self.seed = seed - self.raw_datasets = datasets.load_dataset(path="json", data_files=dataset_name) - - def get_train_data(self): - dataset = self.raw_datasets["train"] - return dataset - - def get_prompt(self, sample): - return "<_user>" + sample['input'] + "<_bot>" - - def get_prompt_and_answer(self, sample): - return "<_user>" + sample['input'] + "<_bot>" + sample['output'] + "<_end>" - - -def write_instance_to_file(writer, instance): - """write the instance to file""" - input_ids = instance["input_ids"] - labels = instance["labels"] - - features = collections.OrderedDict() - features["input_ids"] = np.asarray(input_ids).astype(np.int32) - features["labels"] = np.asarray(labels).astype(np.int32) - writer.write_raw_data([features]) - return features - - -def make_input_mask(labels, tokenizer): - """generate input mask""" - user_token_id = tokenizer.convert_tokens_to_ids(args.user_token) - bot_token_id = tokenizer.convert_tokens_to_ids(args.bot_token) - end_token_id = tokenizer.convert_tokens_to_ids(args.end_token) - target_labels = np.zeros((1, args.max_length)) - indices_user = np.where(np.array(labels) == user_token_id)[0] - indices_bot = np.where(np.array(labels) == bot_token_id)[0] - indices_end = np.where(np.array(labels) == end_token_id)[0] - assert len(indices_user) == len(indices_bot) == len(indices_end) - for i in range(len(indices_bot)): - user_idx = indices_user[i] - bot_idx = indices_bot[i] - end_idx = indices_end[i] - target_labels[0][bot_idx:end_idx + 1] = 1 - target_labels[0][user_idx] = 1 - return target_labels - - -def process_dataset(current_dataset, tokenizer, max_seq_len): - """process dataset.""" - dataset = [] - all_lines = [] - for _, tmp_data in enumerate(current_dataset): - input_data = tmp_data['input'] - if not input_data.startswith("<_user>"): - input_data = "<_user>" + input_data - output = tmp_data['output'] - if "<_bot>" in input_data: ### multiturn - concat_line = "" - input_turns = input_data.split("<_user>")[1:] - for item in input_turns: - if "<_bot>" in item: - concat_line += "<_user>" + item + "<_end>" - else: - concat_line += "<_user>" + item + "<_bot>" - concat_line += output + "<_end>" - else: ####single turn - concat_line = str(input_data) + "<_bot>" + str(output) + "<_end>" - assert concat_line.count("<_user>") == concat_line.count("<_bot>") == concat_line.count("<_end>") - all_lines.append(concat_line) - shuffle(all_lines) - previous_corpus_token_cnt = 0 - shard = [] - padding_out = [] - for corpus in tqdm(all_lines): - corpus_ids = tokenizer(corpus) - if previous_corpus_token_cnt + len(corpus_ids["input_ids"]) < max_seq_len: - shard.append(corpus) - previous_corpus_token_cnt += len(corpus_ids["input_ids"]) - else: - shard_output = "".join(shard) - shard_output = (args.max_length - previous_corpus_token_cnt) * tokenizer.pad_token + shard_output - assert len(tokenizer(shard_output)["input_ids"]) == max_seq_len - if shard_output.count("<_user>") >= 1: - 
padding_out.append(shard_output) - if len(corpus_ids["input_ids"]) < max_seq_len: - shard = [corpus] - previous_corpus_token_cnt = len(corpus_ids["input_ids"]) - else: - shard = [] - previous_corpus_token_cnt = 0 - print("prompt length: ", len(padding_out)) - for dt in padding_out: - tokens = tokenizer(dt) - tokens['labels'] = make_input_mask(tokens["input_ids"], tokenizer) - dataset.append(tokens) - return dataset - - -def make_dataset(): - """make dataset.""" - raw_dataset = TelechatDataset(args.output_path, args.seed, args.input_dataset_file) - train_dataset = raw_dataset.get_train_data() - tokenizer = TelechatTokenizer(args.vocab_file_path, fast_tokenizer=True, - trust_remote_code=True, padding_side="left") - train_dataset = process_dataset(train_dataset, tokenizer, args.max_length) - print("***** Writing to output files *****") - print("Output File: %s", args.output_dataset_file) - writer = FileWriter(args.output_dataset_file, 1) - data_schema = {"input_ids": {"type": "int32", "shape": [-1]}, - "labels": {"type": "int32", "shape": [-1]}} - writer.add_schema(data_schema, "lm-schema") - for dataset in tqdm(train_dataset): - instance = {"input_ids": dataset["input_ids"], "labels": dataset["labels"]} - write_instance_to_file(writer, instance=instance) - writer.commit() - print(">>>> Transform dataset finished <<<<") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--input_dataset_file", type=str, default="") - parser.add_argument("--output_path", type=str, default="") - parser.add_argument('--vocab_file_path', default='', type=str, help='which model to use.') - parser.add_argument("--max_length", type=int, default=2048) - parser.add_argument("--seed", type=int, default=1233) - parser.add_argument("--user_token", type=str, default="<_user>", help="user token") - parser.add_argument("--bot_token", type=str, default="<_bot>", help="bot token") - parser.add_argument("--end_token", type=str, default="<_end>", help="end token") - args = parser.parse_args() - - if args.output_path and not os.path.exists(args.output_path): - os.makedirs(args.output_path, exist_ok=True) - args.output_dataset_file = os.path.join(args.output_path, "new_dataset.mindrecord") - make_dataset() diff --git a/research/telechat/telechat_tokenizer.py b/research/telechat/telechat_tokenizer.py deleted file mode 100644 index 73542366..00000000 --- a/research/telechat/telechat_tokenizer.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright 2024 Telechat Inc. All Rights Reserved. - -# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. -# -# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX -# and OPT implementations in this library. It has been modified from its -# original forms to accommodate minor architectural differences compared -# to GPT-NeoX and OPT used by the Meta AI team that trained the model. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# ============================================================================ -"""Telechat tokenizer APIs.""" - -import os -from shutil import copyfile -from typing import Any, Dict, List, Optional - -import sentencepiece as spm - -from mindformers.tools import logger -from mindformers.models.tokenization_utils import PreTrainedTokenizer, AddedToken -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType - -VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"} - - -@MindFormerRegister.register(MindFormerModuleType.TOKENIZER) -class TelechatTokenizer(PreTrainedTokenizer): - r""" - Tokenize the input string and convert them into the ids. The tokenizer use the sentence piece internally. - - Args: - model_path(str): The spiece.model file path. - add_bos(bool): The flag defines whether add bos token, Default True. - eos_token(str): The token that represents the end-of-sentence. Default "". - unk_token(str): The token that represents the unknown. Default "". - pad_token(str): The token that represents the pad. Default "". - sp_model_kwargs(str): Other kwargs for sp_model`. - add_bos_token(bool): Whether or not to add the bos_token_id to the left of the input. Default "True" - add_eos_token(bool): Whether or not to add the eos_token_id to the right of the input. Default "True" - clean_up_tokenization_spaces (bool): Whether or not the model should cleanup the spaces that were added when - splitting the input text during the tokenization process. Default "False" - **kwargs: Other kwargs that will be passed into the base class of the `Tokenizer`. - - Outputs: - A dict contains the processed ids, attention_mask that specific by the member `MODEL_INPUT_NAME` - of the subclass. - """ - - vocab_files_names = VOCAB_FILES_NAMES - model_input_names = ["input_ids", "attention_mask"] - FILE_LIST = ['tokenizer_config.json'] - - def __init__( - self, - vocab_file, - unk_token="", - bos_token="<_start>", - eos_token="<_end>", - pad_token="<_pad>", - sp_model_kwargs: Optional[Dict[str, Any]] = None, - add_bos_token=False, - add_eos_token=False, - clean_up_tokenization_spaces=False, - **kwargs, - ): - self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs - bos_token = AddedToken(bos_token, lstrip=False, rstrip=False, single_word=False, normalized=True) \ - if isinstance(bos_token, str) else bos_token - eos_token = AddedToken(eos_token, lstrip=False, rstrip=False, single_word=True, normalized=True) \ - if isinstance(eos_token, str) else eos_token - unk_token = AddedToken(unk_token, lstrip=False, rstrip=False, single_word=True, normalized=True) \ - if isinstance(unk_token, str) else unk_token - pad_token = AddedToken(pad_token, lstrip=False, rstrip=False, single_word=True, normalized=True) \ - if isinstance(pad_token, str) else pad_token - - self.vocab_file = vocab_file - self.add_bos_token = add_bos_token - self.add_eos_token = add_eos_token - self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs) - self.sp_model.Load(vocab_file) - - super().__init__( - bos_token=bos_token, - eos_token=eos_token, - unk_token=unk_token, - pad_token=pad_token, - add_bos_token=add_bos_token, - add_eos_token=add_eos_token, - sp_model_kwargs=self.sp_model_kwargs, - clean_up_tokenization_spaces=clean_up_tokenization_spaces, - **kwargs, - ) - - def __getstate__(self): - state = self.__dict__.copy() - state["sp_model"] = None - return state - - def __setstate__(self, d): - self.__dict__ = d - self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs) - 
self.sp_model.Load(self.vocab_file) - - @property - def vocab_size(self): - """Returns vocab size""" - return self.sp_model.get_piece_size() - - def get_vocab(self): - """Returns vocab as a dict""" - vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)} - vocab.update(self.added_tokens_encoder) - return vocab - - def _tokenize(self, text): - """Returns a tokenized string.""" - return self.sp_model.encode(text, out_type=str) - - def _convert_token_to_id(self, token): - """Converts a token (str) in an id using the vocab.""" - return self.sp_model.piece_to_id(token) - - def _convert_id_to_token(self, index): - """Converts an index (integer) in a token (str) using the vocab.""" - token = self.sp_model.IdToPiece(index) - return token - - def convert_tokens_to_string(self, tokens): - """Converts a sequence of tokens (string) in a single string.""" - current_sub_tokens = [] - out_string = "" - prev_is_special = False - for i, token in enumerate(tokens): - # make sure that special tokens are not decoded using sentencepiece model - if token in self.all_special_tokens: - if not prev_is_special and i != 0: - out_string += " " - out_string += self.sp_model.decode(current_sub_tokens) + token - prev_is_special = True - current_sub_tokens = [] - else: - current_sub_tokens.append(token) - prev_is_special = False - out_string += self.sp_model.decode(current_sub_tokens) - return out_string - - # pylint: disable=R1710 - def save_vocabulary(self, save_directory, filename_prefix=None): - """ - Save the vocabulary and special tokens file to a directory. - - Args: - save_directory (`str`): - The directory in which to save the vocabulary. - - Returns: - `Tuple(str)`: Paths to the files saved. - """ - if not os.path.isdir(save_directory): - logger.error(f"Vocabulary path ({save_directory}) should be a directory") - return - out_vocab_file = os.path.join( - save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"] - ) - - if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file): - copyfile(self.vocab_file, out_vocab_file) - elif not os.path.isfile(self.vocab_file): - with open(out_vocab_file, "wb") as fi: - content_spiece_model = self.sp_model.serialized_model_proto() - fi.write(content_spiece_model) - - return out_vocab_file - - def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None): - bos_token_id = [self.bos_token_id] if self.add_bos_token else [] - eos_token_id = [self.eos_token_id] if self.add_eos_token else [] - - output = bos_token_id + token_ids_0 + eos_token_id - - if token_ids_1 is not None: - output = output + bos_token_id + token_ids_1 + eos_token_id - - return output - - def get_special_tokens_mask(self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, - already_has_special_tokens: bool = False): - """ - Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding - special tokens using the tokenizer `prepare_for_model` method. - - Args: - token_ids_0 (`List[int]`): - List of IDs. - token_ids_1 (`List[int]`, *optional*): - Optional second list of IDs for sequence pairs. - already_has_special_tokens (`bool`, *optional*, defaults to `False`): - Whether or not the token list is already formatted with special tokens for the model. - - Returns: - `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token. 
- """ - if already_has_special_tokens: - return super().get_special_tokens_mask( - token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True - ) - - bos_token_id = [1] if self.add_bos_token else [] - eos_token_id = [1] if self.add_eos_token else [] - - if token_ids_1 is None: - return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id - return ( - bos_token_id - + ([0] * len(token_ids_0)) - + eos_token_id - + bos_token_id - + ([0] * len(token_ids_1)) - + eos_token_id - ) - - def create_token_type_ids_from_sequences(self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None): - """ - Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT - sequence pair mask has the following format: - - ``` - 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 - | first sequence | second sequence | - ``` - - if token_ids_1 is None, only returns the first portion of the mask (0s). - - Args: - token_ids_0 (`List[int]`): - List of ids. - token_ids_1 (`List[int]`, *optional*): - Optional second list of IDs for sequence pairs. - - Returns: - `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s). - """ - bos_token_id = [self.bos_token_id] if self.add_bos_token else [] - eos_token_id = [self.eos_token_id] if self.add_eos_token else [] - - output = [0] * len(bos_token_id + token_ids_0 + eos_token_id) - - if token_ids_1 is not None: - output += [1] * len(bos_token_id + token_ids_1 + eos_token_id) - - return output diff --git a/research/telechat/telechat_transformer.py b/research/telechat/telechat_transformer.py deleted file mode 100644 index 03b6fb04..00000000 --- a/research/telechat/telechat_transformer.py +++ /dev/null @@ -1,497 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Telechat transformer Layer's APIs.""" -from typing import Tuple, Optional -import math - -from mindspore import nn, __version__ -import mindspore.common.dtype as mstype -from mindspore.common.tensor import Tensor -from mindspore.context import ParallelMode -from mindspore.ops import operations as P -from mindspore.parallel._utils import _get_parallel_mode, _is_sharding_propagation - -from mindformers.tools.utils import is_version_ge -from mindformers.tools.logger import logger -from mindformers.modules.flash_attention import FlashAttention -from mindformers.modules.layers import _check_input_dtype, Dropout, RotaryEmbedding -from mindformers.modules.transformer import TransformerOpParallelConfig -from mindformers.modules.infer_attention import InferAttention -from mindformers.models.llama.llama_layer import LlamaRMSNorm -from telechat_layer import TelechatLinear, TelechatFeedForward - -class TelechatAttention(nn.Cell): - r""" - This is an implementation of multihead attention in Telechat. - - Args: - - **src_seq_length** (int): The sequence length of the query vector. 
- - **tgt_seq_length** (int): The sequence length of the key and value vector. - - **dim** (int): The hidden size of the input. - - **head_dim** (int): The dim of head. - - **n_heads** (int): The number of the heads. - - **compute_dtype** (dtype.Number): The computation type of dense. Default mstype.float16. - Should be mstype.float32 or mstype.float16. - - **softmax_compute_type** (dtype.Number): The type of softmax computation module. Default mstype.float32. - Should be mstype.float32 or mstype.float16. - - **param_init_type** (dtype.Number): The parameter initialization type of the module. Default mstype. - float32. Should be mstype.float32 or mstype.float16. - - **qkv_has_bias** (bool): Whether Q/K/V in attention has bias or not. - - **use_past** (bool): Use the past state to compute, used for incremental prediction. - For example, if we have two words and want to generate the ten more words. - We just need to compute the two words' state only once, and generate the next word one by one. - When use_past is True, there are two steps to run the prediction. - In the first step, set the is_first_iteration to be True by - `model.add_flags_recursive(is_first_iteration=True)`, and pass the full inputs. Then, set the - is_first_iteration to be False by `model.add_flags_recursive(is_first_iteration=False)`. At this moment, - pass the single step's input tensor, and loop it. Default False. - - **parallel_config** (OpParallelConfig): The parallel configure. Default `default_dpmp_config`, - an instance of `OpParallelConfig` with default args. - - Inputs: - - **x** (Tensor) - The input tokens with shape (batch_size, src_seq_length, hidden_size) or - (batch_size * src_seq_length, hidden_size), if the use_past is False or is_first_iteration=True. - Otherwise, must be (batch_size, 1, hidden_size) - - **freqs_cis** (Tuple) - The precompute freqs and mask for rotary position embedding used in attention. - - **attention_mask** (Tensor) - If the use_past is False or is_first_iteration=True, the attention mask - matrix should ba (batch_size, src_seq_length, tgt_seq_length), or None. None means there will be no mask - in softmax computation. Otherwise, the mask must be (batch_size, 1, tgt_seq_length) - - **key_past** (Tensor) - Float16 tensor with shape (batch_size, num_heads, head_dim, tgt_seq_length). - The past calculated key vector. Used for incremental prediction when the use_past is True. - Default None. - - **value_past** (Tensor) - Float16 tensor with shape (batch_size, num_heads, tgt_seq_length, - head_dim). - The past calculated value vector. Used for incremental prediction when the use_past is True. - Default None. - - **batch_valid_length** (Tensor) - Int32 tensor with shape (batch_size,) the past calculated the index. - Used for incremental prediction when the use_past is True. Default None. - - **block_tables** (Tensor[int64]) - Store mapping tables for each sequence. - - **slot_mapping** (Tensor[int32]) - Store token cache physical slot index. - Outputs: - Tuple, a tuple contains(`output`, `layer_present`) - - - **output** (Tensor) - Tensor, the float tensor of the output of the layer with - shape (batch_size, src_seq_length, hidden_size) or (batch_size * src_seq_length, hidden_size), - if the use_past is False or is_first_iteration=True. Otherwise, it will be (batch_size, 1, hidden_size). - - - **layer_present** (Tuple) - A tuple of the Tensor of the projected key and value vector with - ((batch_size, num_heads, head_dim, tgt_seq_length), - (batch_size, num_heads, tgt_seq_length, head_dim)). 
- """ - def __init__(self, - seq_length, - dim: int = 512, - n_heads: int = 8, - hidden_dropout_prob: float = 1.0, - attention_dropout_prob: float = 1.0, - n_kv_heads: Optional[int] = None, - compute_dtype=mstype.float16, - softmax_compute_dtype=mstype.float32, - rotary_dtype=mstype.float32, - param_init_type=mstype.float32, - qkv_has_bias=False, - use_past=False, - is_dynamic=False, - use_rope_slice=False, - use_flash_attention=False, - block_size: Optional[int] = None, - num_blocks: Optional[int] = None, - parallel_config=TransformerOpParallelConfig()): - super().__init__() - self.seq_length = seq_length - self.hidden_size = dim - self.n_head = n_heads - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_dropout_prob = attention_dropout_prob - self.head_dim = dim // n_heads - self.n_kv_head = n_heads if n_kv_heads is None else n_kv_heads - self.n_rep = self.n_head // self.n_kv_head - self.kv_dim = self.n_kv_head * self.head_dim - self.block_size = block_size - self.num_blocks = num_blocks - - self.dtype = compute_dtype - self.softmax_dtype = softmax_compute_dtype - self.is_first_iteration = True - self.use_past = use_past - self.use_flash_attention = use_flash_attention - - if self.hidden_size % self.n_head != 0: - raise ValueError("For 'MultiHeadAttention', the class variable 'hidden_size' must be a multiple " - "of 'n_head', but got the hidden_size is {} and the n_head is {}." - .format(self.hidden_size, self.n_head)) - if self.n_kv_head % parallel_config.model_parallel != 0: - raise ValueError("For 'MultiHeadAttention', the class variable 'n_kv_head' must be a multiple of " - "'parallel_config.model_parallel', but got the n_kv_head is {} " - "and the parallel_config.model_parallel is {}." - .format(self.n_kv_head, parallel_config.model_parallel)) - - self.inv_norm_factor = Tensor(1.0 / math.sqrt(self.head_dim), dtype=compute_dtype) - - self.shape = P.Shape() - self.reshape = P.Reshape().add_prim_attr("skip_redistribution", True) - self.transpose = P.Transpose() - self.merger_head_transpose = P.Transpose() - self.batch_matmul = P.BatchMatMul() - self.batch_matmul_q_k = P.BatchMatMul(transpose_b=True) - self.mul = P.Mul() - self.add = P.Add() - self.softmax = P.Softmax() - self.cast = P.Cast() - self.cast_attn = P.Cast() - self.tile_kv = P.Tile() - self.split = P.Split(output_num=2, axis=-1) - self.apply_rotary_emb = RotaryEmbedding(self.head_dim, rotary_dtype, use_rope_slice=use_rope_slice) - self.attention_dropout = Dropout(1-self.attention_dropout_prob) - self.wo = TelechatLinear(in_channels=self.hidden_size, - out_channels=self.hidden_size, - has_bias=True, - compute_dtype=compute_dtype, - param_init_type=param_init_type, - skip_redistribution=is_dynamic, - keep_prob=1-self.hidden_dropout_prob) - self.wq = TelechatLinear(self.hidden_size, - self.hidden_size, - has_bias=qkv_has_bias, - compute_dtype=compute_dtype, - param_init_type=param_init_type, - skip_redistribution=is_dynamic) - self.wk_v = TelechatLinear(self.hidden_size, - self.n_kv_head * self.head_dim * 2, - has_bias=qkv_has_bias, - compute_dtype=compute_dtype, - param_init_type=param_init_type, - skip_redistribution=is_dynamic) - dp = parallel_config.data_parallel - mp = parallel_config.model_parallel - if not (_get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,) and _is_sharding_propagation()): - self.transpose.shard(((dp, 1, mp, 1),)) - self.merger_head_transpose.shard(((dp, mp, 1, 1),)) - self.batch_matmul_q_k.shard(((dp, mp, 1, 1), (dp, mp, 1, 1))) - self.batch_matmul.shard(((dp, mp, 1, 1), (dp, mp, 1, 
1))) - self.mul.shard(((dp, mp, 1, 1), ())) - self.add.shard(((dp, 1, 1, 1), (dp, mp, 1, 1))) - self.softmax.shard(((dp, mp, 1, 1),)) - self.tile_kv.shard(((dp, mp, 1, 1),)) - - self.apply_rotary_emb.shard(parallel_config) - - if qkv_has_bias: - self.wq.shard(((dp, 1), (mp, 1)), ((dp, mp), (mp,))) - self.wk_v.shard(((dp, 1), (mp, 1)), ((dp, mp), (mp,))) - else: - self.wq.shard(((dp, 1), (mp, 1))) - self.wk_v.shard(((dp, 1), (mp, 1))) - self.wo.shard(((dp, mp), (1, mp))) - if parallel_config.use_seq_parallel and self.is_first_iteration: - self.wo.shard(((dp, mp), (1, mp)), out_strategy_matmul=((dp * mp, 1),)) - if parallel_config.recompute.select_recompute: - self.apply_rotary_emb.recompute() - self.tile_kv.recompute() - self.batch_matmul_q_k.recompute() - self.mul.recompute() - self.add.recompute() - self.cast_attn.recompute() - self.softmax.recompute() - self.batch_matmul.recompute() - - if not is_version_ge(__version__, "2.2.0"): - self.use_flash_attention = False - logger.info("Current MindSpore do not support flash attention, please upgrade to 2.2.0 or higher") - if self.use_flash_attention: - self.flash_attention = FlashAttention(head_num=n_heads, - scale_value=1. / math.sqrt(self.head_dim), - input_layout='BNSD', - pre_tokens=65536, - next_tokens=0) - self.flash_attention.shard(parallel_config) - - if self.use_past: - self.infer_attention = InferAttention(self.n_head, - self.head_dim, - self.n_kv_head, - pa_n_head_split=self.n_head // mp, - pa_n_kv_head_split=self.n_kv_head // mp, - scale_value=1. / math.sqrt(self.head_dim), - pre_tokens=65536, - next_tokens=0, - block_size=self.block_size, - num_blocks=self.num_blocks, - use_flash_attention=self.use_flash_attention, - rotary_cos_format=2, - rotary_dtype=rotary_dtype, - compute_dtype=compute_dtype) - - def construct(self, x: Tensor, freqs_cis: Tuple[Tensor, Tensor], mask=None, batch_valid_length=None, - block_tables=None, slot_mapping=None): - """Forward process of the MultiHeadAttention""" - ori_dtype = x.dtype - # [bs, seq/1, hidden_dim] - bs, seq_len, _ = self.shape(x) - query = self.cast(self.wq(x), self.dtype) # dp, 1 -> dp, mp - key_value = self.cast(self.wk_v(x), self.dtype) - key, value = self.split(key_value) - - # key and value for current token(s) - if self.use_past: - context_layer = self.infer_attention(query, key, value, batch_valid_length, block_tables, slot_mapping, - freqs_cis, mask) - else: - query = self.transpose(self.reshape(query, (bs, seq_len, self.n_head, self.head_dim)), (0, 2, 1, 3)) - key = self.transpose(self.reshape(key, (bs, seq_len, self.n_kv_head, self.head_dim)), (0, 2, 1, 3)) - value = self.transpose(self.reshape(value, (bs, seq_len, self.n_kv_head, self.head_dim)), (0, 2, 1, 3)) - query, key = self.apply_rotary_emb(query, key, freqs_cis) # dp, mp, 1, 1 - if self.use_flash_attention: - context_layer = self.flash_attention(query, key, value, mask) - context_layer = self._merge_heads(context_layer) - else: - key = self._repeat_kv(key, self.n_rep) - value = self._repeat_kv(value, self.n_rep) - context_layer = self._attn(query, key, value, mask) - - # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim] - output = self.wo(context_layer) # dp, mp -> dp, 1 / dp * mp, 1 - output = self.cast(output, ori_dtype) - return output - - def _repeat_kv(self, x, rep): - if rep == 1: - return x - bs, n_kv_head, seqlen, head_dim = self.shape(x) - x = self.reshape(x, (bs, n_kv_head, 1, seqlen * head_dim)) - x = self.tile_kv(x, (1, 1, rep, 1)) - x = self.reshape(x, (bs, n_kv_head * rep, seqlen, head_dim)) - return x - - 
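# Illustrative sketch (not part of the original patch): what the _repeat_kv step above computes
# for grouped-query attention. Each of the n_kv_head key/value heads is tiled n_rep times so that
# K and V line up with the n_head query heads. A plain NumPy equivalent, with made-up sizes, shows
# that the reshape/tile/reshape sequence equals a simple repeat along the head axis.
import numpy as np

bs, n_kv_head, seqlen, head_dim, n_rep = 2, 4, 16, 8, 2  # illustrative sizes only

kv = np.random.randn(bs, n_kv_head, seqlen, head_dim).astype(np.float32)

# Same three steps as _repeat_kv: flatten (seq, head_dim), tile along a new axis, expand back.
out = kv.reshape(bs, n_kv_head, 1, seqlen * head_dim)
out = np.tile(out, (1, 1, n_rep, 1))
out = out.reshape(bs, n_kv_head * n_rep, seqlen, head_dim)

# Reference: repeat every KV head n_rep times along the head axis.
assert np.allclose(out, np.repeat(kv, n_rep, axis=1))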
def _merge_heads(self, x): - """ - convert a 4d input to a 2d or 3d output - - Inputs: - x: input tensor - - Output: - x_merge: the 2d output - """ - # [bs, n_head, seq/1, head_dim] - x = self.merger_head_transpose(x, (0, 2, 1, 3)) # dp,mp,1,1 -> dp,1,mp,1 - # [bs, seq/1, n_head, head_dim] - bs, seq_len, n_head, head_dim = self.shape(x) - # [bs, seq/1, hidden_dim] - new_shape = (bs, seq_len, n_head * head_dim) - x_merge = self.reshape(x, new_shape) - return x_merge - - def _attn(self, query, key, value, mask): - """ - Get the weighted score along the seq_length - - Inputs: - query: the query matrix - key: the key matrix - value: the value matrix - mask: the attention mask adder matrix with shape (batch_size, - 1, seq_length, seq_length) - Outputs: - weighted_values: Tensor, the weighted sum scores - """ - # q, k: [bs, n_head, seq/1, head_dim], [bs, n_head, seq, head_dim] - score = self.batch_matmul_q_k(query, key) - # score: [bs, n_head, seq/1, seq] - score = self.mul(score, self.inv_norm_factor) - score = self.add(mask, score) - - attention_probs = self.softmax(self.cast_attn(score, self.softmax_dtype)) - # score, v: [bs, n_head, seq/1, seq], [bs, n_head, seq, head_dim] - attention_probs = self.attention_dropout(attention_probs) - weighted_values = self.batch_matmul(self.cast(attention_probs, self.dtype), value) - # [bs, n_head, seq/1, head_dim] - attention_merge = self._merge_heads(weighted_values) - # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim] - return attention_merge - - -class TelechatDecodeLayer(nn.Cell): - r""" - Transformer Layer. This is an implementation of the single layer of the transformer - encoder layer, including multihead attention and feedward layer. - - Args: - seq_length(int): The input sequence length. - layer_id(int): The layer id of current transformer block layer. - dim(int): The hidden size of the input. - num_heads(int): The number of the heads. - norm_eps (float): The epsilon value of the denominator. Default 1e-5. - compute_dtype(dtype.Number): The computation type of the layer. - Should be mstype.float32 or mstype.float16. Default mstype.float32. - layernorm_compute_type(dtype.Number): The computation type of the norm. - Should be mstype.float32 or mstype.float16. Default mstype.float32. - softmax_compute_type(dtype.Number): The computation type of the softmax in the attention. - Should be mstype.float32 or mstype.float16. Default mstype.float32. - param_init_type(dtype.Number): The parameter initialization type of the module. - Should be mstype.float32 or mstype.float16. Default mstype.float32. - qkv_has_bias(bool): Whether Q/K/V in attention has bias or not. - use_past(bool): Use the past state to compute, used for incremental prediction. For example, if we have two - words and want to generate the ten more words. We just need to compute the two words' state only once, - and generate the next word one by one. When use_past is True, there are two steps to run the prediction. - In the first step, set the is_first_iteration to be True by - `model.add_flags_recursive(is_first_iteration=True)`, and pass the full inputs. Then, set the - is_first_iteration to be False by `model.add_flags_recursive(is_first_iteration=False)`. - At this moment, pass the single step's input tensor, and loop it. Default False. - parallel_config(OpParallelConfig, MoEParallelConfig): The parallel configure. When MoE is applied, - MoEParallelConfig is effective, otherwise OpParallelConfig is effective. 
Default `default_dpmp_config`,
-            an instance of `OpParallelConfig` with default args.
-
-    Inputs:
-        - **x** (Tensor) - Float Tensor, shape should be [batch_size, seq_length, hidden_size] or
-            [batch_size * seq_length, hidden_size], if use_past is False or is_first_iteration=True. Otherwise,
-            should be [batch_size, 1, hidden_size].
-        - **freqs_cis** (Tuple) - The precomputed freqs and mask for the rotary position embedding used in attention.
-        - **input_mask** (Tensor) - Float Tensor. If use_past is False or is_first_iteration=True,
-            the attention mask matrix should be [batch_size, seq_length, seq_length], or None. None means there will
-            be no mask in the softmax computation. Otherwise, should be [batch_size, 1, hidden_size].
-        - **init_reset** (Tensor) - A bool tensor with shape [1], used to clear the past key parameter and
-            past value parameter used in the incremental prediction. Only valid when use_past is True. Default True.
-        - **batch_valid_length** (Tensor) - Int32 tensor with shape [batch_size], the index of the previously
-            calculated tokens. Used for incremental prediction when use_past is True. Default None.
-        - **block_tables** (Tensor[int64]) - Stores the mapping table for each sequence.
-        - **slot_mapping** (Tensor[int32]) - Stores the physical slot index of the token cache.
-    Outputs:
-        Tuple, a tuple containing (`output`, `layer_present`).
-
-        - **output** (Tensor) - The float tensor of the output of the layer with
-            shape (batch_size, seq_length, hidden_size) or (batch_size * seq_length, hidden_size), if use_past is
-            False or is_first_iteration=True. Otherwise, it will be (batch_size, 1, hidden_size).
-
-        - **layer_present** (Tuple) - A tuple of the Tensors of the projected key and value vectors with shapes
-            ((batch_size, num_heads, head_dim, seq_length),
-            (batch_size, num_heads, seq_length, head_dim)).
- - """ - def __init__(self, - seq_length, - layer_id, - dim: int = 512, - n_heads: int = 8, - hidden_dropout_prob: float = 1.0, - attention_dropout_prob: float = 1.0, - n_kv_heads: Optional[int] = None, - intermediate_size: Optional[int] = None, - ffn_dim_multiplier: Optional[int] = None, - norm_eps: float = 1e-5, - compute_dtype=mstype.float16, - layernorm_compute_dtype=mstype.float32, - softmax_compute_dtype=mstype.float32, - rotary_dtype=mstype.float32, - param_init_type=mstype.float32, - qkv_has_bias=False, - use_past=False, - is_dynamic=False, - use_rope_slice=False, - use_flash_attention=False, - block_size: Optional[int] = None, - num_blocks: Optional[int] = None, - parallel_config=TransformerOpParallelConfig()): - super().__init__() - self.seq_length = seq_length - self.layer_id = layer_id - self.hidden_size = dim - self.n_head = n_heads - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_dropout_prob = attention_dropout_prob - self.head_dim = self.hidden_size // self.n_head - self.n_kv_head = n_heads if n_kv_heads is None else n_kv_heads - self.dtype = compute_dtype - self.is_first_iteration = True - self.use_past = use_past - - self.shape = P.Shape() - self.reshape = P.Reshape().add_prim_attr("skip_redistribution", True) - self.add = P.Add() - self.attention_norm = LlamaRMSNorm(self.hidden_size, norm_eps, compute_type=layernorm_compute_dtype) - self.ffn_norm = LlamaRMSNorm(self.hidden_size, norm_eps, compute_type=layernorm_compute_dtype) - self.attention = TelechatAttention(seq_length=seq_length, - dim=dim, - n_heads=n_heads, - hidden_dropout_prob=hidden_dropout_prob, - attention_dropout_prob=attention_dropout_prob, - n_kv_heads=n_kv_heads, - compute_dtype=compute_dtype, - softmax_compute_dtype=softmax_compute_dtype, - rotary_dtype=rotary_dtype, - param_init_type=param_init_type, - qkv_has_bias=qkv_has_bias, - use_past=use_past, - is_dynamic=is_dynamic, - use_rope_slice=use_rope_slice, - use_flash_attention=use_flash_attention, - block_size=block_size, - num_blocks=num_blocks, - parallel_config=parallel_config) - self.feed_forward = TelechatFeedForward(dim=self.hidden_size, - intermediate_size=intermediate_size, - hidden_dim=4 * self.hidden_size, - hidden_dropout_prob=hidden_dropout_prob, - ffn_dim_multiplier=ffn_dim_multiplier, - compute_dtype=compute_dtype, - param_init_type=param_init_type, - is_dynamic=is_dynamic) - - dp = parallel_config.data_parallel - mp = parallel_config.model_parallel - if not (_get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,) and _is_sharding_propagation()): - self.feed_forward.shard(parallel_config) - self.add.shard(((dp, 1, 1), (dp, 1, 1))) - self.attention_norm.shard((dp, 1, 1)) - self.ffn_norm.shard((dp, 1, 1)) - self.feed_forward.mul.shard(((dp, 1, mp), (dp, 1, mp))) - - if parallel_config.use_seq_parallel and self.is_first_iteration: - self.add.shard(((dp, mp, 1), (dp, mp, 1))) - self.attention_norm.shard((dp, mp, 1)) - self.ffn_norm.shard((dp, mp, 1)) - self.feed_forward.w2.shard(((dp, mp), (1, mp)), out_strategy_matmul=((dp * mp, 1),)) - - def construct(self, x, freqs_cis, mask=None, batch_valid_length=None, block_tables=None, slot_mapping=None): - """ Forward of transformer block. 
""" - self._check_input(x, freqs_cis, mask) - # [bs, seq/1, hidden_dim] - input_x = self.attention_norm(x) - # [bs, seq/1, hidden_dim] - h = self.attention(input_x, freqs_cis, mask, batch_valid_length, block_tables, slot_mapping) - h = self.add(x, h) - ffn_norm = self.ffn_norm(h) - # [bs, seq/1, hidden_dim] - ffn_out = self.feed_forward(ffn_norm) - # [bs, seq/1, hidden_dim] or [bs * seq/1, hidden_dim] - out = self.add(h, ffn_out) - return out - - def _check_input(self, x, freqs_cis, mask): - r"""Check inputs""" - _check_input_dtype( - x.dtype, "x", [mstype.float32, mstype.float16, mstype.bfloat16], self.cls_name) - freqs_cos, freqs_sin, swap_mask = freqs_cis - _check_input_dtype(freqs_cos.dtype, "freqs_cos", - [mstype.float32, mstype.float16, mstype.bfloat16], self.cls_name) - _check_input_dtype(freqs_sin.dtype, "freqs_sin", - [mstype.float32, mstype.float16, mstype.bfloat16], self.cls_name) - if swap_mask is not None: - _check_input_dtype(swap_mask.dtype, "swap_mask", - [mstype.float32, mstype.float16, mstype.bfloat16], self.cls_name) - if mask is not None: - _check_input_dtype(mask.dtype, "input_mask", - [mstype.float32, mstype.float16, mstype.bfloat16, mstype.uint8], self.cls_name) - return True diff --git a/research/visualglm/attention.py b/research/visualglm/attention.py deleted file mode 100644 index 3daf65f9..00000000 --- a/research/visualglm/attention.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""glm attention adaptor for visualglm.""" - - -from mindspore import ops -from mindspore.ops import functional as F - -from mindformers.models.glm.attention import RotaryEmbeddingFP32SoftmaxSelfAttention - - -def split_tensor_along_last_dim(tensor, num_partitions): - """ - Split a tensor along its last dimension. - Used in construct function. - - Arguments: - tensor (Tensor): Input tensor. - num_partitions (int): Number of partitions to split the tensor. - """ - # Get the size and dimension. - last_dim = tensor.ndim - 1 - # Split. - tensor_list = ops.Split(axis=last_dim, output_num=num_partitions)(tensor) - return tensor_list - - -def transpose_for_scores(raw_tensor, last_size): - """Transpose a 3D tensor [b, s, np*hn] into a 4D tensor with - size [b, np, s, hn]. - """ - new_tensor_shape = raw_tensor.shape[:-1] + (-1, last_size) - raw_tensor = raw_tensor.view(*new_tensor_shape) - return raw_tensor - - -class SelfAttentionAdapter(RotaryEmbeddingFP32SoftmaxSelfAttention): - """ - RotaryEmbeddingFP32SoftmaxSelfAttention adaptor for visualglm. - """ - - def attention_forward(self, hidden_states, mask, position_ids, layer_id, key_past=None, value_past=None, - batch_valid_length=None): - """ - attention forward - - Input: - hidden_states (Tensor): Hidden layer states. - mask (Tensor): Same as `attention_mask`, used when batching sequences together. - position_ids (Tensor): Used to identify each token's position in the list of tokens. 
- layer_id (int): Layer id. - key_past (Tensor, optional): Default: None. - value_past (Tensor, optional): Default: None. - batch_valid_length (bool, optional): Default: None. - - return: - output (Tensor): Attention output. - layer_present (Tensor): Layer present, used for infer cache. - """ - mixed_raw_layer = self.query_key_value(hidden_states) - mixed_raw_layer = F.cast(mixed_raw_layer, self.compute_dtype) - - (mixed_query_layer, mixed_key_layer, mixed_value_layer) = \ - split_tensor_along_last_dim(mixed_raw_layer, 3) - # [1, 64, 32, 128] - query_layer = transpose_for_scores(mixed_query_layer, self.hidden_size_per_attention_head) - key_layer = transpose_for_scores(mixed_key_layer, self.hidden_size_per_attention_head) - value_layer = transpose_for_scores(mixed_value_layer, self.hidden_size_per_attention_head) - - if self.position_encoding_2d: - q1, q2 = self.split(query_layer) - k1, k2 = self.split(key_layer) - position_ids, block_position_ids = position_ids[:, 0, :], \ - position_ids[:, 1, :] - q1, k1 = self.rotary_emb(q1, k1, position_ids) - q2, k2 = self.rotary_emb(q2, k2, block_position_ids) - query_layer = self.concat_query((q1, q2)) - key_layer = self.concat_query((k1, k2)) - else: - # apply rotary embed on q, k: [bs, seq, num_heads, hidden_size] - # position_ids: bs, 2, seq_length - query_layer, key_layer = self.rotary_emb(query_layer, key_layer, position_ids) - - # key and value for current token(s) - # [bs, num_heads, hidden_size, seq_len] - value_layer = F.transpose(value_layer, (0, 2, 1, 3)) - key_present = key_layer - value_present = value_layer - if self.use_past: - # reshape - key_present = F.transpose(key_present, (0, 2, 3, 1)) - value_present = F.transpose(value_present, (0, 1, 3, 2)) - # The first graph with the input size of (bs, seq_length) - if self.is_first_iteration: - # Get the valid input length without padding - valid_length_vector = F.cast(self.less(self.range, batch_valid_length.view(-1, 1, 1)), - self.params_dtype) # [bs, 1, seq_len] - # Cover the key and value numbers corresponding to the padding position - key_present = self.mul1(key_present, self.expand_dims(valid_length_vector, 2)) - value_present = self.mul1(value_present, self.expand_dims(valid_length_vector, 2)) - # The second graph with the inpus size of (bs, 1) - # the shape of query is (bs, num_heads, 1, size_per_head) - # the shape of key is (bs, num_heads, size_per_head, 1) - # the shape of value is (bs, num_heads, 1, size_per_head) - else: - # Get the current token position index - # key_past: [batch_size, num_heads, size_per_head, seq_length] - valid_length = batch_valid_length - 1 - valid_length = F.reshape(valid_length, (-1, 1, 1)) # [bs, 1, 1] - # self.range: [bs, 1, config.seq_len] - valid_length_vector = F.cast(self.equal(valid_length, self.range), self.params_dtype) - # Pad the key and value to seq_length with only the position index not zero - current_key = self.mul1(self.tile(key_present, (1, 1, 1, self.seq_length)), - self.expand_dims(valid_length_vector, 2)) - current_value = self.mul1(self.tile(value_present, (1, 1, 1, self.seq_length)), - self.expand_dims(valid_length_vector, 2)) - # Concat the previous saved state and current state - key_present = self.add(key_past, current_key) # [batch_size, num_heads, size_per_head, seq_length] - value_present = self.add(value_past, current_value) - # update k v for attention - # [batch_size, num_heads, size_per_head, seq_length] -> [bs, num_heads, hidden_size, seq_len] - key_layer = F.transpose(key_present, (0, 3, 1, 2)) - # [batch_size, 
num_heads, size_per_head, seq_length] -> [bs, num_heads, seq_len, hidden_size] - value_layer = F.transpose(value_present, (0, 1, 3, 2)) - - layer_present = (key_present, value_present) - - # [batch_size, num_heads, size_per_head, seq_length] -> [seq_len, bs, num_heads, hidden_size] - query_layer = F.cast(query_layer, self.compute_dtype) - key_layer = F.cast(key_layer, self.compute_dtype) - value_layer = F.cast(value_layer, self.compute_dtype) - - context_layer = self.attention_fn(query_layer, key_layer, value_layer, mask, layer_id, True) - - output = self.dense(context_layer) - output = F.cast(output, self.params_dtype) - - if self.training: - output = self.output_dropout(output) - - return output, layer_present diff --git a/research/visualglm/context.cfg b/research/visualglm/context.cfg deleted file mode 100644 index 2bfc79df..00000000 --- a/research/visualglm/context.cfg +++ /dev/null @@ -1,8 +0,0 @@ -[ascend_context] -plugin_custom_ops=All -provider=ge -[ge_session_options] -ge.exec.formatMode=1 -ge.exec.precision_mode=must_keep_origin_dtype -ge.externalWeight=1 -ge.exec.atomicCleanPolicy=1 \ No newline at end of file diff --git a/research/visualglm/convert_weight.py b/research/visualglm/convert_weight.py deleted file mode 100644 index 2837d987..00000000 --- a/research/visualglm/convert_weight.py +++ /dev/null @@ -1,303 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -""" Convert checkpoint from salesforce.""" - -import os -import argparse -from collections import OrderedDict -import torch - -import mindspore as ms -from mindspore import Tensor -import mindspore.common.dtype as mstype - -ms.set_context(device_id=1) - -def convert_vit_weight(pt_weight_params, vit_mindspore_path): - """ - convert vit weight from torch - """ - vit_name_pt2ms = { - "mixins.eva.model.vit.transformer.word_embeddings.weight": "visual_encoder.cls_tokens", - "mixins.eva.model.vit.transformer.position_embeddings.weight": "visual_encoder.pos_embed", - "mixins.eva.model.vit.mixins.patch_embedding.proj.weight": "visual_encoder.patch_embed.proj.weight", - "mixins.eva.model.vit.mixins.patch_embedding.proj.bias": "visual_encoder.patch_embed.proj.bias", - "mixins.eva.model.vit.mixins.cls.ln_vision.weight": "ln_vision.gamma", - "mixins.eva.model.vit.mixins.cls.ln_vision.bias": "ln_vision.beta", - } - vit_name_pt2ms_reg = { - "mixins.eva.model.vit.mixins": "visual_encoder", - "mixins.eva.model.vit.transformer": "visual_encoder", - "layers": "blocks", - "input_layernorm.weight": "layernorm1.gamma", - "input_layernorm.bias": "layernorm1.beta", - "post_attention_layernorm.weight": "layernorm2.gamma", - "post_attention_layernorm.bias": "layernorm2.beta", - "attention.dense.weight": "attention.projection.weight", - "attention.dense.bias": "attention.projection.bias", - "mlp.dense_h_to_4h.weight": "output.mapping.weight", - "mlp.dense_h_to_4h.bias": "output.mapping.bias", - "mlp.dense_4h_to_h.weight": "output.projection.weight", - "mlp.dense_4h_to_h.bias": "output.projection.bias" - } - - ms_param_dict = [] - for pt_name, pt_tensor in pt_weight_params.items(): - ms_name = pt_name - numpy_value = pt_weight_params[pt_name].detach().numpy() - pt_dtype = str(pt_tensor.dtype) - if pt_dtype == "torch.float16": - ms_dtype = mstype.float16 - else: - ms_dtype = mstype.float32 - - data = Tensor(numpy_value, dtype=ms_dtype) - - # replace vit related params - if "vit" in ms_name: - for replace_from, replace_to in vit_name_pt2ms.items(): - if ms_name == replace_from: - ms_name = replace_to - - for replace_from, replace_to in vit_name_pt2ms_reg.items(): - ms_name = ms_name.replace(replace_from, replace_to) - - if "attention.query_key_value" in ms_name: - length = data.shape[0] // 3 - ms_name1 = ms_name.replace("query_key_value", "dense1") - ms_name2 = ms_name.replace("query_key_value", "dense2") - ms_name3 = ms_name.replace("query_key_value", "dense3") - ms_param_dict.append({"name": ms_name1, "data": data[:length]}) - ms_param_dict.append({"name": ms_name2, "data": data[length:length * 2]}) - ms_param_dict.append({"name": ms_name3, "data": data[length * 2:length * 3]}) - print(f"rename {pt_name} to {ms_name1}, {ms_name2} and {ms_name3} with type {data.dtype}") - elif "cls_tokens" in ms_name or "pos_embed" in ms_name: - ms_param_dict.append({"name": ms_name, "data": data.unsqueeze(0)}) - print(f"convert {pt_name} to {ms_name} with shape {data.unsqueeze(0).shape} and type {data.dtype}") - elif "output.mapping.weight" in ms_name or "output.projection.weight" in ms_name or \ - "attention.projection.weight" in ms_name: - ms_param_dict.append({"name": ms_name, "data": data.T}) - print(f"convert {pt_name} to {ms_name} with shape {data.T.shape} and type {data.dtype}") - else: - ms_param_dict.append({"name": ms_name, "data": data}) - print(f"convert {pt_name} to {ms_name} with shape {data.shape} and type {data.dtype}") - - if 
"ln_vision" in ms_name: - if "weight" in ms_name: - ms_param_dict.append({"name": "ln_vision.gamma", "data": data}) - else: - ms_param_dict.append({"name": "ln_vision.beta", "data": data}) - - print("\n----------------- convert vit pytorch model to mindspore model Finished! -----------------\n") - ms.save_checkpoint(ms_param_dict, vit_mindspore_path) - - -def convert_glm_weight(pt_weight_params, glm_mindspore_path): - """ - convert glm weight from torch - """ - num_layers = 28 - print('chatglm parameter convert....') - ms_param = [] - ms_param_lite = [] - for pt_name, pt_value in pt_weight_params.items(): - print('current parameter: ', pt_name) - if 'mixins.eva' in pt_name: - continue - if pt_name != 'mixins.chatglm-attn.rotary_emb.inv_freq': - if "transformer.word_embeddings.weight" in pt_name or "transformer.position_embeddings.weight" in pt_name: - pt_name = pt_name.replace("weight", "embedding_table") - ms_param_lite.append({"name": pt_name, "data": ms.Tensor(pt_value.numpy())}) - if "post_attention_layernorm" in pt_name or "input_layernorm" in pt_name or "final_layernorm" in pt_name: - pt_name = pt_name.replace("weight", "gamma") - pt_name = pt_name.replace("bias", "beta") - if "mixins.chatglm-final.lm_head" in pt_name: - pt_name = pt_name.replace("mixins.chatglm-final.lm_head", "lm_head") - ms_param.append({"name": pt_name, "data": ms.Tensor(pt_value.numpy())}) - else: - for layer_id in range(num_layers): - pt_name = f"transformer.layers.{layer_id}.attention.rotary_emb.inv_freq" - ms_param.append({"name": pt_name, "data": ms.Tensor(pt_value.numpy())}) - - if "ln_vision" in pt_name: - if "weight" in pt_name: - ms_param.append({"name": "ln_vision.gamma", "data": ms.Tensor(pt_value.numpy())}) - else: - ms_param.append({"name": "ln_vision.beta", "data": ms.Tensor(pt_value.numpy())}) - - if "glm_proj" in pt_name: - if "weight" in pt_name: - ms_param.append({"name": "llm_proj.weight", "data": ms.Tensor(pt_value.numpy())}) - else: - ms_param.append({"name": "llm_proj.bias", "data": ms.Tensor(pt_value.numpy())}) - - print('saving ms ckpt....') - - glm_for_lite_path = os.path.join(os.path.dirname(glm_mindspore_path), 'glm_6b_for_lite.ckpt') - ms.save_checkpoint(ms_param_lite, glm_for_lite_path) - ms.save_checkpoint(ms_param, glm_mindspore_path) - - - -def convert_qformer_weight(pt_weight_params, qformer_mindspore_path): - """ - convert qformer weight from torch - """ - qformer_name_convert_reg = { - "mixins.eva.model.qformer.transformer.final_layernorm.weight": "qformer.bert.encoder.final_layernorm.gamma", - "mixins.eva.model.qformer.transformer.final_layernorm.bias": "qformer.bert.encoder.final_layernorm.beta", - "mixins.eva.model.qformer.transformer.layers.": "qformer.bert.encoder.layer.", - ".attention.dense.": ".attention.output.dense.", - ".input_layernorm.weight": ".input_layernorm.gamma", - ".input_layernorm.bias": ".input_layernorm.beta", - ".post_attention_layernorm.weight": ".attention.output.layernorm.gamma", - ".post_attention_layernorm.bias": ".attention.output.layernorm.beta", - ".cross_attention.dense.": ".crossattention.output.dense.", - ".cross_attention.": ".crossattention.", - ".post_cross_attention_layernorm.weight": ".crossattention.output.layernorm.gamma", - ".post_cross_attention_layernorm.bias": ".crossattention.output.layernorm.beta", - ".mlp.dense_h_to_4h.": ".intermediate_query.dense.", - ".mlp.dense_4h_to_h.": ".output_query.dense.", - ".query.": ".self_att.query." 
- } - - ms_param_dict = [] - for pt_name, pt_tensor in pt_weight_params.items(): - ms_name = pt_name - numpy_value = pt_weight_params[pt_name].detach().numpy() - pt_dtype = str(pt_tensor.dtype) - if pt_dtype == "torch.float16": - ms_dtype = mstype.float16 - else: - ms_dtype = mstype.float32 - - data = Tensor(numpy_value, dtype=ms_dtype) - - # replace qformer related params - if "qformer" in ms_name: - for replace_from, replace_to in qformer_name_convert_reg.items(): - ms_name = ms_name.replace(replace_from, replace_to) - - if "query_key_value" in ms_name: - length = data.shape[0] // 3 - ms_name_query = ms_name.replace("query_key_value", "self_att.query") - ms_name_key = ms_name.replace("query_key_value", "self_att.key") - ms_name_value = ms_name.replace("query_key_value", "self_att.value") - ms_param_dict.append({"name": ms_name_query, "data": data[:length]}) - ms_param_dict.append({"name": ms_name_key, "data": data[length:length * 2]}) - ms_param_dict.append({"name": ms_name_value, "data": data[length * 2:length * 3]}) - print( - f"rename {pt_name} to {ms_name_query}, {ms_name_key} and {ms_name_value} with type {data.dtype}") - elif "key_value" in ms_name: - length = data.shape[0] // 2 - ms_name_key = ms_name.replace("key_value", "self_att.key") - ms_name_value = ms_name.replace("key_value", "self_att.value") - ms_param_dict.append({"name": ms_name_key, "data": data[:length]}) - ms_param_dict.append({"name": ms_name_value, "data": data[length:length * 2]}) - print(f"rename {pt_name} to {ms_name_key} and {ms_name_value} with type {data.dtype}") - - elif ms_name == "mixins.eva.model.qformer.transformer.word_embeddings.weight": - ms_name = "query_tokens" - shape = data.shape - data = data.reshape((1, shape[0], shape[1])) - ms_param_dict.append({"name": ms_name, "data": data}) - print(f"convert {pt_name} to {ms_name} with shape {data.shape} and type {data.dtype}") - else: - ms_param_dict.append({"name": ms_name, "data": data}) - print(f"convert {pt_name} to {ms_name} with shape {data.shape} and type {data.dtype}") - - if "ln_vision" in ms_name: - if "weight" in ms_name: - ms_param_dict.append({"name": "ln_vision.gamma", "data": data}) - print(f"convert {pt_name} to ln_vision.gamma with shape {data.shape} and type {data.dtype}") - else: - ms_param_dict.append({"name": "ln_vision.beta", "data": data}) - print(f"convert {pt_name} to ln_vision.beta with shape {data.shape} and type {data.dtype}") - - if "glm_proj" in ms_name: - if "weight" in ms_name: - ms_param_dict.append({"name": "llm_proj.weight", "data": data}) - print(f"convert {pt_name} to llm_proj.weight with shape {data.shape} and type {data.dtype}") - else: - ms_param_dict.append({"name": "llm_proj.bias", "data": data}) - print(f"convert {pt_name} to llm_proj.bias with shape {data.shape} and type {data.dtype}") - - print('saving qformer ckpt....') - ms.save_checkpoint(ms_param_dict, qformer_mindspore_path) - - -def convert_weight(args): - r"""Convert Weight - Convert visualglm weights from pytorch to mindspore, - pytorch (CPU) required. - - Args: - args: The input parameters for convertting torch model to mindspore model. - - Returns: - the converted mindspore_model_weight for visualglm class. 
- """ - pt_params = torch.load(args.torch_path, map_location='cpu') - if not isinstance(pt_params, OrderedDict): - if isinstance(pt_params, dict) and 'module' in pt_params.keys(): - pt_params = pt_params['module'] - else: - raise ValueError(f"wrong torch state_dict format when loading {args.torch_path}, please check.") - - if args.vit_convert_flag: - convert_vit_weight(pt_params, args.vit_mindspore_path) - - if args.qformer_convert_flag: - convert_qformer_weight(pt_params, args.qformer_mindspore_path) - - if args.glm_convert_flag: - convert_glm_weight(pt_params, args.glm_mindspore_path) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="blip2 weight convert script") - parser.add_argument("--torch_path", - type=str, - default="/opt/visualglm/model_sat/visualglm-6b/1/mp_rank_00_model_states.pt", - help="The torch checkpoint path.") - parser.add_argument("--vit_mindspore_path", - type=str, - default="/dev/zsh/models/visualglm/visualglm_vit.ckpt", - help="The output mindspore vit model checkpoint path.") - parser.add_argument("--qformer_mindspore_path", - type=str, - default="/dev/zsh/models/visualglm/visualglm_qformer.ckpt", - help="The output mindspore qformer model checkpoint path.") - parser.add_argument("--glm_mindspore_path", - type=str, - default="/dev/zsh/models/visualglm/glm_6b.ckpt", - help="The output mindspore glm model checkpoint path.") - parser.add_argument("--vit_convert_flag", - type=int, - default=1, - help="whether the vit model needs to be converted") - parser.add_argument("--qformer_convert_flag", - type=int, - default=1, - help="whether the qformer model needs to be converted") - parser.add_argument("--glm_convert_flag", - type=int, - default=1, - help="whether the glm model needs to be converted") - - opt = parser.parse_args() - - convert_weight(opt) diff --git a/research/visualglm/examples/example_inputs.jsonl b/research/visualglm/examples/example_inputs.jsonl deleted file mode 100644 index 60d5ec56..00000000 --- a/research/visualglm/examples/example_inputs.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"id":1, "text": "这部电影的导演是谁?", "image": "examples/titanic.jpg"} \ No newline at end of file diff --git a/research/visualglm/examples/titanic.jpg b/research/visualglm/examples/titanic.jpg deleted file mode 100644 index d6c565d52bda032df68389da2e1c62c491f94a06..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 29865 zcmbTdcUV(f_bwVhdQmYTU5a2rlM;Fn#L!z3dXpxSAWCmGIv4@zVCaz0L+^r$5UL;v z0qF_|2vSrKwr%gT;{MKefA>81k9%);=30wY=2~;icf4baImi5G?w|LdGp2?nh9EjR zI*>c?2l{6bWB_7eVq#`uWMO7zW@Tkz;{bDVu(NaUbDuj678bZ5A}k;zBzjT)lBl?> zgpd$K4Jxak2!p{cT++}{SJIYOhAGi1p<`ubL6SdTRQ3#tWo6?%&&MwyDRl_~ zm4>OPs;O&e8eD@L8X23IqHJvK>>V7Pyu5Gt-1J3bfX!XqN1;^Gq$lae1{GqbXD za`W<^;>s)VgvzSwn%b7uw)T$BuI`>!1B0ZY;gQiX$~1Lm_U+vK!s^=khmFmxkJ~$6 zz8)TZJ3jgA`wv>X=s*nrHtWAF`#;)s7HAj!sZ$K6m}%{zqYnoP!`V}eVv0=M`Uqx^ zz;ohCF)ZM#8D-7=tP;w|&pe(%WH#Q5uvN(~w5I)O+5fwS#s2?l*?$}Mf3<4?#KAxZ zbRNT5&=t_3J9G^Pef2bdb+#B05z8Y@wo6NIuWp7&ASrW-8Y8 zWst7BdV{>JqUp%8expIdT^CBHPek^I(3h34V8^Puy~;B->1a3ye*D<}GJEW4o_U8! 
zDj^d`bK8j2@%I_U1>Q>a*tp5fMsKswQbaXN`rG<8ubPaa{T7YA!N1aqB;>?e?ZrJk z_Q!Ix4eQ!gMq9oa7&Bga{8nCRWyx%5S}%Xm?m$%RypPtxcZ$qmPfB1KJh`!=)mE6> zz0XeJ74_EkN}=`)tE$MbbL1UM)p%0e(&8cvIEv%8t`0RmjqaDuZmA z33vb3;(T5Y%6YjhxFjblyZUBf)E7~)jWa(3%?WiR_4}XGS{mLSSg8krRSj)PyfqU` z`{%c(pSun`h~E`3O70}ZrI;y&^Bfd!*LP1;HOvgIta~^I4leIoCTSp$$7VD)cybNE z&1)%DLjR0U(oud!NB2MDsu%DPPhIayxYQmP1behgB75~{EX@xXtoH6VKHlB=W(W#u z%U+)^E!;=K>?iJd;bcuBd0bxAlo%xKHT`N{T0kaFj0@KpBF@j!C{Uq9vaDJp_3*mH zdGL2sDi8RgRZI)!DMv|bgUm{F7yuLq2I7YJXRq^jltCsX0J6!!#Dd4hz`-RKQ)R)M-_xX9;&Tv{0RwA{UH(7i`#o6 zyBWh2gFqg!HV%U!4&y@qm)`yg0OK$&HsiT6TjOwF$YWrUaOgjV2B>u8pt{JAK=?2v zLLem7R{-bf+Zu#V`pb{M6wz)jn{9d(BU{U(jwMA!6n1qKZZ2-6)=YA={Z|3Z>Yav-A!j9*~DaqBQ477 zS1nw8WIuI+v56z0bZV26&)X4$?4jkqtu3Dj&;9Ih0UN4*XnwOKBV<)CIEvv^xKmB| zW~`<4SL+l^z8p5-FP%JSrg(Aenq z6%o5#kHIYs?ZqQ!I$|9Bd&Kh3jfcJ38WN2zGREI;G$aCFfmor7ky#EClkJNP*x~J~ zY@r#qH0F()8M=hkpke);pTo<$HS@!5oW9jM(Hu&~a(;(9ls51M;|G2^U&PEUxGo-> zIHcDfZ_%hvSK9(!My)F~G|S=}{T2hSj61HESnzH8q*+{UZ&YA(2NeTY-dEy3bhQc% zugJX#xk&BzHjt}pm@(0>n}tKLKR;Z(-}j6wZ7ugqDB1R`ukwSO5M#+Xs3;yp^HL6d zZc%Sn9?RmM>D@4mga?;aYb4IT_!A}5@puWf3Zj#SN*XH}lK_8Ms0?ZwD=!%x=fX2O znubGP0apA?5%P6u@{KmmOQ|2#522&`#DYiQgJH}2k{E*3k*Cuk#rHM;ko8IzW($8k zuD)Li_rVS9u^e$`4yla^W*{JOu{SI-e+BmBFPsE+gwW(yE_-|$vQp!FqKUR@WG!#G zJJJeJ5KKFMK|@S^F&QoX-MD;dfN-2Km}bc2Xtt5w-$pWAI8?={@*v{{-=Ozc|9D6Q zXXwORA?~rwX2{CEqUO{#TaqDh@YqgITWtN$?Tz2N)>X5wqF^au^~IJpNgZ#28Oj^Q z6^)-8khGRpz;yI#A*9K8LsUse*#U zC+G?l)v(F^D_>!eoY;52vBjx$bvLO-R8q@~p?gwyZxUjIZWo3P;suDHF34jIV!Jaf+B8k zdH$6W?^8kM=L2)Bw&`R#&>!&a?kRwXNkyk@LL;fAy0$G2i^BSBIxE&vnvfQQ^27aqj$rUu=|FW^PaGB@|;6ST}_@ zxue{*YLl_x!70T0`)ZbG6-;>OhJXU6+z|&?8J#;3Wu9FgM#pDnr0t<7+jccM@l79a z-=T(m#hb(OA+LL0#GB8apSu*}@~+~SmN!qr2T9Ye)6=;2aT}S$)4n7k*&Ph_n#oD7 z+s7%0h1n_$N~O)bbEU9ZY>YEZJId=Fv|V=MVq!?tq@q&tuV1-)kl}Z)j3mxnA`%Eb z=sV}KY@(<1;1+7#qmvg4*1;X%7|$=a@UkxKge(DtQkV_4q?>JZt|jIJ3g@Ze1sBn6 zUv}J+`L|m#J~W(n9dis2*C1m2sIZ6^9I%@=wMoTTn$`OLJS;5iBKFht`DN0WE5-#@n!yb{n$j}so>QM4ViW9N?{Jb+4Y~g)PTX+Wrp%IzD<+g~ zc&1Uqugj=M6ZdvXk5A&UM5Cydy5_)UE0i%#O~RGCr&LzN|2TV^&{lOw+N#-+Kg(KF zx>@nD!b&r?WzpVUxp;hTO$BS|tJfyxv60`L=u2Dat#~D53mZu>!(+*hOMMOtN9Tp- zq*(#?kKOrXS4%p5r0j1%$21h;Fo5;&qEw$^uz1_sJSC+>E{`+u*gdfH{MpUPct;_S zEeU9dna{CFej{F|;9MGRw^YMyoE){}g!Mkz7EY^yK`7#8Q739QWG2h? 
zZm{C2mZ5Jn2uUL8vsDdxnX$;%z%Qlba*wQNq{AnB1-$1UIgAc(8Ku0M+-A-#f1Z;} zWKKF^`yd%}-J|JRZ2!Kcjwf_o&CKzuqg8RF4p&gCmL)W36!tfJ?|Vv>mf!Sfwa==) zdlXsUf*<|$8J)xe_H6^U@;Kd2?FvQX*Ingx+G7&Twd*_AnfJTF$(HeYC)4b5VD1HB zM9I=u!17dsmg5hL8<;ZX(C@bO6`1D`3M`1TR>S zLjB8ov~`^&4kb0`SfI~_!c0ZUheh9@Br7%XbeXM!9TTI$OUR z)NW3{u0f6=d#S-y8Gew}z@~ZQIaBD|$R=&B2G>d53Ipj*`o_&6V}lxt{<#Y=t_#99KPjDd?I2o zX&nab(?;E~>6dlmrn*S(|31#jkuU7zvEplCMu|;)v4&NclfJypp@|6(XZB!a;E5Iy zkyWo=q^w$F=z5>Rv&I=?f}*OpkvOMSURL8g(+9e5g)tN&hD+RQ(Wy#0sjR4VKUy_q; z=3|DdY_24*CO2#c|Fq6*pPOudv?rWK6QHlCZ zGOL!WpUVTL9hZ{ASc67Wwi=cKuR<>>Gv55(s$81;$~y--Ht1hiLGz}8pNwqYZVHeq zQ*_42C5w-4{i?=w*BRaJ7()C^o=c+Xa1|8wATF))g}ympZJ~V?e=XWLI-Ai;M9!ez zf_c90gk)MIc+68kZfBf>4pDEEDblm?F>azI#cTpCIX$OYMr#UzH1MG}Q&*+i>vN^z zMX~-s%=jMPP2TqmfkGL+tbK!&r6RiQbX^*3Rh_Cv65cmCzSb#qc#1cfNB#%1qOgwH ze&M4lnZ6hf+Hdc$e_Ms~M}Cvl--=aVKz$iC{-UaZA7=cvKI2zXf`K1D|7;bbM)QHK zH(!XlO0@D)WMb)6wQ|Siu<}tw{I}0ZlBBg8->zaWk0sfg%DrhrSXj4?XR%vi@ZRck zgtRK#>7jxH1q!Sg>aDH)n~S6AiMhOEzM<)+G9T{zZmVU9|G98u4#%jyO^m?XJw1EN z%(l1jO+qJ{SWDT}j?rvJV|4sx;ik^kZonGi8~O3G(7HfDq@L~?=95fezRmtY9kiAT zIcw+eu+NH2nasj7%rN=T0mGI*sVO|BMJdl{QunuK4}rIp&38T*VkgkLaO+K6dVhG@ z#+RS0Dm_0c)AN5fJ`QjoU+7tE6)7pz$NO&FeZk#Y0B?Ou`M%`o@D&iX5jz+EOpA z3}i@D)3qMTZ>s01;u%JTe$XoXQOtbLsXQe@=z0?4vP)oc%7-rFp%S&0VEH(x`o1pk zHgSPJectm|0DhoXnExB+4Qz|3zTV9srR@UO!@W$=WZSJkQ{=Fsd>KqI z2V3RVlALnfm9)8a@E-gb4be!|pxpD)-ibHxOif$y!LOOI@RLX4#n#UCfj~%7G9JHv zy0>{kd5khXyj4po<6% zCgO+{+PU2WW&8R|YP}J-j!hTOR)%(XZ1bgi%WfY|1wBV_3@z%N+v?rRCaO|wgax=O zH;S~@PAPS3M?)^Z_=5&CDVwO|#K#8jLNeQ_04I3I2blnjth^}Askw6J=%JD4j|V@& zo2UgWmDI~?&Ks=?rP;;0IWx`JB)ZaXVa|tnpgy>VlV*qwH5Pi9STN8tZrY~U$q}KE z&+>OI&Nr#K8WG2fX-lwX64Rp5E(jxj!TOH*COEP8c&5GoVH)HFxYt z%U2*W$HD=fMWDl-=5`t1+{M+_dua+p$sWCAs4OpxT~O<3@y7s`Thbx}xO%^sM*JKR zWXHFYTj+2#fufic)94mbb`DE?(St2X8nW0}!5BB}G_HcV?j=6l?Nt8?;QCs-|2d=J zsHsfHZSF8rLQxi&`6aCBv(Yxw662*k&_#^;lDEK&i!0Z}t~@q2p4N7znWT}@p90tL zS%!Mcn{ub*)>Fq)0KWID;%P8b*ek6{>$oj{$PW+oW$ z5X>$b!-O+GEzY6^_EnOF=9pAzwvHLtCrM>q7-JY;(tN!^P3>yM`_~#j5d4-kcn+o9 zpNp&8Z?#qrPkU>7zlH&Rr3%_AyKE~j>5d4&bY=Yx<{mcKf?G=s%XTXn%3YuV(=(5H zK09CLVd{E}kgE8r?)-4SX6v!Xhe?&Go?oV|d6cOv`=`?N{YmSpnCvz>hz*Yoj&Y7j#6}1WrH&Y4 z-WiEX4U(ni+f%2fow}3FCL>ZV?VUIwujtR{MmiE(TPg{Q)Rm_q>XYJU%)3yzww@Rv zDDtl(3BH%Ge4^lTQ9>;G0W;ab8}Cd#BKJ*7M#!rZM>l4aOm0NV^3vsLkcqr9=TE#J zAKitrcz;m{JFl$fhPyM2&#$62OczsouZGR75Fs}SFAKtNr7>S8r5%qyJN%BSdV{`` z>&0hLb9O@aMJ>I6%N!MTO@qoYo#P;kn%~=^V*A4pr%qgJ-RBy87L5-$*H$_hLgYO= zhOdAF9VxDq_RQA>AA>I!_K^FQS3s|AMb}s|q#_}R2w!PjyWd(0``SGW_3SsEEXQd3 zuG6rn{2oITR`dt7WG1Znvw>No7-|des+EFWO9i$0{aWCIxe+FHsROj-a6T)`r_5?W zN#@NMOM7KH=Z&x-QQnH71(CLypg?h`Pp8ckxTR@v(kStK>o(3jqr1 ztZ`!%K~fs3Dk;^O_jM{xSTd>(nx3k&Hjca@(_cHbu9;B>s)hL8oukY0nf1Vjbly|V zFML*f1%zeX80^a)wO!C3{&@Dtcb(q_d)!)q3jzpfEQ?mX3`Qj0&La{J=6+xX&EKau z6BqwNeNY--kgDld*;c%4@TpDpa&r8wH%v~_0~*WMWK$w8?~PoY%eeaLO?HO+3aHhp z$^5>>@yQ(~v&kLCQDc1WI@4Y$4Z3X9YbsdLCS2Avh#LSMt^|->(Qm$O%Mzg)%<)j^ zoQzw+A0SfCCOuCv_ns%rr6r+qdSNk|IibHbi1<)DjYqgi=+F!c$;_*()0cp=M_Aw0hhM2Z z3H96d3T2c0-8B%}A2!`s@%2&rL18_jDQEVVSUiUztHYDPxIJmksy?Rr-a}77e@$rh zJiNIrzNqXG+IRO>i@NUS=IBA zy!j37>+xJTelGI?C(1oi`miy;BwUY@wF7ZZ|0WErC~n9jIa)+Uhq$hc`9YHH)UtJJ zWd0@Cx(xIF&dgzw&G;y(d|`OisM>O1N!}}Fayayn!-=?Ja`B!%vwdf0))A$VqTizI zG6;b-{}o_AsKR+0l00*i*en1)BIl6c^hhH7_&i|0K2H_S|5knO4R_iaFlI(vY+Z zAO8kFcAG|D2%*MBf;+%_r-J z3|BoSCi1d}LvQ|W47K{}XrB6F`m6-wL&52888w0qo3lusr6vZ`sH#$GS1Q5c;vko< z%|oOLrK!&bv`C`za3AFZsVXZfVsgWN*LDuJc8Q|4F)|L+q*Q z%JhMcp23@c1<34CP@OJo-FUBJz2uLWP$~v#sQmo%Y(;ZR#tjTvb`;0ZZwD^)>v?Xc zRqPiQzxA8ty1?U2ci+W6e7^L(4GNhtnz?Uy)=C-|Zl@S*tUGphqu?Y6Jn5jR0QXL1 
z#Zsp26BjP<(O!ykbEBDYNAoakBSn-6YIsYtt7|lPFMfOlz8$|+UQHpnpo+L?4resU zU`-lr!e=wkMQ%uKI8#D%A|}mIFkRUEFwlVq-OAfa^{n%?9O6~&BP7Essjr=6FTjr* z#FMxo*c3&55-!S>(;*1OLrP;qlO(L38J8lTXZeE9TlNZcn{dzWSS~vqqOC88PfSev76I&Bq8|jvpZZ_`UEInDgE$NYtyXvv}6` zeo>I$XZ-Txv$S@rtuaCS9vM$~WmEn38|y1!8W=YLD|MNYnf2k56(h5$#C-xY*v72t zfC5e@!T^RcC|Zl25bt*`<~CIJco>{yS2U)kemj)Mf0Ws{EY{m*R?UoM^0+dcYFv~m z{`6OXR!UPA-*o^LcA6x+af#IeVtxfRt+YH#{2x~Lc<@6{OYUff< zaq#kvZCV%{A)HM4T$^aihm-}P4~iZbD$+A?tY4Cak4NXjRw8S;sy4bPisE^`l_X@` z3Z6h(VKVUMP&xP5>5~hetF~P@Nfyo*wf$!X*vc>VG?q(x~y#g|Z5glX##R&mBgrz05%0mlM_`7**$dX`TWXj?l(&Wf3B^u9tuA+*n060i@ zcU29K9-KBOityG*{ee+-(9O$Q@xEapaGV$>6=yij`|)8yk}5=Uai;;fr`O=bWRPpi zP&P>AuCw3hrlma)8s}1BfsCuTpP5n_d=XZQCGcArtd@I`-e3A$4ra}m zxSbA3Ujc!~$B>2}(_Ezeh3IJkmzOMGPW$yd-z6G#^OHR?>kdOzPH1Xl)^VQk9yL6D zgzfo56AbFNs1FnjvgAhIO|H(#subYuXBK499jAvem~!xCl(9upl0x-29AanF6v||W zo021BJzy6MILlI*x1+lUzJu*HItP~`JJ+0NEhtZ>ZFyWg5xkqVU+jJ7-74K)0U5@R zmsC%x#gqETax$6R%^VzPk8=raQ)^#xlaYpJy9Dp3a4}6i<3q<9W28pS)iIFrJi!HeAw+IkQO8JMg&)V?c}_ex4B(r3Z$B zLde>RB1`8O^9|pna!j)}bWNp&;IW*W4-V6TM$2W@WX=RnX!#;vXmJz zwei?aPp_FRLG}m!m4k9P0a@ExW@II`=|QS0{QG^cZ~eWbcgI3SCBK*pC@%JB-S><= zF?!cTR0vmiqqjUsvKEjp;w=SZ$m<<#(kEA zX7EZ?m+kk{mzCQbj*OR(dKS!z!pA^^NYknbB_G`*} zfvStYE^XCOo_g7^Ee1IR?f(u!Y!G9SjbZ2@+r@dY?aPT^QC6m3Y=jEjNXItlHfc1++ypSI~!6I}Ipc0FEj?S5Iqf?Lddy%Xw_GTbOb)9{~}e@g_ zcWl_2{j=?f`Uy`;pO?9yaP^0E+>!ltl5#zT|1k8WKL02ym#R7c9ef=^cRJDdxW9$J zJt27_hK`cy$N)itp{!|J8TZVwHKua6^G5rt z=TkyXA&PCK`(<@#HD0T=;!P*QG-CFQ$gv1HLQ7eE3_b_#gqUZP2dx~)?+i|pXVz7S z__#^xTb{_^SMN0V*6epK=<2mjGT1X{L*t7RGkg!{i4Kw4lEu9I1!(jI-15WSngWmV zdLLAnhNA}jjDiI^1Y}55yjyrNh91WnaQ}HZ0lAQX`ulj|?<>)NUQPg*6n}3fFd&x_ z)$*BJ~zrrUT6)wTL7}>cY5C(x2tIDAeG1 z(GdKxwrCPzWUCJfKdvCpCg6t<#B{!so5LXLCNXLj8AGB{`S1-oj+}Q%Sk!V(4TNq{ z)MkTY=;E|r0|2M1ng`_{kf}Hj1_2`w>P<+1z?1F+=a;T1^LH`dg~iW;5El?A-vzy= zcw%fbDKrg5c|gD*m%w=9-akGZ%^4A99k+ZCi4>A%k1`F%LTLAe?vnX_FLQ<)JHo^&}k@&JHQAgf519to!A~8Oxdg69!$Y|+gQC;Nvuy1^I~Sg$SeIkK?OHd z<1Qt7`reyTUttuX);N6&9CA2J8AvjgH$i=?F*^3qlXW`VISjr7Gb3g6`4#vYG+%bN zLnk#hEc}rZj=7z2#ID|>-7s)P*L4SdRV^kLi#^64#b4pdV8Ndh}&9ivgHJg4U9mA-hE+51L~weF`lo0A?|x9o?nWsQtnYz0xO@rh%&x^ zr~lz*P_HJOszzvLD}+|{@mexXA%SWBl!=*v+knuel_iy$|ITmWSzPrneWE)5LvMc`Q_W`_qcEjcg&@ClBak(--0upCw?G^2h%(lHo0xGK%(uztHvtJ z?i3cnXrIX|XpbMYZI2dzcvd7cJh#7(%*~HmsKd`epTy8IF^(s#_`Fbn!?hCeKU#|1 zB9`ljPp(>!OQ9u?7}@h4ZJ>rYn3X-|^Mp*)f=g2>Tpx3_4+c82)JEG3x+pB(BGmOC z#HL!AyMQ_xdM?0P9ZYFT`)oe%6)#nvQ})lwF>xe!Y-n1B#!OOizgZC0#5h^A`*oEZ zSSfbjvhGf-@0om}fIn|?d)77@gs9XUVu$+T~>s$fFa<3mX@yt$TNrk5K^&MS; z;u$~RNtri;DG|_4ENRer(E;XP=_O90a#hDt6@9}Iq%uFM^|e_J%bt~ay7CHXc1-i;4*>5?xt#>c(Ll$ynqeG;p_J#leWcNq`YK1f;L z+z-hwh9z>bEY?8fx(okYGgM|N9w(Yg^kaFDXf+l1eq<#apuUEE8aaM%=w~OQYxcvx zv*IXS3TGXG?=u{R6-B(1+6F%k<{cm zi|n1e?FKBYnG@IJW+sw+LR-C;9@W{lWOcQ+W$>QZ|@FL55x zN%A}P6KzN|er!yK-MRL>YLJxp5ww#+-_evbV-}d+1x})MaLT{7Rd>_XFM4@N@lpy2 zC#oh@$Mpw@9oH|0{u+mX&2;cH3#3gD(`s_tcwhnJZFfU6W56Z;vS`z(sPRJVupBV4_+qWDF9?qL5%T zm9_RZ2AqAs_xX5|6C7^ObpKZ3@e^fHjknX?TzhJbXduaT?;6RJ8;?-lgWTwme{rQ0C zi$hU1uVn~pv6e?~bD_sBJP^CZzY$WcAuX;F@OMfDLxK*KRdTM@U}oEy zo2ap3t=?e$2nc=Oh>Ohk#}rK(rPMPt1k0!fZ?0@Az8Y6@GtT*o-S?%#gz#Z z=4wIz00SwNp7}-K=>Z&syc+)iyur?@0+oS+#VC(0b1huy3rq^A+;WsUj7qTd1p(3G z92v2RGNLSX3F2g=gBJZn;vuQIxn!qI!3xSyVyR}};tUyb<;#~ArKS;)3^kT%0#Z4i zV-By0c%a9Ieqs9e0i@(`!W|9*>x>`)sCobZ@e)WkLyO|am`oOdb`@9A#J5AnEe&r` zV4DlSZhj)5N?M^ncnv#5#P-q8PY+|0oXZ%`xiQKd)$tAlEpdHRaZ?+>xXmedy(~)& zwIaOP{fFEs0;F1$DojLT66#%?AUoR`!B5w;9nL3$(Mv%<&MEVf!qr016c38GmoP=5 zK{;k;PGLYK_T%0mx@K^~qE24XxfJsQA(?roMWyzcc0K2fD_LL>aaW&4IuPhV@QL9L zwD2c^JV^G>JjY}R!v@r*9EoFvgQzCgmxzrzqLoXyydOA>$OGU`SAZ>E0b|;;QmGex 
z0l+(yoB3{coJ!G<0_^YD{z;aayqG;Q%Iwn_U*t)IgrI?f4Hc-R(*my0X?@9USZHf? z?h(>fMN?LBr2ZK2+E$SthGKeMh-H2aumq&T>U5t{(L?Pn@j?0@V(kNg=921}*1|^3A&|G-jGg7vFrRrN^ z2tFe-#CIaK-5FLW9P+$PbPxAP5V1<0U<|AaW6{&cya+@ zzj>uF3k;#FdGvth0-zkkR_Yb^fX0P~?fdf>Zy;$`j-sw~9Is$kNIXgs!xd*7NjV33 z3WYUx)HI@p4J(H`j>@f4Z~p*d)8vD24LTFHu7O$=6s)tKiTAL=6r~5rL{@A!w{|}_ zGtwX`1O?n6+}yc=(hT8_GYH zS>^~mxQosUmKuAJTZF2pL8!<|5~m0G38A*98b5S^U_%EUo_qR_EaSTg1D_A7n|;uH z!K}GL(cUWWl%=UfqmuMmYDO&=Ng=%h! z85d&t8*$Olmc_yi=`5Qu#L+1%u@P$!s_Ii4GK0jjwp2J7&Si%Y;Z?uy*okcuOKP@7v;ly+v?>arEHhi`G*Ya4 zJ3r5y!%L-hrYmQ4l7iJ%LlDNYtgQLR%oPQ?uorZXf&TzvRO0K&HJ39oU8sW=%uSJ` zMWhZ(acZ^i9A)}rcGzn$;A;$}E?_0+uM~wPu*NZ2xbnr9a|(VKLxNy06*8;lUSOzf z-}D$zD4b9O3&B{7y5#wOI3d@-2ktaS(yBlYqo(=BA^w^HMMKp!QoR6sMehgH8HZ3< zuOo4fdl2%tEnfpI=)J9Q`6*T~0fkvxJhlKaieBXcM|Btq%IchDv&73|CFmp?6*Mtn_dDBVE*W8%!I<5{h$dJDmP6B0trSlhSF&SVe8sb=Z*jW^ynf@Gmk^-?V_w;U zQ{r|ajmKn5w42gmR?+WKvbSP)D#2>SPWwj#1FB=e6)^;OA=@bo8ON3&Hr&djPk2)y z^O!(KJB(D8tritK-4y;(;YCY1c1DD|&jzE}Nux4>SJKTgbv@z3j|@D`U88HbM-PZ z`f%nOWT~qow4+h0P&NStWoQ~PP{L6p$zqkobuL*J*&i{!~@~GPQ65unQ0?-h6=-<`x43MWn(S33;}V z4gfv;`^%KsI8F;YapZ+ft!q{|X$8CkftL5&jN9aF_F-fdiK&KGlBLR}cOHzXS}C$w zcwxMzBWOY*g;=IH2*6PxNDMv7`eO55P<7+`f(}PA>%?2_EZ5pOfIbMVp#(VQW?dB{ zOfvnjh?FLhwpgdUE7I?trg~)qT?+Fr=BLS0_#nsD+g>^-5GwOvb=>g z%xO(iG?W&@jObgxImKozFXo^qRyeMPD~_=MKz9P9`;1E|T@u9N<`gUrtG$x1hAxUX znQx~f!b^MDW2#?VmEi6>LG>79e&w}zB~r2T0JOLZXn@{+ra_#3;uT#nv_8?;%M=`W zm~eec%W(4;LjK?)(s614aRYoXNLRdd2r4D!HRdQXr|wa0p3sG)4S@%;qN##i+WbRd z%x`vk%@RP>2tOyF72ajccjg*9nU)|hxYx}eENZSo?f(F7*;3MK%B8fgaO1*S`_b85 zV&<|C4jrB$?T8dxY})o|EY-^l176cOx57cnz)v6iVx?G$pO3h1m@Y9NOC*9sg{2Ms zxa188*WD_gW?4$OgIbjtVIV@frZ-o8N6tTST@h{7A2DR6rZ-mi;1KH0Ym$l zOp>kBEoZiZ%Yz8+9Hr6pCc>qzrTjuUMY)!t28?l-6G|n!Yu;j8@JDm~%4)`;B8+zn zfepe_1WO|OKHKz|s!ADMbNti+z&H_EXG}q5f6Cuc$aF4CPLwc?K-p^Vu(h(OV($4Q zRmilE&RQ8@tw)xl`@?tY<1}Xx<@uVPO1^*OZcU5PdxE|dcP?W9IyN6aW0N`Y3dP@1 zMaEm*5~-l-5S7)rmF-OBrOjEH#mAC9YYbZQ_wE>G$TeVo;RXnt5mjOWY90iuH593L zFsn5U$?*as;E(9}lpOx%+XP|1nM&{VGpa9q6PBaI8+4ho0Tf(96f;KLm@CL(n6G~ZskwSCmEO|=yO<7^r@i0e3+ikHP?GZDfZSdqs5*T8sJ7d5)Ag9;sC&RTq7da?pcp^- zHdVwH?prjjRQZaTqnek-NtK$u+2?-ZO8)?2<1?8MbbZSj`hx0E?th|8FTQ0uS@FvCtHYVrIiz15hG;$dT{fX9==)8!z~;68i=75K@$CJA)qVE5YzF=Ie@> z@d%=&Uel0%vkU7!@J>iQ zse^%w(-Cg1g)9}~Tha}0?HnC2CK95$qY8#~3K$XNAowIpg+PrXWC5Fg@PEQs_8h3Y ztW1ZbVUx)Xy3KFE#R2zW`k0XfuLZN?}>!?$pVI^=mSR zi+IJvcWC?%_Y6K^e7#L%#o!SAc$BpjsTIS7m5LsLPAJksLW6zM@dN3$id zU$~{LJ>qF?k2H}I58U$M3%}fPH}7)N$R;ZOCZSA1zl2>>J|G-^ld)&CM>&@4xK}f0 ze-hT)mLC-`k@$f5MuiIxVxO7Z4(a1@sDpLXyEG~A<~{5Ripmdc=ftltGi!rqmWHyV z2q*MUTNT6TqyG?3f8RA&1byM?9~BrS$W1K zisi#GsZwUZOcCM#}5;*+JbZQx8}t+2GZU`Fu;oEPOIU(p1ss4|Xj zvlV>i8%7480i*s#4tc3?6&PqM-W*aI!740Y5{Ar~%xvKE6%h)#bIc59PUru`03s0p z0R#d90|5a6000000000100ILM5E29v1t2g$P;md+00;pB0RaL4{g;25)!qA_+joCq z^FP76{^jED(0@k$_x&sRU-&_LU6S)JV1M)chc*1TJii~3=egB@TJPqQn++E%* zoL&#*s!o3=nJnN=<~~krb3R8o!X*pJdvO$ZWZmAy`|Y;dX1^gN+^HsfFf+wJ!?VN5 z2CpQsv6aS}$_xjyvvVhDki5yh`|rQvD)!rZQp+ro4lfuLBPQfJe!%F=Wo;C%U|SQh zkiUD|YujzMRq0h#Rh!#l=RdQ3!}}+Yo&*#H!j-tw>|EV#ybSZkZ11h=8bsl<4DygXk+`kJ9|4f z-?x3X+hFbetg5OYw6)B9Wu<$#a}|;T2V>9kE3-pofy;bzo>JW|c~+wbrVt zs)(wud%yr36(N<+B}nKRLbCVEE4UE8MzyTFC>E|NcyP#9cZ#4Yu~qL%fDIuLJG)%s zTF^nkPcjH&3ti1B>e^tquBod5dQ!Ue0k#Z>8_532&HRULmkc#Jdjv;Y3PE|DzjCZ z^*;502q=$Ii%d?5(H6ql6G2aFU*ty>6bnxf$_gloqAHcQ04hZvm_ZyLi4SArjih1i zzarQg;?l#%GlGdK@D)`7RY;?X_zYUVdchKeGVmMEOioQvn2#tu9Fuo=9;Hr_R06)E(4myi*nf{ih9)PWUwtFW%`7EwpzRDiFO&!dj~@fs*7lN8E8QAA)OtRaoI z_o}L@s;u3swRd43V(v;}DxyqXvL`E7V>SsuYl%-%T=P)wPvebYO1xWRE7$w)~c$f zY9+`Zw3*CAu`>jMRRBOi5(OZf+W4GUKM{g%?+;{xLJNz-WT0bBMO8T2SCZ|RQ7iw% 
z05%Z-0RsUA0}2NK0s{a50000100I#q0}v81K_F4^6CyA`VR3<>vB3l*GEkAx;X?5g zaDe~X00;pB0Ruk(`^2L3C_=FO}YC#30SRN9f;2x(R=B}s99PI5@_#V>c%H-ddcd)G?S=A3FEao$W&Fp z9}N-Ja`qsc@;(0m)i>*Qs;YL1D5Lf&#;90=*8N_e)P5y|VKOU~Gstf~h`GYextzRE zZ#AZZgSb#9FiFV^D_lA|*F`M_N{U_hx+sH;AP_+f5nIqrzj|+87H0*Y)#?32g5jEW zE=)F0WmT)3c|i>w-?Se5M@fVa)SM-zpS)U)$ zEi?*SE}>R-Vb;y4$c^K%3o9H}G%_Q{dUq@YXP)HNri!V_VY6>#e2{n!X{FF$sIH1B zs9tIti+|bL!6g?66bf}l(ZJ{sqjf^aw7f_LNLK#<7(r`5jVC$%CcKvFG*YEvFu9<2 zEDr);A0z!iViUPy6TwgEUkcMUgt|Lmd4AQ7Hya@h;M52l} zn}P@gIx>hvB8f#Cr9e|vjrfj3)g8P{AyyYSjpO_5u-X>y9+grgAmIvA@rLgn$h_h? z^hF+}Hx1X6QQU}8b`c8$Kt<{Vo^cAQH0GZtu1Txk3R*3a&~!opz{YHv0H|Wnmo_(NJT;-p$khQREhf; zQv5rXm$kNHO?#6!rgp0|(U3+dL~}yhp6{f2m4T(lPB`yQbTm|Y2`h0RsiJ$PB!x#vK!DMeOYwFtb1H!@)};q2RH+=3W`H&^sM)&|MVR;vv={cozG z`HpJLZO>e7s~u+Sh=pnJmYj{ZfzXZPiYcf=V54;wH#*(@!`%CRZsp!>j#K`Z0Rk!H zu)Kq$k2N<{b63sqRXrrA>g=^{3r2!B8n1>mvzGT6AA|c=qPBK`_kurb;@kc++(P)4 z=Vn#7&4=)<25mouX}G7K@hZ^JH46(c!J>28SPe}(xCk+!t;i{gAVf^}P;19}EVXjP zE~@5*^M{J9RPChhaB!cOwpdG)_C<{RYG3csps-lZ#@uG(j zc-c3fumg4wq~tG=^vWT`5&-kb2Nn_9q2}o-iY=IXR9ZN8-ydlKw`+JHNR&gQG!IwaQst&Sax?MJ>tn-f14d+?`+&I?05SsfJjMqBqs;aM- z$&uZCdNJ3bn#bhshbVI7dymW>doL>W^B5NLoBYKf!_moTa-s%=KeM)P<7!~i1^009I7 z0s;d80RR91000000RRF65d;ztAO#Z?Ku|CS|Jncu0RsU60s!CzUx`#jRdpzeq!9eN zs#Vgsug|V)nxXm^#gu=&Bm67s(F&`zcdAuf*YG_GKWw5afU3TU`?Xz%)IF(J@*=;H z;PxMMMD}vn#3m;*n(luS2NAGMgOQ5XHJp&)$^4)&LsW+&0JI*D4(6)6AHwnPiP3>S zk;XvUvK%bD$YnEo{lZ^im)-65ef(@=8x(Rfe8?OE2bTKkr5G@q%7QQhgl54AD^@awJ0B)tBpC98Z2LAw+R+=@9j6>@6mOdgYx&hu7=Hb7h`f*j%yPQ<>`Z`n2`>{eHbQ(@jfsz9@yL7j_7Em9n+> zLAvX%u1pa(Dc}GbRCsY~o_v&2HI8egV;DKEw`E=3-Q9!I&>_u{va#(uZ?SPOvNlZm z1BxL?fr$)>ppmV6orv%Ps_aJ^f(pC4+ikAHtPtSlN-};($YaT4V{DE=4x~syGBCPK zjAoX*5Oh6m`VMNf)Hhvq*IMGxQ8A5CwlSFDz&mURhH0W~(_%CLT4#yN8Q`n3uWh!q zcWU;#y9WB%B(+dcAebW&Hy~-CXRWcs%<0)1ocG8OOhsjg%LwUJM}ltc4r~yI7k73^ zAc7DuA6ppE=Q+gi3q(p;F_`x_y-*BJS?y|P0QIWWg;ouY2R4T!l7a|v9FV54w_SA5 zMk9(pEt0Yv)hlFa05CacfSbrxRV%VdwLl;`g-D~+a1?Sg&9lxo$~Q0HzW(2nG_e;s zrR~OUL}`KJJOsiWchIZpXeyvM2q25B@(4q)EqIxewm|HMH4>seuN=IPXlQuO3@kwf zb`ICU;F|;>6><<%M-+gd5EWe-=19pN>*5FN;?8+sV0g_DnE4!)cVzKkH4edDqAQx> z?|`b1EGYAiUjG0RGlXf0<0wL5?L=}x;=#KGUmyWog?GTOfMUWA}TG8Us3hf28p~&Lzn(1op?)ZqKzyvwajMUWVkP8wFquI}$`_uF9Ywhpy?f)3kl zVO_%H15pl4ngx`a7$+qFfxEJJCu?#K+QI2ulm!9M4l95Fsz65eIiR2;lUY#>Sp;(4 zwn!Sts5ZaL0|NpF000000RRFK10fI+F+ovb1QQ@4FmZvA(eN@r zp|QauP~im>LQ?TyVse0jlCuBW00;pC0Ruk({{XXS{{W3G`}rue$3f26hSGu~PuvAd zzeP+44YH?DNU=_){IPUgn%z1QqxR__D58W)4Qh2t4yZmR-@|D9`K*S@dNQE?-8ADx zDV0uX_(>ip3pENgh#DJ??cN@kN|nD2ib9Q02w*p%y#x?J4d@g;e$%lU#5u#O_x>05 z&TEMfZLRdw?gQ$BMsjQ~<*LZtE^bDVDwT7WM|=1APN_uOtlRi)_3&3J7TgXSRz~M2 zofFD=PCA?%G%4xmol2BO)c*jUHG1_dKK}s10EUwT=$)6G%>#KL4>GB+0Zr25tWTom zX^9e45KF5ZbUQ~B2n8gw|k?NzSPB%yAZXZy1 zdZSgjiLxzuV7TC`)h(egb!psqk8ru(QI@`tXMMMOx?{N=Lg#52XI|(;qU8iGaF--UZWiR{kk;x@re)W6(06*LRCb6`r;(|} z&3Moo`EC3sa)9>Ic2YzGv|C?A$v>u3cRJ@gF@WIN4ULBS8Tu#EXsKe;p%(|x@GEM% z$k|fzc$LnxJW1M))sfxk3wsLixrSwL#D1K86b_O3&@m-X>Ukhd!sPshiF@4xdHZoa z%aJ(PpZ!YYRB9e^k1dTr8ut^ma`(?N*0OkJKU;klE~iO?tNP<;5viE81zVI}q51*m z0{Ws>)s=-@qQfcI*og@1#`5;T4ys((40yvP$d&!h>%s6IZRq?jW=Z| zP?F${XJ&W%VoyRkGKoij$diacG-{ffn&SW`TQYGtm7+c zf?Mz3q7G~n8C9U~{Gliyf(Rz2ri2hpLk9K{(1Zp70)atKL;$49g_52@srz;rUemc) zl?>0g{{SDAo`YUL#E&Cm^;t|6k-(_3Ve4)vFhhGGfcXN7BMCtS5J6@tRF%$DC#4v5 zf;n0fT++vAY=t^}-BBhYH;w)yUMcLoK?7m+SxV}HvrH~>gHLXHMH~(V91bv}30ahg zQlWEHNgap|5}^>`O`jIL2NSq`RR)QB<=gi7ZdrkDaNu?ye}wTaYi+>FiE5HlLm|EY z0I+UF6(|6w2`j4Bc;gtCd?7geMim+zQPy|Rcls>noPMPmV%n4$$zJzr^REHwe##08 z)@I;u8x@byP&$W&~-|M&C_Cfayv@mo3#g6CsXP)^^edj&J7z=STh?0UQ3D; zgK0IyfCrfq1AxZZWOynb6~AK6_ONYHfCxlvQ_v`sD7DpJqeS^9SE%Z))ZJLWeTV1K 
z8m|p*n>hKQld3js;%5H<#jLY9k;Cncu&{EtD&IPKE~N2Mx{IcAs(k$&H*E)T_jCZd zI}t~td#t&6Gds`Vcx#6-O|(X_{{CnUW5-16=K9-(hrR1D%r=00z)_B?^rdL6+K-lf z7iXm6)4VQJ(G>u9c!TuxRE?@rBf9>Jk-z+-ycNlxqr#wccvO%11yB7!QyWz#0^)Q= zwMSHTr3N@uaIn9B4>Vrl=W0{~c<`A;t}e@#Z`Qd#3Hu)r+=W_H$lV@R{4Qnh83MIPLI+(@mr`#K70#)QB7nfZSy` zGNWAByMD*fNNy2?(r{fsrJ(%Q<-fNf7Z}54E@^RT0>@qiA3e#XuevjfuieCG`VCL75lsKF1YTn{ad5&}Q4^tv29b zX`xffDh&CIAO44`{UCV$3ej0vbyNop#dUvbh14Au^Nr5g5+?eI(NqAiPypNFI!gQFW&I+2;rcUSN%I9fY^O{dB! z5W*qalm7rQ_ud!ipAWjam1STshajh+I1Q)`F&%{T0v&k=^%KP^^iFM4}NlP=`XulvF4{^v(qoQAHdNW=c3p z%JDl?6*j|ag!Z+t_J}sM$HZsf1Py9#%t-`yd|!~Iq|+77H4mG&LP9QN rlx$GZScC|YoWl)^@Ccii8+x?tQuE@jtgq0XjnPx804u3NP+$Mq4niwz diff --git a/research/visualglm/layers.py b/research/visualglm/layers.py deleted file mode 100644 index bbec8111..00000000 --- a/research/visualglm/layers.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# This file was refer to project: -# https://github.com/salesforce/LAVIS/tree/main/lavis/models/blip2_models -# ============================================================================ -"""layers for visualglm""" - -import mindspore as ms -import mindspore.common.dtype as mstype -import mindspore.nn as nn -from mindspore.ops import operations as P - -from mindformers.models.configuration_utils import PretrainedConfig - - -class ImageTextEmbeddingConcat(nn.Cell): - """ - Layer to concat image embedding and text embedding - """ - - def __init__(self, pad_token_id): - super().__init__() - self.concat_2d = P.Concat(axis=1) - self.concat_3d = P.Concat(axis=1) - self.not_equal = P.NotEqual() - self.ones = P.Ones() - self.cast = P.Cast() - - self.pad_token_id = pad_token_id - - def construct(self, image_embeddings: ms.Tensor, pre_text_embeddings: ms.Tensor, post_text_embeddings: ms.Tensor): - pre_text_embeddings = self.cast(pre_text_embeddings, mstype.float32) - post_text_embeddings = self.cast(post_text_embeddings, mstype.float32) - concat_embeds = self.concat_3d([pre_text_embeddings, image_embeddings, post_text_embeddings]) - return concat_embeds - - -class ImageTextEmbeddingPreparationMixIn: - """ - image text embemdding mixin - """ - - def __init__(self, config: PretrainedConfig): - """init method""" - pad_token_id = 3 if config.pad_token_id is None else config.pad_token_id - self.image_text_concat = ImageTextEmbeddingConcat(pad_token_id) - - def to_text_embeddings(self, text_input_ids): - raise NotImplementedError - - def prepare_image_text_embedding(self, input_ids, **kwargs): - """ prepare image and text embeddings """ - attention_mask = kwargs.get("attention_mask", None) - position_ids = kwargs.get("position_ids", None) - input_position = kwargs.get("current_index", None) - if input_position is not None: - input_position = ms.Tensor(input_position, mstype.int32) - - concat_inputs_embeds = None - if self.is_first_iteration or not self.use_past: - image_embeddings = kwargs.get("image_embeds") - pre_input_ids = ms.Tensor(kwargs.get("pre_input_ids"), 
mstype.int32) - - image_embeddings_length = image_embeddings.shape[1] - pre_text_embeddings_length = pre_input_ids.shape[1] - post_input_ids = ms.Tensor(input_ids[:, image_embeddings_length + pre_text_embeddings_length:], - mstype.int32) - pre_text_embeddings = self.to_text_embeddings(pre_input_ids) - post_text_embeddings = self.to_text_embeddings(post_input_ids) - concat_inputs_embeds = self.image_text_concat(image_embeddings, pre_text_embeddings, post_text_embeddings) - return { - "input_ids": ms.Tensor(input_ids, mstype.int32), - "input_embeddings": concat_inputs_embeds, - "attention_mask": ms.Tensor(attention_mask, mstype.int32), - "position_ids": ms.Tensor(position_ids, mstype.int32), - "input_position": input_position - } diff --git a/research/visualglm/qformer.py b/research/visualglm/qformer.py deleted file mode 100644 index 8f6cd3a6..00000000 --- a/research/visualglm/qformer.py +++ /dev/null @@ -1,997 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""qformer implementation.""" - -import math -import os -from collections import OrderedDict -from typing import Optional - -import mindspore.common.dtype as mstype -import mindspore.ops.operations as P -from mindspore import nn, Parameter, Tensor, load_param_into_net, load_checkpoint - -from mindformers import MindFormerBook -from mindformers.models.blip2.qformer import BertPreTrainedModel, BertOnlyMLMHead, CrossEntropyLoss, BertModel -from mindformers.models.blip2.qformer_config import QFormerConfig -from mindformers.modules.layers import Dropout, LayerNorm, Linear -from mindformers.tools.download_tools import download_with_progress_bar -from mindformers.tools.logger import logger -from mindformers.tools.utils import try_sync_file - -ACT2CLS = { - "gelu": nn.GELU, - "gelu_fast": nn.FastGelu, - "relu": nn.ReLU, - "relu6": nn.ReLU6, - "sigmoid": nn.Sigmoid, - "tanh": nn.Tanh, -} - - -class ClassInstanter(OrderedDict): - """ClassInstanter for OrderedDict func-mapping input. - - Args: - OrderedDict : function mapping. - """ - - def __getitem__(self, key): - content = super().__getitem__(key) - cls, kwargs = content if isinstance(content, tuple) else (content, {}) - return cls(**kwargs) - - -ACT2FN = ClassInstanter(ACT2CLS) - - -def recursive_apply(module: nn.Cell, function_call): - """apply cetain function to a nn.Cell - module, recursively. - - Args: - module (nn.Cell): model input. 
- fn (function): function call - """ - for submodule in module.cells(): - recursive_apply(submodule, function_call) - function_call(module) - - -class BertEmbeddings(nn.Cell): - """forward the embeddings from word and position embeddings.""" - - def __init__(self, config): - super().__init__() - self.word_embeddings = nn.Embedding( - config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id - ) - self.position_embeddings = nn.Embedding( - config.max_position_embeddings, config.hidden_size - ) - - # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load - # any TensorFlow checkpoint file - - # delete for visualglm - # self.layernorm = LayerNorm( - # (config.hidden_size,), eps=config.layer_norm_eps) - # self.layernorm.shard(((config.parallel_config.data_parallel, 1, 1),)) - self.dropout = Dropout(1. - config.hidden_dropout_prob) - self.dropout.shard(((config.parallel_config.data_parallel, 1, 1),)) - self.concat = P.Concat(axis=1) - - # position_ids (1, len position emb) is contiguous in memory and exported when serialized - position_embeds = Tensor( - [[i for i in range(config.max_position_embeddings)]], dtype=mstype.int32) - self.position_ids = Parameter( - position_embeds, - requires_grad=False - ) - self.position_embedding_type = getattr( - config, "position_embedding_type", "absolute" - ) - - def construct(self, input_ids=None, position_ids=None, query_embeds=None, past_key_values_length=0): - """forward the embeddings from word and position embeddings.""" - if input_ids is not None: - seq_length = input_ids.shape[1] - else: - seq_length = 0 - - if input_ids is not None: - embeddings = self.word_embeddings(input_ids) - if position_ids is None: - position_ids = self.position_ids[:, past_key_values_length: - seq_length + past_key_values_length].copy() - - if self.position_embedding_type == "absolute" and self.position_embeddings: - position_embeddings = self.position_embeddings(position_ids) - embeddings = embeddings + position_embeddings - - if query_embeds is not None: - embeddings = self.concat((query_embeds, embeddings)) - else: - embeddings = query_embeds - - # [bz, query_size, qformer_hidden_size] - # delete for visualglm - # embeddings = self.layernorm(embeddings) - embeddings = self.dropout(embeddings) - return embeddings - - -class BertSelfAttention(nn.Cell): - """ BertSelfAttention """ - - def __init__(self, config, is_cross_attention): - super().__init__() - self.config = config - self.dtype = config.dtype - self.softmax_dtype = config.softmax_dtype - self.compute_dtype = config.compute_dtype - if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): - raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) - ) - - self.num_attention_heads = config.num_attention_heads - self.attention_head_size = int( - config.hidden_size / config.num_attention_heads) - self.all_head_size = self.num_attention_heads * self.attention_head_size - - if config.parallel_config: - dp = config.parallel_config.data_parallel - mp = config.parallel_config.model_parallel - else: - dp = mp = 1 - - self.query = Linear(in_channels=config.hidden_size, - out_channels=self.all_head_size, - compute_dtype=config.compute_dtype, - param_init_type=config.dtype - ) - self.query.shard(strategy_matmul=((dp, 1), (mp, 1)), - strategy_bias=((dp, mp), (mp,))) - if is_cross_attention: - self.key = Linear( - 
in_channels=config.encoder_width, - out_channels=self.all_head_size, - compute_dtype=config.compute_dtype, - param_init_type=config.dtype) - - self.value = Linear( - in_channels=config.encoder_width, - out_channels=self.all_head_size, - compute_dtype=config.compute_dtype, - param_init_type=config.dtype) - else: - self.key = Linear( - in_channels=config.hidden_size, - out_channels=self.all_head_size, - compute_dtype=config.compute_dtype, - param_init_type=config.dtype) - self.value = Linear( - in_channels=config.hidden_size, - out_channels=self.all_head_size, - compute_dtype=config.compute_dtype, - param_init_type=config.dtype) - self.key.shard(strategy_matmul=((dp, 1), (mp, 1)), - strategy_bias=((dp, mp), (mp,))) - self.value.shard(strategy_matmul=((dp, 1), (mp, 1)), - strategy_bias=((dp, mp), (mp,))) - - self.dropout = Dropout(1. - config.attention_probs_dropout_prob) - self.position_embedding_type = getattr( - config, "position_embedding_type", "absolute") - if self.position_embedding_type == "relative_key" \ - or self.position_embedding_type == "relative_key_query": - self.max_position_embeddings = config.max_position_embeddings - self.distance_embedding = nn.Embedding(2 * config.max_position_embeddings - 1, - self.attention_head_size) - self.save_attention = False - - self.einsum = P.Einsum("bhld,lrd->bhlr") - self.einsum2 = P.Einsum("bhrd,lrd->bhlr") - - self.divider = math.sqrt(self.attention_head_size) - self.cast = P.Cast() - - self.concat = P.Concat(axis=2) - self.batch_matmul = P.BatchMatMul().shard(((dp, mp, 1, 1), (dp, mp, 1, 1))) - - self.softmax = nn.Softmax(axis=-1) - self.softmax.softmax.shard(((dp, mp, 1, 1),)) - self.transpose = P.Transpose().shard(((1, 1, 1, 1),)) - - def transpose_for_scores(self, x: Tensor) -> Tensor: - """ transpose input for scores output. - - Args: - x (Tensor): input - - Returns: - Tensor: output - """ - new_x_shape = x.shape[:-1] + ( - self.num_attention_heads, - self.attention_head_size, - ) - x = x.view(*new_x_shape) - return self.transpose(x, (0, 2, 1, 3)) - - def construct( - self, - hidden_states, - attention_mask=None, - head_mask=None, - encoder_hidden_states=None, - encoder_attention_mask=None, - past_key_value=None, - output_attentions=False, - ): - """ BertSelfAttention forwarding """ - - # If this is instantiated as a cross-attention module, the keys - # and values come from an encoder; the attention mask needs to be - # such that the encoder's padding tokens are not attended to. 
- is_cross_attention = encoder_hidden_states is not None - - hidden_states = self.cast(hidden_states, self.compute_dtype) - if is_cross_attention: - # [batch_size, vit_seq_length, encoder_hidden_width] - encoder_hidden_states = self.cast( - encoder_hidden_states, self.compute_dtype) - # [batch_size, num_attention_heads, vit_seq_length, attention_head_size] - key_layer = self.transpose_for_scores( - self.key(encoder_hidden_states)) - # [batch_size, num_attention_heads, vit_seq_length, attention_head_size] - value_layer = self.transpose_for_scores( - self.value(encoder_hidden_states)) - attention_mask = encoder_attention_mask - elif past_key_value is not None: - key_layer = self.transpose_for_scores(self.key(hidden_states)) - value_layer = self.transpose_for_scores(self.value(hidden_states)) - key_layer = self.concat([past_key_value[0], key_layer]) - value_layer = self.concat([past_key_value[1], value_layer]) - else: - # [batch_size, num_attention_heads, query_size, attention_head_size] - key_layer = self.transpose_for_scores(self.key(hidden_states)) - # [batch_size, num_attention_heads, query_size, attention_head_size] - value_layer = self.transpose_for_scores(self.value(hidden_states)) - - # [batch_size, query_size, qformer_hidden_size] - mixed_query_layer = self.query(hidden_states) - # [batch_size, num_attention_heads, query_size, attention_head_size] - query_layer = self.transpose_for_scores(mixed_query_layer) - - past_key_value = (key_layer, value_layer) - - # Take the dot product between "query" and "key" to get the raw attention scores. - # key_layer.transpose(tmp_shape) [batch_size, num_attention_heads, attention_head_size, query_size] - # query_layer: [batch_size, num_attention_heads, query_size, attention_head_size] - trans_key_layer = self.transpose(key_layer, (0, 1, 3, 2)) - attention_scores = self.batch_matmul(query_layer, trans_key_layer) - - if ( - self.position_embedding_type == "relative_key" - or self.position_embedding_type == "relative_key_query" - ): - seq_length = hidden_states.shape[1] - position_ids_l = Tensor( - [i for i in range(seq_length)], dtype=mstype.int32).view(-1, 1) - position_ids_r = Tensor( - [i for i in range(seq_length)], dtype=mstype.int32).view(1, -1) - distance = position_ids_l - position_ids_r - positional_embedding = self.distance_embedding( - distance + self.max_position_embeddings - 1 - ) - - if self.position_embedding_type == "relative_key": - relative_position_scores = self.einsum( - query_layer, positional_embedding) - attention_scores = attention_scores + relative_position_scores - elif self.position_embedding_type == "relative_key_query": - relative_position_scores_query = self.einsum( - query_layer, positional_embedding) - relative_position_scores_key = self.einsum2( - key_layer, positional_embedding) - attention_scores = (attention_scores + relative_position_scores_query + relative_position_scores_key) - - attention_scores /= self.divider - if attention_mask is not None: - # Apply the attention mask is (precomputed for all layers in BertModel forward() function) - attention_scores = attention_scores + attention_mask - - # Normalize the attention scores to probabilities. [batch_size, num_heads, query_size, query_size] - attention_scores = self.cast(attention_scores, self.softmax_dtype) - attention_probs = self.softmax(attention_scores) - - # This is actually dropping out entire tokens to attend to, which might - # seem a bit unusual, but is taken from the original Transformer paper. 
- attention_probs_dropped = self.dropout(attention_probs) - - # Mask heads if we want to - if head_mask is not None: - attention_probs_dropped = attention_probs_dropped * head_mask - - attention_probs_dropped = self.cast( - attention_probs_dropped, self.compute_dtype) - context_layer = self.batch_matmul(attention_probs_dropped, value_layer) - - # [batch_size, num_heads, query_size, attention_head_size] - context_layer = self.transpose(context_layer, (0, 2, 1, 3)).copy() - # [batch_size, query_size, all_head_size] - new_context_layer_shape = context_layer.shape[:-2] + ( - self.all_head_size,) - context_layer = context_layer.view(*new_context_layer_shape) - - outputs = ( - (context_layer, attention_probs) if output_attentions else (context_layer,) - ) - - outputs = outputs + (past_key_value,) - return outputs - - -class BertSelfOutput(nn.Cell): - """ BertSelfOutput """ - - def __init__(self, config): - super().__init__() - if config.parallel_config: - dp = config.parallel_config.data_parallel - mp = config.parallel_config.model_parallel - else: - dp = mp = 1 - - self.dtype = config.dtype - self.dense = Linear( - in_channels=config.hidden_size, - out_channels=config.hidden_size, - compute_dtype=config.compute_dtype, - param_init_type=config.dtype - ) - self.dense.shard(strategy_matmul=((dp, mp), (1, mp))) - self.layernorm = LayerNorm( - (config.hidden_size,), eps=config.layer_norm_eps).shard(((dp, mp, 1),)) - self.dropout = Dropout(1. - config.hidden_dropout_prob) - self.cast = P.Cast() - - def construct(self, hidden_states, input_tensor): - hidden_states = self.dense(hidden_states) - hidden_states = self.cast(hidden_states, self.dtype) - hidden_states = self.dropout(hidden_states) - hidden_states = self.layernorm(hidden_states + input_tensor) - return hidden_states - - -class BertAttention(nn.Cell): - """ BertAttention """ - - def __init__(self, config, is_cross_attention=False): - super().__init__() - self.self_att = BertSelfAttention(config, is_cross_attention) - self.output = BertSelfOutput(config) - - def construct( - self, - hidden_states, - attention_mask=None, - head_mask=None, - encoder_hidden_states=None, - encoder_attention_mask=None, - past_key_value=None, - output_attentions=False, - ): - """ - hidden_states: [batch_size, query_size, qformer_hidden_size] - attention_mask: [batch_size, 1, 1, query_size] - encoder_hidden_states: [batch_size, vit_seq_length, vit_hidden_size] - encoder_attention_mask: [batch_size, 1, 1, vit_seq_length] - """ - - # self_outputs.shape ([batch_size, query_size, qformer_hidden_size], - # ([batch_size, num_head, query_size, head_size], [batch_size, num_head, query_size, head_size])) - self_outputs = self.self_att( - hidden_states, - attention_mask, - head_mask, - encoder_hidden_states, - encoder_attention_mask, - past_key_value, - output_attentions, - ) - attention_output = self.output(self_outputs[0], hidden_states) - - # add attentions if we output them - outputs = (attention_output,) + self_outputs[1:] - return outputs - - -class BertIntermediate(nn.Cell): - """ BertIntermediate """ - - def __init__(self, config): - super().__init__() - if config.parallel_config: - dp = config.parallel_config.data_parallel - mp = config.parallel_config.model_parallel - else: - dp = mp = 1 - - self.dense = Linear( - in_channels=config.hidden_size, - out_channels=config.intermediate_size, - compute_dtype=config.compute_dtype, - param_init_type=config.dtype - ) - self.dense.shard(strategy_matmul=((dp, mp), (1, mp))) - - if isinstance(config.hidden_act, str): - 
self.intermediate_act_fn = ACT2FN[config.hidden_act] - if isinstance(self.intermediate_act_fn, nn.GELU): - self.intermediate_act_fn = nn.GELU(approximate=False) - else: - self.intermediate_act_fn = config.hidden_act - - def construct(self, hidden_states): - hidden_states = self.dense(hidden_states) - hidden_states = self.intermediate_act_fn(hidden_states) - return hidden_states - - -class BertOutput(nn.Cell): - """ BertOutput """ - - def __init__(self, config): - super().__init__() - if config.parallel_config: - dp = config.parallel_config.data_parallel - mp = config.parallel_config.model_parallel - else: - dp = mp = 1 - - self.dense = Linear( - in_channels=config.intermediate_size, - out_channels=config.hidden_size, - compute_dtype=config.compute_dtype, - param_init_type=config.dtype) - self.dense.shard(strategy_matmul=((dp, mp), (1, mp))) - - self.layernorm = LayerNorm( - (config.hidden_size,), eps=config.layer_norm_eps).shard(((dp, 1, 1),)) - self.dropout = Dropout(1. - config.hidden_dropout_prob) - - def construct(self, hidden_states, input_tensor): - hidden_states = self.dense(hidden_states) - hidden_states = self.dropout(hidden_states) - hidden_states = hidden_states + input_tensor - return hidden_states - - -class BertLayer(nn.Cell): - """ BertLayer """ - - def __init__(self, config, layer_num): - super().__init__() - self.config = config - self.chunk_size_feed_forward = config.chunk_size_feed_forward - self.seq_len_dim = 1 - - self.input_layernorm = LayerNorm( - (config.hidden_size,), eps=config.layer_norm_eps).shard(((config.parallel_config.data_parallel, 1, 1),)) - self.attention = BertAttention(config) - self.layer_num = layer_num - if self.config.add_cross_attention and layer_num % self.config.cross_attention_freq == 0: - self.crossattention = BertAttention(config, - is_cross_attention=self.config.add_cross_attention) - self.has_cross_attention = True - else: - self.has_cross_attention = False - self.intermediate = BertIntermediate(config) - self.output = BertOutput(config) - - self.intermediate_query = BertIntermediate(config) - self.output_query = BertOutput(config) - - self.concat = P.Concat(axis=1) - self.concat_seq = P.Concat(axis=self.seq_len_dim) - - def construct( - self, - hidden_states, - attention_mask=None, - head_mask=None, - encoder_hidden_states=None, - encoder_attention_mask=None, - past_key_value=None, - output_attentions=False, - query_length=0, - ): - """ - hidden_states: [batch_size, query_size, qformer_hidden_size] - attention_mask: [batch_size, 1, 1, query_size] - encoder_hidden_states: [batch_size, vit_seq_length, vit_hidden_size ] - encoder_attention_mask: [batch_size, 1, 1, vit_seq_length] - """ - # decoder uni-directional self-attention cached key/values tuple is at positions 1,2 - self_attn_past_key_value = ( - past_key_value[:2] if past_key_value is not None else None - ) - - # add by visualglm - hidden_states = self.input_layernorm(hidden_states) - self_attention_outputs = self.attention( - hidden_states, - attention_mask, - head_mask, - None, - None, - self_attn_past_key_value, - output_attentions, - ) - # [batch_size, query_size, qformer_hidden_size] - attention_output = self_attention_outputs[0] - # ([batch_size, num_head, query_size, head_size], [batch_size, num_head, query_size, head_size])) - outputs = self_attention_outputs[1:-1] - - present_key_value = self_attention_outputs[-1] - - if query_length > 0: - # [batch_size, query_size, qformer_hidden_size] - query_attention_output = attention_output[:, :query_length, :] - - if 
self.has_cross_attention: - assert encoder_hidden_states is not None, \ - "encoder_hidden_states must be given for cross-attention layers" - cross_attention_outputs = self.crossattention( - query_attention_output, - attention_mask, - head_mask, - encoder_hidden_states, - encoder_attention_mask, - None, - output_attentions, - ) - # [batch_size, query_size, qformer_hidden_size] - query_attention_output = cross_attention_outputs[0] - # add cross attentions if we output attention weights - outputs = outputs + cross_attention_outputs[1:-1] - - # [batch_size, query_size, qformer_hidden_size] - layer_output = self.apply_chunking_to_forward(self.feed_forward_chunk_query, - query_attention_output) - if attention_output.shape[1] > query_length: - layer_output_text = self.apply_chunking_to_forward(self.feed_forward_chunk, - attention_output[:, query_length:, :]) - layer_output = self.concat([layer_output, layer_output_text]) - else: - layer_output = self.apply_chunking_to_forward( - self.feed_forward_chunk, attention_output) - outputs = (layer_output,) + outputs - - outputs = outputs + (present_key_value,) - - return outputs - - def feed_forward_chunk(self, attention_output): - """ apply feed_forward with chunks """ - intermediate_output = self.intermediate(attention_output) - layer_output = self.output(intermediate_output, attention_output) - return layer_output - - def feed_forward_chunk_query(self, attention_output): - """ apply feed_forward with chunks (query) """ - intermediate_output = self.intermediate_query(attention_output) - layer_output = self.output_query(intermediate_output, attention_output) - return layer_output - - def apply_chunking_to_forward(self, forward_fn, *input_tensors): - """ apply chunking to forward computation """ - assert input_tensors, f"{input_tensors} has to be a tuple/list of tensors" - - if self.chunk_size_feed_forward > 0: - tensor_shape = input_tensors[0].shape[self.seq_len_dim] - for input_tensor in input_tensors: - if input_tensor.shape[self.seq_len_dim] != tensor_shape: - raise ValueError( - f"All input tenors have to be of the same shape: {tensor_shape}, " - f"found shape {input_tensor.shape[self.seq_len_dim]}" - ) - - if input_tensors[0].shape[self.seq_len_dim] % self.chunk_size_feed_forward != 0: - raise ValueError( - f"The dimension to be chunked {input_tensors[0].shape[self.seq_len_dim]} " - f"has to be a multiple of the chunk size {self.chunk_size_feed_forward}" - ) - - num_chunks = input_tensors[0].shape[self.seq_len_dim] // self.chunk_size_feed_forward - - # chunk input tensor into tuples - input_tensors_chunks = tuple(input_tensor.chunk(num_chunks, dim=self.seq_len_dim) - for input_tensor in input_tensors) - # apply forward fn to every tuple - output_chunks = tuple(forward_fn(*input_tensors_chunk) - for input_tensors_chunk in zip(*input_tensors_chunks)) - # concatenate output at same dimension - return self.concat_seq(output_chunks) - - return forward_fn(*input_tensors) - - -class BertEncoder(nn.Cell): - """ BertEncoder """ - - def __init__(self, config): - super(BertEncoder, self).__init__() - self.config = config - self.layer = nn.CellList( - [BertLayer(config, i) - for i in range(self.config.num_hidden_layers)] - ) - self.num_hidden_layers = [ - i for i in range(self.config.num_hidden_layers)] - - # add for layernorm - self.final_layernorm = LayerNorm( - (config.hidden_size,), eps=config.layer_norm_eps).shard(((config.parallel_config.data_parallel, 1, 1),)) - - def construct( - self, - hidden_states, - attention_mask=None, - head_mask=None, - 
encoder_hidden_states=None, - encoder_attention_mask=None, - past_key_values=None, - use_cache=None, - output_attentions=False, - output_hidden_states=False, - query_length=0, - ): - """ - attention_mask: [batch_size, 1, 1, query_size] - encoder_hidden_states: [batch_size, vit_seq_length, encoder_hidden_width] - encoder_attention_mask: [batch_size, 1, 1, vit_seq_length] - """ - all_hidden_states = () if output_hidden_states else None - all_self_attentions = () if output_attentions else None - all_cross_attentions = ( - () if output_attentions and self.config.add_cross_attention else None - ) - - next_decoder_cache = () if use_cache else None - - for i in self.num_hidden_layers: - layer_module = self.layer[i] - if output_hidden_states: - all_hidden_states = all_hidden_states + (hidden_states,) - - layer_head_mask = head_mask[i] if head_mask is not None else None - past_key_value = past_key_values[i] if past_key_values is not None else None - - # layer_outputs shape ([batch_size, query_size, qformer_hidden_size], - # ([batch_size, num_head, query_size, head_size], [batch_size, num_head, query_size, head_size])) - layer_outputs = layer_module( - hidden_states, - attention_mask, - layer_head_mask, - encoder_hidden_states, - encoder_attention_mask, - past_key_value, - output_attentions, - query_length, - ) - - hidden_states = layer_outputs[0] - - if use_cache: - next_decoder_cache += (layer_outputs[-1],) - if output_attentions: - all_self_attentions = all_self_attentions + (layer_outputs[1],) - all_cross_attentions = all_cross_attentions + \ - (layer_outputs[2],) - - # add for visualglm - hidden_states = self.final_layernorm(hidden_states) - - if output_hidden_states: - all_hidden_states = all_hidden_states + (hidden_states,) - - value_list = [hidden_states, next_decoder_cache, - all_hidden_states, all_self_attentions, all_cross_attentions] - return tuple(value_list) - - -class BertModelVisualGLM(BertModel): - """ - BertModel adaptor for visualglm - """ - - def __init__(self, config): - super(BertModelVisualGLM, self).__init__(config) - self.embeddings = BertEmbeddings(config) - self.encoder = BertEncoder(config) - - -class BertLMHeadModel(BertPreTrainedModel): - """ BertLMHeadModel, the main model for Qformer - - Args: - config (QFormerConfig): config for qformer, see qformer_config.py. - - Raises: - ValueError: config type Error. - - Returns: - a BertLMHeadModel instance. - """ - _support_list = ["bert_base_uncased", "bert_base_uncased_resized"] - - def __init__(self, config: QFormerConfig): - super(BertLMHeadModel, self).__init__(config) - if not isinstance(config, QFormerConfig): - raise ValueError( - f"Parameter config in `{self.__class__.__name__}(config)` " - "should be an instance of class `QFormerConfig`. " - "To create a model from a pretrained model use " - f"`model = {self.__class__.__name__}.from_pretrained(PRETRAINED_MODEL_NAME)`" - ) - - self.config = config - self.bert = BertModelVisualGLM(config) - self.cls = BertOnlyMLMHead(config) - - if self.config.checkpoint_name_or_path: - self.load_checkpoint(config) - - # for lm_loss reduction - GRAPH_MODE - self.reduction = config.loss_reduction - self.loss = CrossEntropyLoss( - reduction=self.reduction, label_smoothing=0.1) - self.vocab_size = self.config.vocab_size - - def convert_bert_model_params(self, bert_model_params: OrderedDict): - """ - convert params from BertModel in MindFormers, some param names are altered. 
- """ - dict_mapping = {'layer.': 'blocks.', - 'encoder.': 'bert_encoder.encoder.', - 'self_att.query.': 'dense1.', - 'self_att.key.': 'dense2.', - 'self_att.value.': 'dense3.', - 'attention.output.dense.': 'attention.projection.', - 'attention.output.layernorm.gamma': 'layernorm2.gamma', - 'attention.output.layernorm.beta': 'layernorm2.beta', - 'intermediate.dense.weight': 'output.mapping.weight', - 'intermediate.dense.bias': 'output.mapping.bias', - 'output.dense.weight': 'output.projection.weight', - 'output.dense.bias': 'output.projection.bias', - 'output.layernorm.gamma': 'layernorm1.gamma', - 'output.layernorm.beta': 'layernorm1.beta', - 'embeddings.position_embeddings.embedding_table': \ - 'embedding_postprocessor.full_position_embedding.embedding_table', - 'embeddings.layernorm.gamma': 'embedding_postprocessor.layernorm.gamma', - 'embeddings.layernorm.beta': 'embedding_postprocessor.layernorm.beta', - 'embeddings.word_embeddings.embedding_table': 'word_embedding.embedding_table', - 'cls.predictions.transform.dense': 'bert.mlmloss.dense', - 'cls.predictions.transform.layernorm': 'bert.mlmloss.layernorm', - 'cls.predictions.decoder': 'bert.mlmloss.vocab_dense'} - - param_dict = self.parameters_dict() - for name, data in param_dict.items(): - new_name = name - for replace_from, replace_to in dict_mapping.items(): - new_name = new_name.replace(replace_from, replace_to) - if new_name not in bert_model_params.keys(): - logger.warning("%s not loaded.", name) - continue - new_data = bert_model_params[new_name] - if name.endswith("intermediate.dense.weight") or name.endswith("output.dense.weight"): - new_data = new_data.T - data.assign_value(new_data) - - def load_bert_model_params(self, config: QFormerConfig, param): - """ - load parameters for BertLMHeadModel, if the weights come from - mindformers.models.bert.BertModel, param conversion is needed. - - Args: - config (QFormerConfig): config for the Q-Former model. - param (OrderedDict): the params to be loaded. - """ - if config.resize_token_embeddings and config.convert_param_from_bert: - self.convert_bert_model_params(param) - else: - load_param_into_net(self, param) - - def load_checkpoint(self, config: QFormerConfig): - """ - load checkpoint for BertLMHeadModel. (we can use the param for BertModel on obs, - but we need to alter the names of some param) - - Args: - config (ModelConfig): QFormerConfig instance, which could have attribute - "checkpoint_name_or_path (str)". set checkpoint_name_or_path to a supported - model name or a path to checkpoint, to load model weights. - """ - checkpoint_name_or_path = config.checkpoint_name_or_path - # the relevant file will be downloaded from the Obs platform. - if not os.path.exists(checkpoint_name_or_path): - if checkpoint_name_or_path not in self._support_list: - raise ValueError(f"{checkpoint_name_or_path} is not a supported default model" - f" or a valid path to checkpoint," - f" please select from {self._support_list}.") - # on Atlas 800T A2, load the 'resized' checkpoint. 
- if not config.resize_token_embeddings and not checkpoint_name_or_path.endswith("_resized"): - checkpoint_name_or_path = checkpoint_name_or_path + "_resized" - checkpoint_name = checkpoint_name_or_path - default_checkpoint_download_folder = os.path.join( - MindFormerBook.get_default_checkpoint_download_folder(), - checkpoint_name_or_path.split("_")[0]) - if not os.path.exists(default_checkpoint_download_folder): - os.makedirs(default_checkpoint_download_folder, exist_ok=True) - - ckpt_file = os.path.join(default_checkpoint_download_folder, checkpoint_name + ".ckpt") - if not os.path.exists(ckpt_file): - url = MindFormerBook.get_model_ckpt_url_list()[checkpoint_name_or_path][0] - succeed = download_with_progress_bar(url, ckpt_file) - if not succeed: - logger.info("checkpoint download failed, and pretrained weights are unloaded.") - return - try_sync_file(ckpt_file) - self.default_checkpoint_download_path = ckpt_file - logger.info("start to read the ckpt file: %s", os.path.getsize(ckpt_file)) - else: - ckpt_file = checkpoint_name_or_path - param = load_checkpoint(ckpt_file) - try: - self.load_bert_model_params(config, param) - logger.info("weights in %s are loaded", ckpt_file) - except RuntimeError: - logger.error("the given config and weights in %s are" - " mismatched, and weights load failed", ckpt_file) - - def get_input_embeddings(self) -> nn.Cell: - return self.bert.get_input_embeddings() - - def set_input_embeddings(self, value): - return self.bert.set_input_embeddings(value) - - def get_output_embeddings(self) -> nn.Cell: - return self.cls.predictions.decoder - - def set_output_embeddings(self, value): - self.cls.predictions.decoder = value - - def resize_token_embeddings(self, new_num_tokens: Optional[int] = None) -> nn.Embedding: - """ - resize token embeddings, inherit from super class. - """ - old_num_tokens = self.get_input_embeddings().embedding_table.shape[0] - logger.info("resize_token_embeddings from %d to %d.", old_num_tokens, new_num_tokens) - super(BertLMHeadModel, self).resize_token_embeddings(new_num_tokens) - - def tie_weights(self): - """ - tie encoder and decoder weights, inherit from super class. - """ - logger.info("weights tied.") - super(BertLMHeadModel, self).tie_weights() - - # pylint: disable=W0613 - def construct(self, input_ids=None, attention_mask=None, position_ids=None, head_mask=None, - query_embeds=None, encoder_hidden_states=None, encoder_attention_mask=None, labels=None, - past_key_values=None, use_cache=True, output_attentions=None, output_hidden_states=None, - return_dict=None, return_logits=False, is_decoder=True): - """ - construct function for QFormer. - - Args: - input_ids (Tensor): the indices of input sequence tokens in the vocabulary. - position_ids (Tensor): used to identify each token's position in the list of tokens. - attention_mask (Tensor): used when batching sequences together. - query_embeds (Tensor): to be supplemented. - return_dict(bool): Reserved param, not used. - head_mask (Tensor): to be supplemented. - encoder_hidden_states (`Tensor` of shape : (batch_size, sequence_length, hidden_size)`) - Sequence of hidden-states at the output of the last layer of the encoder. - Used in the cross-attention if the model is configured as a decoder. - encoder_attention_mask (`Tensor` of shape : (batch_size, sequence_length)`, `optional`)) - Mask to avoid performing attention on the padding token indices of the encoder input. - This mask is used in the cross-attention if the model is configured as a decoder. 
- Mask values selected in ``[0, 1]``: - 1 for tokens that are **not masked**, - 0 for tokens that are **masked**. - past_key_values: Reserved param, not used. - labels (`Tensor(mstype.int32)` of shape : (batch_size, sequence_length)`, `optional`)) - Labels for computing the left-to-right language modeling loss (next word prediction). - Indices should be in ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) - Tokens with indices set to ``-100`` are ignored (masked), the loss is - only computed for the tokens with labels n ``[0, ..., config.vocab_size]``, - past_key_values (:obj:`tuple(tuple(Tensor(mstype.float)))` of length: - `config.n_layers` with each tuple having 4 tensors of shape - (batch_size, num_heads, sequence_length - 1, embed_size_per_head)), - Contains precomputed key and value hidden states of the attention blocks. - Can be used to speed up decoding. If :obj:`past_key_values` are used, the user - can optionally input only the last :obj:`decoder_input_ids` - (those that don't have their past key value states given to this model) of - shape (batch_size, 1)` instead of all :obj:`decoder_input_ids` of shape - (batch_size, sequence_length)`. - use_cache (bool, `optional`, default is True): - If set to :obj:`True`, :obj:`past_key_values` key value states are returned - and can be used to speed up decoding (see :obj:`past_key_values`). - output_attentions (bool, `optional`, default is None): - whether to append self-attentions as a part of outputs in the BertSelfAttention layer. - output_hidden_states (bool, `optional`, default is None): - whether to return all hidden states in the output of the BertEncoder layer. - return_logits (bool, `optional`, default is False): - whether to only return prediction_scores other than lm_loss as output. - is_decoder (bool, `optional`, default is True): - specify whether the BertModel is encoder or decoder. - - Returns: - output (tuple of Tensors): - if return_logits is True, directly return prediction_scores as output. - if label input is not None, return lm_loss, prediction_scores and BertModel outputs - (except sequence_output), otherwise return prediction_scores and BertModel outputs - (except sequence_output) as output. 
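-
-        Note:
-            when labels is provided, lm_loss is a next-token prediction loss:
-            prediction_scores[:, :-1] is matched against labels[:, 1:] before the loss is computed.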
- """ - - if labels is not None: - use_cache = False - if past_key_values is not None: - query_embeds = None - - outputs = self.bert( - input_ids, - attention_mask=attention_mask, - position_ids=position_ids, - head_mask=head_mask, - query_embeds=query_embeds, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - past_key_values=past_key_values, - use_cache=use_cache, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - is_decoder=is_decoder, - ) - - sequence_output = outputs[0] - if query_embeds is not None: - sequence_output = outputs[0][:, query_embeds.shape[1]:, :] - - prediction_scores = self.cls(sequence_output) - - if return_logits: - return prediction_scores[:, :-1, :].copy() - - lm_loss = None - if labels is not None: - # we are doing next-token prediction; shift prediction scores and input ids by one - shifted_prediction_scores = prediction_scores[:, :-1, :].copy() - labels = labels[:, 1:].copy() - lm_loss = self.loss( - shifted_prediction_scores.view(-1, self.vocab_size), - labels.view(-1), - ) - if self.reduction == "none": - lm_loss = lm_loss.view(prediction_scores.size(0), -1).sum(1) - - output = (prediction_scores,) + outputs[1:] - return ((lm_loss,) + output) if lm_loss is not None else output diff --git a/research/visualglm/run_visualglm.py b/research/visualglm/run_visualglm.py deleted file mode 100644 index 3e4c2db7..00000000 --- a/research/visualglm/run_visualglm.py +++ /dev/null @@ -1,208 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""default visualglm runner. 
""" -import argparse -import re - -import mindspore as ms -from mindspore.dataset import vision -from mindspore.dataset.vision.utils import Inter -from mindformers.tools.logger import logger -from mindformers.tools.utils import str2bool -from mindformers.tools.image_tools import load_image -from visualglm import VisualGLMImageToTextGeneration -from visualglm_config import VisualGLMConfig -from visualglm_processor import VisualGLMProcessor - - -def init_context(device_id): - """init context""" - ms.set_context(mode=0, device_target="Ascend", device_id=device_id, max_device_memory="59GB") # Ascend, CPU - - -def build_text_input(prompts, templates): - """build text input from prompts""" - text_input = [] - for i in range(len(prompts)): - text_input.append(templates[i].format(prompts[i])) - return text_input - - -def process_response(response_list): - """ get standard response output """ - handled_response = [] - for response in response_list: - response = response.strip() - response = response.replace("[[训练时间]]", "2023年") - punkts = [ - [",", ","], - ["!", "!"], - [":", ":"], - [";", ";"], - [r"\?", "?"], - ] - for item in punkts: - response = re.sub(fr"([\u4e00-\u9fff]){item[0]}", r"\1%s" % item[1], response) - response = re.sub(fr"{item[0]}([\u4e00-\u9fff])", r"%s\1" % item[1], response) - response = response.split('答:')[-1].strip() - handled_response.append(response) - return handled_response - - -DEFAULT_IMAGE_TEXT_PAIR = [ - ("./examples/titanic.jpg", "这部电影的导演是谁?") -] - - -def generate_glm_prompt(unhandled_prompts, history=None, english=False): - """ generate glm prompt from raw prompt""" - if history is None: - history = [] - post_prompts, image_positions = [], [] - for query in unhandled_prompts: - prompt = "" - if english: - for _, (old_query, response) in enumerate(history): - prompt += f"Q:{old_query}\nA:{response}\n" - prompt += f"Q:{query}\nA:" - else: - for _, (old_query, response) in enumerate(history): - prompt += f"问:{old_query}\n答:{response}\n" - prompt += f"问:{query}\n答:" - post_prompts.append(prompt) - pre_prompts = [""] * len(post_prompts) - image_positions = [len("")] * len(post_prompts) - return pre_prompts, post_prompts, image_positions - - -def handle_prompt(args): - """handle prompt""" - if args.image_path is None: - image_filepath = [pair[0] for pair in DEFAULT_IMAGE_TEXT_PAIR] - else: - image_filepath = args.image_path.split(',') - - if args.prompt is None: - if args.image_path is not None: - raw_prompts = [""] * len(image_filepath) - else: - raw_prompts = [pair[1] for pair in DEFAULT_IMAGE_TEXT_PAIR] - else: - raw_prompts = args.prompt.split(',') - - if len(raw_prompts) != len(image_filepath): - raise ValueError("prompts length do not equal to image_path length, please check the args.") - - # handle prompt using chatglm type - pre_prompts, post_prompts, image_positions = generate_glm_prompt(raw_prompts) - - return image_filepath, pre_prompts, post_prompts, image_positions - - -def main(args): - init_context(device_id=args.device_id) - model_config = VisualGLMConfig.from_pretrained(args.config_path) - model_config.max_txt_len = args.seq_length - - if args.checkpoint is not None: - logger.info(f"checkpoint: {args.checkpoint}") - model_config.checkpoint_name_or_path = args.checkpoint - - image_filepath, pre_prompts, post_prompts, image_positions = handle_prompt(args) - - if args.batch_size > 1: - model_config.batch_size = args.batch_size - - diff = model_config.batch_size - len(image_filepath) - if diff > 0: - extend_filepath = [image_filepath[-1]] * diff - 
extend_pre_prompt = [pre_prompts[-1]] * diff - extend_post_prompt = [post_prompts[-1]] * diff - extend_positions = [image_positions[-1]] * diff - image_filepath.extend(extend_filepath) - pre_prompts.extend(extend_pre_prompt) - post_prompts.extend(extend_post_prompt) - image_positions.extend(extend_positions) - else: - image_filepath = image_filepath[:model_config.batch_size] - pre_prompts = pre_prompts[:model_config.batch_size] - post_prompts = post_prompts[:model_config.batch_size] - else: - model_config.batch_size = 1 - - model_config.text_config.batch_size = model_config.batch_size - model_config.text_config.seq_length = args.seq_length + model_config.qformer_config.query_length - model_config.text_config.do_sample = args.do_sample - model_config.text_config.top_p = args.top_p - model_config.text_config.top_k = args.top_k - model_config.text_config.use_past = args.use_past - - model = VisualGLMImageToTextGeneration(model_config) - processor = VisualGLMProcessor.from_pretrained(args.config_path) - processor.image_processor.resize.resize = vision.transforms.Resize((224, 224), Inter.BICUBIC) - - tokenizer = processor.tokenizer - - for _ in range(args.generate_repeat_time): - if model_config.batch_size > 1: - input_images = processor.image_processor([load_image(filepath) for filepath in image_filepath]) - pre_input_ids = tokenizer(pre_prompts, add_special_tokens=False, return_tensors="ms")["input_ids"] - post_input_ids = tokenizer(post_prompts, - max_length=args.seq_length - len(pre_input_ids[0]), - padding="max_length", - return_tensors="ms")["input_ids"] - output = model.generate_text_for_image(input_images, pre_input_ids, post_input_ids) - response = tokenizer.decode(output, skip_special_tokens=True) - response = process_response(response) - logger.info(f"Response:{response}") - else: - batch_size = len(image_filepath) - for index in range(batch_size): - pil_image = load_image(image_filepath[index]) - input_image = processor.image_processor(pil_image) - pre_input_ids = tokenizer(pre_prompts[index], add_special_tokens=False, return_tensors="ms")[ - "input_ids"] - post_input_ids = tokenizer(post_prompts[index], - max_length=args.seq_length - len(pre_input_ids), - padding="max_length", - return_tensors="ms")["input_ids"] - - output = model.generate_text_for_image(input_image, pre_input_ids, post_input_ids) - - response = tokenizer.decode(output, skip_special_tokens=True) - response = process_response(response) - logger.info(f"Response:{response}") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--model_type', default="visualglm_6b", type=str, required=False, help='model type') - parser.add_argument('--config_path', default="run_visualglm_6b_image_to_text_generation.yaml", - type=str, required=False, help='config path') - parser.add_argument('--device_id', type=int, default=1, required=False, help='device id') - parser.add_argument('--batch_size', type=int, default=1, required=False, help='batch_size') - parser.add_argument('--checkpoint', type=str, default=None, required=False, help='checkpoint path') - parser.add_argument('--generate_repeat_time', type=int, default=1, required=False, help='generate repeat time') - parser.add_argument('--use_past', type=str2bool, default=True, required=False, help='whether use past') - parser.add_argument('--do_sample', type=str2bool, default=False, required=False, help='whether do sample') - parser.add_argument('--top_p', type=float, default=1, required=False, help='top p') - parser.add_argument('--top_k', 
type=int, default=0, required=False, help='top k') - parser.add_argument('--seq_length', type=int, default=32, required=False, help='seq length') - parser.add_argument('--image_path', type=str, default=None, required=False, help='image path') - parser.add_argument('--prompt', type=str, default=None, required=False, help='prompt content') - args_ = parser.parse_args() - print(args_) - - main(args_) diff --git a/research/visualglm/run_visualglm_6b_image_to_text_generation.yaml b/research/visualglm/run_visualglm_6b_image_to_text_generation.yaml deleted file mode 100644 index 2a549efa..00000000 --- a/research/visualglm/run_visualglm_6b_image_to_text_generation.yaml +++ /dev/null @@ -1,225 +0,0 @@ -seed: 42 -run_mode: 'predict' -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: '' -src_strategy_path_or_dir: '' -# If true, auto transform load_checkpoint to load in distributed model -auto_trans_ckpt: False -only_save_strategy: False -resume_training: False - -# context -context: - #0--Graph Mode; 1--Pynative Mode - mode: 1 - device_target: "Ascend" - enable_graph_kernel: False - max_call_depth: 10000 - save_graphs: True - save_graphs_path: "./graph" - device_id: 0 - -# aicc -remote_save_url: "Please input obs url on AICC platform." - -# runner -runner_config: - epochs: 10 - batch_size: &batch_size 1 - sink_size: 2 - image_size: 224 - -# parallel -use_parallel: False -parallel: - parallel_mode: 0 # 0-dataset, 1-semi, 2-auto, 3-hybrid - gradients_mean: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: False - full_batch: False -parallel_config: - data_parallel: 1 - model_parallel: 1 - pipeline_stage: 1 - micro_batch_num: 1 - vocab_emb_dp: True - gradient_aggregation_group: 4 -micro_batch_interleave_num: 1 - -# recompute -recompute_config: - recompute: False - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: False - -# autotune -auto_tune: False -filepath_prefix: './autotune' -autotune_per_step: 10 - -# profile -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: False -profile_communication: False -profile_memory: True - -# Trainer -trainer: - type: ImageToTextGenerationTrainer - model_name: "visualglm_6b" - -pipeline: - type: VisualGLMImageToTextGenerationPipeline - model_name: "visualglm_6b" - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: Flickr8kDataLoader - dataset_dir: "./checkpoint_download/Flickr8k" - stage: "train" - column_names: [ "image", "text" ] - hypothesis_template: "{}" - transforms: - - type: ToPIL - - type: Resize - size: 224 - interpolation: "linear" - - type: CenterCrop - size: 224 - - type: ToTensor - - type: Normalize - mean: [ 0.48145466, 0.4578275, 0.40821073 ] - std: [ 0.26862954, 0.26130258, 0.27577711 ] - is_hwc: False - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - batch_size: 32 - repeat: 1 - numa_enable: False - prefetch_size: 30 - seed: 2022 - -eval_dataset_task: - type: ZeroShotImageClassificationDataset - dataset_config: *eval_dataset -# model -model: - model_config: - type: VisualGLMConfig - batch_size: *batch_size - freeze_vision: True - freeze_text: True - max_txt_len: 32 - # visualglm qformer weight - checkpoint_name_or_path: "/path/to/visualglm_qformer.ckpt" - dtype: "float32" - compute_dtype: "float16" - layernorm_dtype: "float32" - softmax_dtype: "float32" - - qformer_config: - type: QFormerConfig - num_hidden_layers: 12 - num_heads: 12 - layer_norm_eps: 1.e-12 - encoder_width: 
1408 - query_length: 32 - dtype: "float32" - compute_dtype: "float16" - layernorm_dtype: "float32" - softmax_dtype: "float32" - - vision_config: - type: ViTConfig - image_size: 224 - patch_size: 14 - num_channels: 3 - initializer_range: 0.001 - hidden_size: 1408 - num_hidden_layers: 39 - num_attention_heads: 16 - intermediate_size: 6144 - qkv_bias: True - hidden_act: gelu - post_layernorm_residual: false - layer_norm_eps: 1.0e-6 - attention_probs_dropout_prob: 0.0 - hidden_dropout_prob: 0.0 - drop_path_rate: 0.0 - use_mean_pooling: false - encoder_stride: 16 -# layernorm_compute_type: float32 -# softmax_compute_type: float32 -# param_init_type: float32 - # visualglm vit weight - checkpoint_name_or_path: "/path/to/visualglm_vit.ckpt" - - text_config: - type: GLMConfig - vocab_size: 130528 - hidden_size: 4096 - num_layers: 28 - num_heads: 32 - inner_hidden_size: 16384 - # 推理时, 输入pad到的长度, model里的最大句长 - seq_length: 512 - embedding_dropout_prob: 0.0 - attention_dropout_rate: 0.0 - hidden_dropout_rate: 0.0 - # default "None" means hidden-size/num-attention-heads. - hidden_size_per_attention_head: - layernorm_order: "post" - layernorm_epsilon: 1.0e-5 - use_final_layernorm: True - use_past: True - activation_func: 'GELU' - position_encoding_2d: True - param_init_type: "float32" - layernorm_compute_type: "float32" - softmax_compute_type: "float32" - compute_dtype: "float16" - bos_token_id: 130004 - eos_token_id: 130005 - mask_token_id: 130000 - gmask_token_id: 130001 - pad_token_id: 3 - # The maximum length of the generated words. - max_decode_length: 2048 - is_enhanced_encoder: True - is_sample_acceleration: False - checkpoint_name_or_path: "/path/to/glm_6b.ckpt" - top_k: 1 - top_p: 1 - repetition_penalty: 1 - do_sample: True - arch: - type: VisualGLMImageToTextGeneration - -# processor -processor: - type: VisualGLMProcessor - image_processor: - type: VisualGLMImageProcessor - image_size: 224 # input image size - tokenizer: - type: ChatGLMTokenizer - max_length: 32 - vocab_file: "/path/to/ice_text.model" - - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "mindformers" - save_checkpoint_steps: 10000 - integrated_save: True - async_save: False - - type: ObsMonitor -eval_callbacks: - - type: ObsMonitor diff --git a/research/visualglm/run_visualglm_finetune.py b/research/visualglm/run_visualglm_finetune.py deleted file mode 100644 index da69c6b2..00000000 --- a/research/visualglm/run_visualglm_finetune.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""visualglm finetune runner. 
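-Registers the VisualGLM dataset, dataloader and AnnealingLR schedule, then launches LoRA
-fine-tuning through the Trainer API, e.g. (see visualglm.md):
-    python run_visualglm_finetune.py --config run_visualglm_lora.yaml --graph_mode 0 \
-        --batch_size 1 --device_id 0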
""" -import argparse -import json - -from mindformers import MindFormerConfig, MindFormerRegister, MindFormerModuleType, build_context -from mindformers.tools.utils import str2bool -from mindformers import Trainer -from mindformers.tools.logger import logger - -from visualglm import VisualglmWithLora -from visualglm_config import VisualGLMConfig -from visualglm_dataloader import VisualGLMDataLoader -from visualglm_dataset import VisualGLMDataset -from visualglm_lr_schedule import AnnealingLR - - -def main(args): - mode = args.graph_mode - - config_path = args.config_path - mindformer_config = MindFormerConfig(config_path) - - if mode is not None: - mindformer_config.context.mode = mode - - if args.device_id != -1: - mindformer_config.context.device_id = args.device_id - - if args.device_target: - mindformer_config.context.device_target = args.device_target - - # init_context(mindformer_config, device_id=args.device_id, device_target=args.device_target, mode=mode) - - build_context(mindformer_config) - - logger.info(f"--------------- mindformer_config: {mindformer_config}") - - model_config = VisualGLMConfig.from_pretrained(args.config_path) - model_config.max_txt_len = args.seq_length - - if args.checkpoint is not None: - logger.info(f"checkpoint: {args.checkpoint}") - model_config.checkpoint_name_or_path = args.checkpoint - - init_batch_size(args, mindformer_config, model_config) - - model_config.text_config.seq_length = args.seq_length + model_config.qformer_config.query_length - model_config.text_config.do_sample = args.do_sample - model_config.text_config.top_p = args.top_p - model_config.text_config.top_k = args.top_k - model_config.text_config.use_past = args.use_past - - MindFormerRegister.register_cls( - AnnealingLR, module_type=MindFormerModuleType.LR, alias="AnnealingLR") - - MindFormerRegister.register_cls( - VisualGLMDataLoader, module_type=MindFormerModuleType.DATASET_LOADER, alias="VisualGLMDataLoader") - - MindFormerRegister.register_cls( - VisualGLMDataset, module_type=MindFormerModuleType.DATASET, alias="VisualGLMDataset") - - dataset_dir = mindformer_config.train_dataset.data_loader.dataset_dir - logger.info(f"------------------------- dataset_dir: {dataset_dir}") - with open(dataset_dir) as dataset_file: - datasets = json.load(dataset_file) - data_size = len(datasets) - logger.info(f"------------------------ data_size: {data_size}") - - num_iters = mindformer_config.lr_schedule.num_iters - batch_size = model_config.batch_size - data_parallel = 1 - if mindformer_config.use_parallel: - data_parallel = mindformer_config.parallel_config.data_parallel - - scale = num_iters * batch_size * data_parallel // data_size + 1 - - logger.info(f"dataset scale: {scale} = {num_iters} * {batch_size} * {data_parallel} // {data_size} + 1") - mindformer_config.train_dataset.data_loader.scale = scale - mindformer_config.train_dataset_task.dataset_config.data_loader.scale = scale - - train_dataset = VisualGLMDataset(mindformer_config.train_dataset_task.dataset_config) - - model = VisualglmWithLora(model_config) - task = Trainer(args=mindformer_config, - model=model, - model_name='visualglm_6b', - task='text_generation', - train_dataset=train_dataset, - pet_method='') - - task.train(train_checkpoint=mindformer_config.load_checkpoint, - auto_trans_ckpt=mindformer_config.auto_trans_ckpt, resume_training=False) - - -def init_batch_size(args, mindformer_config, model_config): - if args.batch_size > 1: - model_config.batch_size = args.batch_size - else: - model_config.batch_size = 1 - 
mindformer_config.runner_config.batch_size = model_config.batch_size - mindformer_config.model.model_config.batch_size = model_config.batch_size - model_config.text_config.batch_size = model_config.batch_size - mindformer_config.train_dataset.batch_size = model_config.batch_size - mindformer_config.train_dataset_task.dataset_config.batch_size = model_config.batch_size - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--graph_mode', default=0, type=int, required=False, help='graph mode') - parser.add_argument('--model_type', default="visualglm_6b", type=str, required=False, help='model type') - parser.add_argument('--config_path', default="run_visualglm_lora.yaml", type=str, required=False, - help='config path') - parser.add_argument('--device_id', type=int, default=0, required=False, help='device id') - parser.add_argument('--device_target', type=str, default='Ascend', required=False, help='device target') - parser.add_argument('--batch_size', type=int, default=1, required=False, help='batch_size') - parser.add_argument('--checkpoint', type=str, default=None, required=False, help='checkpoint path') - parser.add_argument('--use_past', type=str2bool, default=None, required=False, help='whether use past') - parser.add_argument('--do_sample', type=str2bool, default=False, required=False, help='whether do sample') - parser.add_argument('--top_p', type=float, default=1, required=False, help='top p') - parser.add_argument('--top_k', type=int, default=0, required=False, help='top k') - parser.add_argument('--seq_length', type=int, default=32, required=False, help='seq length') - parser.add_argument('--image_path', type=str, default=None, required=False, help='image path') - args_ = parser.parse_args() - print(args_) - main(args_) diff --git a/research/visualglm/run_visualglm_lora.yaml b/research/visualglm/run_visualglm_lora.yaml deleted file mode 100644 index 72f8a000..00000000 --- a/research/visualglm/run_visualglm_lora.yaml +++ /dev/null @@ -1,291 +0,0 @@ -seed: 0 -run_mode: 'finetune' -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: "" -src_strategy_path_or_dir: '' -auto_trans_ckpt: False # If true, auto transform load_checkpoint to load in distributed model -only_save_strategy: False -resume_training: False - -# context -context: - mode: 0 # 0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - device_id: 0 - max_device_memory: "59GB" - -# aicc -remote_save_url: "Please input obs url on AICC platform." 
- -# runner -runner_config: - epochs: 1 - batch_size: &batch_size 4 - sink_mode: False - sink_size: 2 -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 65536 - scale_factor: 2 - scale_window: 400 - use_clip_grad: True - max_grad_norm: 0.1 - -# parallel -use_parallel: False -parallel: - parallel_mode: 0 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: False - loss_repeated_mean: True - full_batch: False -parallel_config: - data_parallel: 1 - model_parallel: 1 - pipeline_stage: 1 - optimizer_shard: True - micro_batch_num: 1 - vocab_emb_dp: True - gradient_aggregation_group: 4 -micro_batch_interleave_num: 1 - -# moe -moe_config: - expert_num: 1 - capacity_factor: 1.05 - aux_loss_factor: 0.05 - num_experts_chosen: 1 - -# recompute -recompute_config: - recompute: False - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: False - -# autotune -auto_tune: False -filepath_prefix: './autotune' -autotune_per_step: 10 - -# profile -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: False -profile_communication: False -profile_memory: True - -# Trainer -trainer: - type: CausalLanguageModelingTrainer - model_name: 'visualglm_6b' -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False - -# train dataset -train_dataset: &train_dataset - tokenizer: - type: ChatGLMTokenizer - max_length: 2048 - vocab_file: "/path_to_vocab/ice_text.model" - data_loader: - type: VisualGLMDataLoader - dataset_dir: "/path_to_dataset/dataset.json" - shuffle: False - column_names: ["img", "prompt", "label"] - file_format: json - random_mapping: True # if true enlarge original dataset "scale" times - scale: 4 - tokenizer: - type: ChatGLMTokenizer - max_length: 2048 - vocab_file: "/path_to_vocab/ice_text.model" - seed: 0 - do_eval: False - input_columns: ["image", "input_ids", "labels", "position_id", "attention_mask"] - max_source_length: 64 - max_target_length: 256 - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 4 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: VisualGLMDataset - dataset_config: *train_dataset - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "/path_to_dataset/dataset.json" - shuffle: False - tokenizer: - type: ChatGLMTokenizer - max_length: 1024 - vocab_file: "/path_to_vocab/ice_text.model" - input_columns: ["img", "prompt", "label"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: VisualGLMDataset - dataset_config: *eval_dataset - -# model -model: - model_config: - type: VisualGLMConfig - batch_size: *batch_size - freeze_vision: True - freeze_text: True - max_txt_len: 32 - checkpoint_name_or_path: "/path_to_qformer/visualglm_qformer.ckpt" # visualglm qformer weight - dtype: "float32" - compute_dtype: "float16" - layernorm_dtype: "float32" - softmax_dtype: "float32" - - qformer_config: - type: QFormerConfig - num_hidden_layers: 12 #12 - num_heads: 12 - seq_length: 1 - layer_norm_eps: 1.e-12 - encoder_width: 1408 - query_length: 32 - dtype: "float32" - compute_dtype: "float16" - layernorm_dtype: "float32" - 
softmax_dtype: "float32" - hidden_dropout_prob: 0.0 - attention_probs_dropout_prob: 0.0 - - vision_config: - type: ViTConfig - image_size: 224 - patch_size: 14 - num_channels: 3 - initializer_range: 0.001 - hidden_size: 1408 - num_hidden_layers: 39 # 39 - num_attention_heads: 16 - intermediate_size: 6144 - qkv_bias: True - hidden_act: gelu - post_layernorm_residual: false - layer_norm_eps: 1.0e-6 - attention_probs_dropout_prob: 0.0 - hidden_dropout_prob: 0.0 - drop_path_rate: 0.0 - use_mean_pooling: false - encoder_stride: 16 - checkpoint_name_or_path: "/path_to_vit/visualglm_vit.ckpt" # visualglm vit weight - - text_config: - type: GLMConfig - vocab_size: 130528 - hidden_size: 4096 - num_layers: 28 # 28 - num_heads: 32 - inner_hidden_size: 16384 - seq_length: 512 # 推理时, 输入pad到的长度, model里的最大句长 - embedding_dropout_prob: 0.0 - attention_dropout_rate: 0.0 - hidden_dropout_rate: 0.0 - hidden_size_per_attention_head: # default "None" means hidden-size/num-attention-heads. - layernorm_order: "post" - layernorm_epsilon: 1.0e-5 - use_final_layernorm: True - use_past: False - activation_func: 'GELU' - position_encoding_2d: True - param_init_type: "float32" - layernorm_compute_type: "float32" - softmax_compute_type: "float32" - compute_dtype: "float16" - bos_token_id: 130004 - eos_token_id: 130005 - mask_token_id: 130000 - gmask_token_id: 130001 - pad_token_id: 3 - max_decode_length: 2048 # The maximum length of the generated words. - is_enhanced_encoder: True - is_sample_acceleration: False - checkpoint_name_or_path: "/path_to_glm/glm_6b.ckpt" - top_k: 1 - top_p: 1 - repetition_penalty: 1 - do_sample: True - pet_config: - pet_type: lora - lora_rank: 10 - lora_alpha: 128 - lora_dropout: 0.0 - exclude_layers: [ 'mixins', 'mlp' ] - pre_seq_len: 4 - target_modules: '' - layer_range: [ 0, 14 ] # int or list to specify layers to add lora - arch: - type: CausalLanguageModelDatasetVisualGLM # Blip2ImageToTextGeneration -# processor -processor: - type: VisualGLMProcessor - image_processor: - type: VisualGLMImageProcessor - image_size: 224 # input image size - tokenizer: - type: ChatGLMTokenizer - max_length: 2048 - vocab_file: "/path_to_vocab/ice_text.model" - -# lr schedule -lr_schedule: - type: AnnealingLR - learning_rate: 0.00001 - warmup_steps: 100 - num_iters: 1500 - total_steps: -1 # -1 means it will load the total steps of the dataset -layer_scale: False -layer_decay: 0.65 - -# optimizer -optimizer: - type: AdamWeightDecay - beta1: 0.9 - beta2: 0.95 - eps: 0.00000001 # 1e-8 - weight_decay: 0.01 -lr_scale: False -lr_scale_factor: 256 - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "visualglm" - save_checkpoint_steps: 1500 - integrated_save: True - save_network_params: False - save_trainable_params: True - async_save: False - - type: ObsMonitor -eval_callbacks: - - type: ObsMonitor - -# metric -metric: - type: PerplexityMetric - - - diff --git a/research/visualglm/run_visualglm_pipeline.py b/research/visualglm/run_visualglm_pipeline.py deleted file mode 100644 index 0749470f..00000000 --- a/research/visualglm/run_visualglm_pipeline.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""visualglm pipeline runner.""" -import argparse - -import mindspore as ms -from mindspore.dataset import vision -from mindspore.dataset.vision.utils import Inter - -from mindformers.pipeline import pipeline -from mindformers.tools.utils import str2bool -from mindformers.tools.logger import logger - -from visualglm import VisualGLMImageToTextGeneration -from visualglm_config import VisualGLMConfig -from visualglm_processor import VisualGLMProcessor - - -def init_context(device_id): - """ init context """ - ms.set_context(mode=0, device_target="Ascend", device_id=device_id, max_device_memory="59GB") # Ascend, CPU - - -def main(args): - init_context(device_id=args.device_id) - model_config = VisualGLMConfig.from_pretrained(args.config_path) - model_config.max_txt_len = args.seq_length - - if args.checkpoint is not None: - logger.info(f"checkpoint: {args.checkpoint}") - model_config.checkpoint_name_or_path = args.checkpoint - - if args.batch_size > 1: - model_config.batch_size = args.batch_size - else: - model_config.batch_size = 1 - - model_config.text_config.batch_size = model_config.batch_size - model_config.text_config.seq_length = args.seq_length + model_config.qformer_config.query_length - model_config.text_config.do_sample = args.do_sample - model_config.text_config.top_p = args.top_p - model_config.text_config.top_k = args.top_k - model_config.text_config.use_past = args.use_past - - model = VisualGLMImageToTextGeneration(model_config) - processor = VisualGLMProcessor.from_pretrained(args.config_path) - processor.image_processor.resize.resize = vision.transforms.Resize((224, 224), Inter.BICUBIC) - tokenizer = processor.tokenizer - - logger.info(f"batch_size is {model_config.batch_size}") - - pipeline_task = pipeline(task='visualglm_image_to_text_generation', model=model, - image_processor=processor.image_processor, - tokenizer=tokenizer, batch_size=model_config.batch_size) - - predict_result = pipeline_task({ - "image": args.image_path, - "prompt": args.prompt}) - logger.info(predict_result) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--model_type', default="visualglm_6b", type=str, required=False, help='model type') - parser.add_argument('--config_path', default="run_visualglm_6b_image_to_text_generation.yaml", - type=str, required=False, help='config path') - parser.add_argument('--device_id', type=int, default=0, required=False, help='device id') - parser.add_argument('--batch_size', type=int, default=1, required=False, help='batch_size') - parser.add_argument('--checkpoint', type=str, default=None, required=False, help='checkpoint path') - parser.add_argument('--generate_repeat_time', type=int, default=1, required=False, help='generate repeat time') - parser.add_argument('--use_past', type=str2bool, default=True, required=False, help='whether use past') - parser.add_argument('--do_sample', type=str2bool, default=False, required=False, help='whether do sample') - parser.add_argument('--top_p', type=float, default=1, required=False, help='top p') 
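-    # top_p and top_k only take effect when --do_sample is true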
- parser.add_argument('--top_k', type=int, default=0, required=False, help='top k') - parser.add_argument('--seq_length', type=int, default=32, required=False, help='seq length') - parser.add_argument('--image_path', type=str, default=None, required=False, help='image path') - parser.add_argument('--prompt', type=str, default=None, required=False, help='') - args_ = parser.parse_args() - print(args_) - main(args_) diff --git a/research/visualglm/run_visualglm_with_lora.py b/research/visualglm/run_visualglm_with_lora.py deleted file mode 100644 index 947cc89d..00000000 --- a/research/visualglm/run_visualglm_with_lora.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""default visualglm runner. """ -import argparse -import re - -import mindspore as ms -from mindspore import load_checkpoint -from mindspore.dataset import vision -from mindspore.dataset.vision.utils import Inter - -from mindformers.tools.image_tools import load_image -from mindformers.tools.utils import str2bool -from mindformers.tools.logger import logger - -from visualglm import VisualglmWithLora -from visualglm_config import VisualGLMConfig -from visualglm_processor import VisualGLMProcessor - - -def init_context(device_id): - """init context""" - ms.set_context(mode=0, device_target="Ascend", device_id=device_id, max_device_memory="59GB") # Ascend, CPU - - -def build_text_input(prompts, templates): - """build text input from prompts""" - text_input = [] - for i in range(len(prompts)): - text_input.append(templates[i].format(prompts[i])) - return text_input - - -def process_response(response_list): - """ get standard response output """ - handled_response = [] - for response in response_list: - response = response.strip() - response = response.replace("[[训练时间]]", "2023年") - punkts = [ - [",", ","], - ["!", "!"], - [":", ":"], - [";", ";"], - [r"\?", "?"], - ] - for item in punkts: - response = re.sub(fr"([\u4e00-\u9fff]){item[0]}", r"\1%s" % item[1], response) - response = re.sub(fr"{item[0]}([\u4e00-\u9fff])", r"%s\1" % item[1], response) - response = response.split('答:')[-1].strip() - handled_response.append(response) - return handled_response - - -DEFAULT_IMAGE_TEXT_PAIR = [ - ("./finetune/sea.jpg", "这张图片的背景里有什么内容?") -] - - -def generate_glm_prompt(unhandled_prompts, history=None, english=False): - """ generate glm prompt from raw prompt""" - if history is None: - history = [] - post_prompts, image_positions = [], [] - for query in unhandled_prompts: - prompt = "" - if english: - for _, (old_query, response) in enumerate(history): - prompt += f"Q:{old_query}\nA:{response}\n" - prompt += f"Q:{query}\nA:" - else: - for _, (old_query, response) in enumerate(history): - prompt += f"问:{old_query}\n答:{response}\n" - prompt += f"问:{query}\n答:" - post_prompts.append(prompt) - pre_prompts = [""] * len(post_prompts) - image_positions = [len("")] * len(post_prompts) - return 
pre_prompts, post_prompts, image_positions - - -def handle_prompt(args): - """handle prompt""" - if args.image_path is None: - image_filepath = [pair[0] for pair in DEFAULT_IMAGE_TEXT_PAIR] - else: - image_filepath = args.image_path.split(',') - - if args.prompt is None: - if args.image_path is not None: - raw_prompts = [""] * len(image_filepath) - else: - raw_prompts = [pair[1] for pair in DEFAULT_IMAGE_TEXT_PAIR] - else: - raw_prompts = args.prompt.split(',') - - if len(raw_prompts) != len(image_filepath): - raise ValueError("prompts length do not equal to image_path length, please check the args.") - - # handle prompt using chatglm type - pre_prompts, post_prompts, image_positions = generate_glm_prompt(raw_prompts) - - return image_filepath, pre_prompts, post_prompts, image_positions - - -def main(args): - init_context(device_id=args.device_id) - model_config = VisualGLMConfig.from_pretrained(args.config_path) - model_config.max_txt_len = args.seq_length - - if args.checkpoint is not None: - logger.info(f"checkpoint: {args.checkpoint}") - model_config.checkpoint_name_or_path = args.checkpoint - - image_filepath, pre_prompts, post_prompts, image_positions = handle_prompt(args) - - if args.batch_size > 1: - model_config.batch_size = args.batch_size - - diff = model_config.batch_size - len(image_filepath) - if diff > 0: - extend_filepath = [image_filepath[-1]] * diff - extend_pre_prompt = [pre_prompts[-1]] * diff - extend_post_prompt = [post_prompts[-1]] * diff - extend_positions = [image_positions[-1]] * diff - image_filepath.extend(extend_filepath) - pre_prompts.extend(extend_pre_prompt) - post_prompts.extend(extend_post_prompt) - image_positions.extend(extend_positions) - else: - image_filepath = image_filepath[:model_config.batch_size] - pre_prompts = pre_prompts[:model_config.batch_size] - post_prompts = post_prompts[:model_config.batch_size] - else: - model_config.batch_size = 1 - - model_config.text_config.batch_size = model_config.batch_size - model_config.text_config.seq_length = args.seq_length + model_config.qformer_config.query_length - model_config.text_config.do_sample = args.do_sample - model_config.text_config.top_p = args.top_p - model_config.text_config.top_k = args.top_k - model_config.text_config.use_past = args.use_past - - # model = VisualGLMImageToTextGeneration(model_config) - model = VisualglmWithLora(model_config) - logger.info(f"------------------- lora checkpint: {args.lora_checkpoint}") - load_checkpoint(args.lora_checkpoint, model) - model.set_train(False) - - processor = VisualGLMProcessor.from_pretrained(args.config_path) - - processor.image_processor.resize.resize = vision.transforms.Resize((224, 224), Inter.BICUBIC) - - tokenizer = processor.tokenizer - - for _ in range(args.generate_repeat_time): - if model_config.batch_size > 1: - input_images = processor.image_processor([load_image(filepath) for filepath in image_filepath]) - pre_input_ids = tokenizer(pre_prompts, add_special_tokens=False, return_tensors="ms")["input_ids"] - post_input_ids = tokenizer(post_prompts, - max_length=args.seq_length - len(pre_input_ids[0]), - padding="max_length", - return_tensors="ms")["input_ids"] - output = model.generate_text_for_image(input_images, pre_input_ids, post_input_ids) - response = tokenizer.decode(output, skip_special_tokens=True) - response = process_response(response) - logger.info(f"Response:{response}") - else: - batch_size = len(image_filepath) - for index in range(batch_size): - pil_image = load_image(image_filepath[index]) - input_image = 
processor.image_processor(pil_image) - pre_input_ids = tokenizer(pre_prompts[index], add_special_tokens=False, return_tensors="ms")[ - "input_ids"] - post_input_ids = tokenizer(post_prompts[index], - max_length=args.seq_length - len(pre_input_ids), - padding="max_length", - return_tensors="ms")["input_ids"] - - output = model.generate_text_for_image(input_image, pre_input_ids, post_input_ids) - - response = tokenizer.decode(output, skip_special_tokens=True) - response = process_response(response) - logger.info(f"Response:{response}") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--model_type', default="visualglm_6b", type=str, required=False, help='model type') - parser.add_argument('--config_path', default="run_visualglm_lora.yaml", type=str, required=False, - help='config path') - parser.add_argument('--lora_checkpoint', type=str, default=None, required=True, help='checkpoint path') - parser.add_argument('--device_id', type=int, default=0, required=False, help='device id') - parser.add_argument('--batch_size', type=int, default=1, required=False, help='batch size') - parser.add_argument('--checkpoint', type=str, default=None, required=False, help='checkpoint path') - parser.add_argument('--generate_repeat_time', type=int, default=1, required=False, help='generate repeat time') - parser.add_argument('--use_past', type=str2bool, default=True, required=False, help='whether use past') - parser.add_argument('--do_sample', type=str2bool, default=False, required=False, help='whether do sample') - parser.add_argument('--top_p', type=float, default=1, required=False, help='top p') - parser.add_argument('--top_k', type=int, default=0, required=False, help='top k') - parser.add_argument('--seq_length', type=int, default=128, required=False, help='seq length') - parser.add_argument('--image_path', type=str, default=None, required=False, help='image path') - parser.add_argument('--prompt', type=str, default=None, required=False, help='prompt content') - args_ = parser.parse_args() - print(args_) - main(args_) diff --git a/research/visualglm/visualglm.md b/research/visualglm/visualglm.md deleted file mode 100644 index 5e22cd86..00000000 --- a/research/visualglm/visualglm.md +++ /dev/null @@ -1,373 +0,0 @@ -# VisualGLM - -VisualGLM是由清华大学的GLM团队推出的一个新的多模态对话语言模型,支持图像、中文和英文的输入和输出。VisualGLM大幅度地提升了多模态对话的SOTA水平,创造了令人惊叹的效果,能够根据图像和文本的内容生成符合人类偏好的回答,成为了多模态领域的新时代引领者。 VisualGLM完全开源可商用,基于 Transformer 结构,语言模型部分基于 ChatGLM-6B ,具有 62 亿参数;图像部分通过训练 BLIP2-Qformer 构建起视觉模型与语言模型的桥梁,整体模型共 78 亿参数。 - -## VisualGLM-6B - -VisualGLM-6B 依靠来自于 CogView 数据集的30M高质量中文图文对,与300M经过筛选的英文图文对进行预训练,中英文权重相同。该训练方式较好地将视觉信息对齐到ChatGLM的语义空间;之后的微调阶段,模型在长视觉问答数据上训练,以生成符合人类偏好的答案。 - -## 前期准备 - -### 安装mindformers - -参考[README](../../README.md) "mindformers安装" 安装mindformers。 - -### 环境要求 - -- 硬件: Atlas 800T A2 -- MindSpore: 2.2.10 -- MindSpore Lite: 2.2.10 -- MindFormers: dev -- Mindpet: 1.0.2 - -**注:** VisualGLM-6B推理可以在单卡上完成部署,全量微调至少需要8卡。 - -### 生成RANK_TABLE_FILE(多卡运行必须环节) - -运行mindformers/tools/hccl_tools.py生成RANK_TABLE_FILE的json文件 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python mindformers/tools/hccl_tools.py --device_num "[0,8)" -``` - -**注:** 若使用ModelArts的notebook环境,可从 `/user/config/jobstart_hccl.json` 路径下直接获取rank table,无需手动生成 - -### VisualGLM-6B 预训练权重下载和转换 - -#### 1. 从huggingface下载tokenizer权重后转换 - -从HuggingFace网站下载visualglm 6b词库的文件 ice_text.model。 -下载地址:https://huggingface.co/THUDM/visualglm-6b/tree/main - -#### 2. 
从SAT仓库下载visualglm权重 - -推荐使用rclone工具下载模型 - -**步骤** - -1) 下载rclone工具 -下载地址: -根据服务器的类型和处理器,选择对应的文件。 -下载完以后解压,把其中的脚本拷贝出来,放到执行目录下: -cp rclone*/rclone /usr/local/bin/ - -2) 创建rclone配置文件 - -在home目录创建rclone.conf文件 - -- Windows系统对于的目录:C:\Users\用户名\.config\rclone\rclone.conf; -- linux系统对应的目录:~/.config/rclone/rclone.conf - -配置内容,这里的配置不需要修改: - -```text -[r2] -type = s3 -provider = Cloudflare -access_key_id = eb4d69e273848089c7f9b9599cdcd983 -secret_access_key = 367e9b21fef313f187026320016962b47b74ca4ada7d64d551c43c51e195d7a5 -endpoint = https://c8a00746a80e06c4632028e37de24d6e.r2.cloudflarestorage.com -acl = private -``` - -3) 使用rclone脚本来下载权重文件 - -**参数说明** - -- THREAD_COUNT:下载的线程数量,可以根据实际带宽来调整。 - -```shell -cd 模型下载路径/ -rclone copy -P --multi-thread-streams THREAD_COUNT --no-check-certificate -vv --size-only r2:/sat/visualglm-6b.zip ./ -``` - -4) 执行权重转换脚本 - -```shell -cd research/visualglm -python convert_weight.py --torch_path TORCH_CKPT_DIR --vit_mindspore_path VIT_CKPT_PATH --qformer_mindspore_path QFORMER_CKPT_PATH --glm_mindspore_path GLM_CKPT_PATH -``` - -**参数说明** - -1. TORCH_CKPT_DIR: huggingface VisualGLM-6B权重保存目录路径,路径要指定到文件; -2. VIT_CKPT_PATH: vit模型mindspore权重文件保存路径,路径要指定到文件; -3. QFORMER_CKPT_PATH: qformer模型mindspore权重文件保存路径,路径要指定到文件; -4. GLM_CKPT_PATH: glm模型mindspore权重文件保存路径和名称,路径要指定到文件。 - -**注意**: - -- 请安装torch=2.0.1和transformers=4.33.2版本,cuda版本11.6及以上 -- 该脚本会在glm模型的路径下生成glm_6b_for_lite.ckpt文件,该权重是用于lite推理的。 - -## MindSpore推理 - -> 接口说明请参考[API接口](https://gitee.com/mindspore/transformer/wikis/API/) -> - -**注意** - -- 图片路径:推理用的参考图片在代码仓库的examples路径下 -- 提示词:每张图片都有一个对应的参考提示词,可以在example_inputs.jsonl文件找到 - -由于visualglm模型的权重需要用户自行下载,因此在启动前,请先行在配置文件中将权重的路径配置完成。 -打开配置文件 research/visualglm/run_visualglm_6b_image_to_text_generation.yaml,修改如下: - -- 替换/path/to/visualglm_qformer.ckpt为上面转换的qformer权重的实际路径 -- 替换/path/to/visualglm_vit.ckpt为上面转换的vit权重的实际路径 -- 替换/path/to/glm_6b.ckpt为上面转换的glm权重的实际路径 -- 替换/path/to/ice_text.model为上面下载的ice_text.model的实际路径 - -```yaml -model: - model_config: - type: VisualGLMConfig - #... - checkpoint_name_or_path: "/path/to/visualglm_qformer.ckpt" # visualglm qformer weight - - vision_config: - #... - checkpoint_name_or_path: "/path/to/visualglm_vit.ckpt" # visualglm vit weight - - text_config: - type: GLMConfig - #... - checkpoint_name_or_path: "/path/to/glm_6b.ckpt" # visualglm glm weight - -processor: - type: VisualGLMProcessor - image_processor: - type: VisualGLMImageProcessor - image_size: 224 # input image size - tokenizer: - #... - checkpoint_name_or_path: "/path/to/ice_text.model" - -``` - -如果使用增量推理,需要在配置文件中use_past值设置为True。 - -- generate接口推理: - -visualglm的generate接口使用脚本已集成在run_visualglm.py脚本中,运行此脚本命令: - -```shell -cd research/visualglm -python run_visualglm.py --config CONFIG_PATH --image_path=IMAGE_PATH --prompt=PROMPT --device_id DEVICE_ID -#运行结果: -#[' 问:描述这张图片。\n答: 泰坦尼克号 电影截图'] -# 运行结果 - -``` - -**参数说明** - -1. CONFIG_PATH:yaml配置的路径,默认是run_visualglm_6b_image_to_text_generation.yaml -2. IMAGE_PATH:推理图片的路径,比如examples/titanic.jpg -3. PROMPT:提示词,比如"描述这张图片。",注意要加引号 -4. DEVICE_ID:NPU卡的编号,默认是0 - -- pipeline接口推理 - -visualglm的pipeline接口推理已集成在run_visualglm_pipeline.py脚本中,运行此脚本命令示例: - -```shell -cd research/visualglm -python run_visualglm_pipeline.py --config CONFIG_PATH --device_id DEVICE_ID --batch_size BATCH_SIZE --use_past True --seq_length SEQ_LENGTH \ - --image_path IMAGE_PATH --prompt PROMPT -# 运行结果 -#[' 问:描述这张图片。\n答: 泰坦尼克号 电影截图'] - -``` - -**参数说明** - -1. CONFIG_PATH:yaml配置的路径,默认是run_visualglm_6b_image_to_text_generation.yaml -2. IMAGE_PATH:推理图片的路径,比如examples/titanic.jpg -3. 
PROMPT:提示词,比如"描述这张图片。",注意要加引号 -4. BATCH_SIZE: 图片批次的大小,默认是1 -5. SEQ_LENGTH: token的长度,默认是32 -4. DEVICE_ID:NPU卡的编号,默认是0 - -## MindSpore 微调 - -注意:目前lora微调只支持数据并行,不支持半自动并行和自动并行 - -- **step1. 下载微调数据集** - -数据集路径: -https://github.com/THUDM/VisualGLM-6B/blob/main/fewshot-data.zip - -下载完以后传到服务器,解压到research/visualglm下面 -记录下fewhot-data/dataset.json文件的路径 - -- **step2. 修改微调配置参数** - -修改/research/visualglm/run_visualglm_lora.yaml文件: - -1. 修改所有path_to_vocab为ice_text.model词库文件的路径 -2. 修改所有path_to_dataset为上面数据集dataset.json文件的路径 -3. 修改path_to_qformer为上面转换的qformer权重文件visualglm_qformer.ckpt的路径 -4. 修改path_to_vit为上面转换的vit权重文件visualglm_vit.ckpt的路径 -5. 修改path_to_glm为上面转换的glm权重文件glm_6b.ckpt的路径 - -```yaml -train_dataset: &train_dataset - tokenizer: - type: ChatGLMTokenizer - max_length: 2048 - vocab_file: "/path_to_vocab/ice_text.model" - data_loader: - type: VisualGLMDataLoader - dataset_dir: "/path_to_dataset/dataset.json" - shuffle: False - file_format: json - random_mapping: True # if true enlarge original dataset "scale" times - scale: 1 - -model: - model_config: - type: VisualGLMConfig - #... - checkpoint_name_or_path: "/path_to_qformer/visualglm_qformer.ckpt" - - vision_config: - type: ViTConfig - #... - checkpoint_name_or_path: "/path_to_vit/visualglm_vit.ckpt" - - text_config: - type: GLMConfig - #... - checkpoint_name_or_path: "/path_to_glm/glm_6b.ckpt" - -processor: - type: VisualGLMProcessor - image_processor: - type: VisualGLMImageProcessor - image_size: 224 # input image size - tokenizer: - type: ChatGLMTokenizer - max_length: 2048 - vocab_file: "/path_to_vocab/ice_text.model" - -``` - -- **step 3. 启动微调任务,按照以下步骤启动:** - -调整learning rate和warmup超参,修改/research/visualglm/run_visualglm_lora.yaml文件,根据实际业务调整下面的超参: - -1. learning_rate: 微调的模型学习率不宜设置过大 -2. warmup_steps:预热步数,表示在训练开始时逐渐增加学习率的步数。这样做可以避免模型在初始阶段受到过大的梯度干扰,提高模型的泛化能力。 -3. num_iters:迭代次数,表示模型在一个epoch中处理数据的次数。一个epoch表示模型遍历整个数据集一次。 -4. total_steps:总步数,表示模型在整个训练过程中处理数据的次数。总步数等于epoch数乘以迭代次数。如果设置为-1,表示不限制总步数,只根据epoch数来决定训练的终止条件4。 - -```yaml -# lr schedule -lr_schedule: - type: AnnealingLR - learning_rate: 0.00001 - warmup_steps: 100 - num_iters: 5000 - total_steps: -1 # -1 means it will load the total steps of the dataset -layer_scale: False -layer_decay: 0.65 - -``` - -- **step4. 使用shell命令启动微调** - -调用下面的脚本启动微调: - -```shell -cd research/visualglm -python run_visualglm_finetune.py --config CONFIG_PATH --graph_mode GRAPH_MODE --batch_size BATCH_SIZE --device_id DEVICE_ID -``` - -**参数说明** - -1. CONFIG_PATH:微调配置,默认是run_visualglm_lora.yaml -2. GRAPH_MODE:图模式编号,默认是0。0:graph模式,1:pynative模式 -3. BATCH_SIZE:批次大小,默认是1 -4. DEVICE_ID:NPU卡的编号,默认是0 - -- **step5. 并行训练** - -运行mindformers/tools/hccl_tools.py生成RANK_TABLE_FILE的json文件 -这会生成一个名字为hccl_8p_01234567_XXXX.json的文件 - -```bash -# 运行如下命令,生成当前机器的RANK_TABLE_FILE的json文件 -python mindformers/tools/hccl_tools.py --device_num "[START_ID, END_ID)" -``` - -参数说明: - -- \[START_ID, END_ID\]: 表示卡的范围,START_ID是第一块卡的编号,END_ID是最后一块卡的编号,比如8卡为[0,8) - -修改run_visualglm_lora.yaml中的并行参数 - -- use_parallel: 改为True -- parallel_mode:目前只支持数据并行,值为0 -- data_parallel:改为上面卡的数量,比如8卡改成8 - -```yaml -use_parallel: True -parallel: - parallel_mode: 0 -parallel_config: - data_parallel: 8 - model_parallel: 1 - -``` - -运行run_singlenode.sh脚本来执行多卡训练 - -1. 把HCCL_JSON_PATH替换为上面生成的hccl json文件的路径 -2. \[START_ID, END_ID\]: 表示卡的范围,START_ID是第一块卡的编号,END_ID是最后一块卡的编号,要跟上面RANK_TABLE_FILE的配置保持一致; -3. 
CARD_COUNT: 表示使用NPU卡的数量,要跟上面RANK_TABLE_FILE的配置保持一致 - -```shell -cd research/visualglm -bash ../run_singlenode.sh \ -"python run_visualglm_finetune.py --config CONFIG_PATH --graph_mode GRAPH_MODE --batch_size BATCH_SIZE" \ -HCCL_JSON_PATH [START_ID, END_ID] CARD_COUNT - -``` - -**参数说明** - -1. CONFIG_PATH:微调配置,默认是run_visualglm_lora.yaml -2. GRAPH_MODE:图模式编号,默认是0。0:graph模式,1:pynative模式 -3. BATCH_SIZE:批次大小,默认是1 -4. HCCL_JSON_PATH: 多机多卡HCCL通信的配置,使用上面生成的RANK_TABLE_FILE的路径 -5. \[START_ID, END_ID\]: 表示卡的范围,START_ID是第一块卡的编号,END_ID是最后一块卡的编号 -6. CARD_COUNT:表示使用NPU卡的数量 - -**注意** - -1. 这里START_ID,END_ID和CARD_COUNT要跟上面RANK_TABLE_FILE的配置保持一致 - -- **step6. 使用shell命令启动推理** - -**注意** - -- 图片路径:微调推理用的参考图片在代码仓库的finetune路径下 -- 提示词:每张图片都有一个对应的参考提示词,可以在finetune_inputs.jsonl文件找到 - -调用预先开发好的脚本run_visualglm_with_lora.py,传入相关的图片和提示词,会得到相关的文本。 - -```shell -python run_visualglm_with_lora.py --lora_checkpoint CHECKPOINT_PATH --config CONFIG_PATH --image_path=IMAGE_PATH --prompt=PROMPT --device_id DEVICE_ID -#运行结果: -#['这张图片是雨天的。'] -``` - -**说明**: - -1. CHECKPOINT_PATH:训练完以后生成的checkpiont的绝对路径,checkpoint一般会保存在下面的路径下output/checkpoint_trainable/rank_[id]/ -2. CONFIG_PATH: 表示yaml配置的路径,默认使用run_visualglm_lora.yaml -3. IMAGE_PATH:表示图片的路径,比如finetune/ghost.jpg -4. PROMPT:表示提示词,比如"这张图片的背景里有什么内容?",注意外面要加引号 -5. DEVICE_ID: 表示NPU卡的编号,默认是0 diff --git a/research/visualglm/visualglm.py b/research/visualglm/visualglm.py deleted file mode 100644 index 7c2d2e5e..00000000 --- a/research/visualglm/visualglm.py +++ /dev/null @@ -1,307 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""visualglm model implementation.""" - -import os -from collections import OrderedDict - -import mindspore as ms -import mindspore.common.dtype as mstype -from mindspore import ops -from mindspore.nn import CrossEntropyLoss -from mindspore.ops import operations as P - -from mindformers import MindFormerBook, LoraAdapter -from mindformers.modules.layers import Linear -from mindformers.tools.logger import logger -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType -from mindpet.graph import freeze_delta -from visualglm_base import VisualGLMBase -from visualglm_config import VisualGLMConfig -from visualglm_text_generation_pipeline import register_pipeline_task - -__all__ = ['VisualGLMModel', 'VisualGLMImageToTextGeneration', 'VisualglmWithLora'] - - -def register_trainer_task(): - """ register trainer task for visualglm """ - cur_path = os.path.dirname(os.path.realpath(__file__)) - MindFormerBook.get_trainer_support_task_list()['text_generation'] = OrderedDict([ - ("visualglm_6b", os.path.join( - cur_path, "run_visualglm_lora.yaml"))]) - - -@MindFormerRegister.register(MindFormerModuleType.MODELS) -class VisualGLMModel(VisualGLMBase): - """ - visualglm with llm model. 
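-    Combines a ViT visual encoder, a Q-Former and a GLM language model. The vision encoder and
-    the LLM can be frozen via config.freeze_vision / config.freeze_text, and the Q-Former output
-    is projected into the GLM hidden space by the llm_proj Linear layer.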
- """ - - def __init__(self, config: VisualGLMConfig, **kwargs): - super(VisualGLMModel, self).__init__(config, **kwargs) - self.batch_size = None - self.config = config if config is not None else VisualGLMConfig() - - self.visual_encoder, self.ln_vision = self.init_vision_encoder() - if config.freeze_vision: - logger.info("freeze vision encoder") - for param in self.visual_encoder.trainable_params(): - param.requires_grad = False - self.visual_encoder.set_train(False) - - self.qformer, self.query_tokens = self.init_qformer() - - self.qformer.cls = None - self.qformer.bert.embeddings.word_embeddings = None - self.qformer.bert.embeddings.position_embeddings = None - for layer in self.qformer.bert.encoder.layer: - layer.output = None - layer.intermediate = None - - if config.text_config.seq_length < config.max_txt_len + config.qformer_config.query_length: - raise ValueError( - f"seq_length should be greater than sum of max_text_len and num_query_token " - f"{config.max_txt_len + config.qformer_config.query_length:d}, but got " - f"{config.text_config.seq_length:d}") - - self.llm_model = self.init_llm() - - if config.freeze_text: - logger.info("freeze llm model") - for param in self.llm_model.trainable_params(): - param.requires_grad = False - self.llm_model.set_train(False) - - dp = config.parallel_config.data_parallel - - micro_batch_interleave_num = config.micro_batch_interleave_num - - self.init_batch_size(dp, micro_batch_interleave_num) - - self.llm_proj = Linear(in_channels=self.config.qformer_config.hidden_size, - out_channels=self.config.text_config.hidden_size, - param_init_type=config.dtype, - compute_dtype=config.compute_dtype) - - pp = config.parallel_config.pipeline_stage - if pp > 1: - self.visual_encoder.pipeline_stage = 0 - self.qformer.pipeline_stage = 0 - self.llm_proj.pipeline_stage = 0 - - if config.checkpoint_name_or_path: - logger.info( - "load blip2 first stage pretrained model for vision model and qformer, checkpoint_name_or_path: " - "%s. 
pretrained llm model: %s", config.checkpoint_name_or_path, - config.text_config.checkpoint_name_or_path) - self.load_checkpoint(config) - else: - self.init_checkpoint(config) - - self.eos_token_id = config.text_config.eos_token_id - self.pad_token_id = config.text_config.pad_token_id - self.ignore_token_id = config.text_config.ignore_token_id - self.max_txt_len = config.max_txt_len - self.prompt = config.prompt - self.prompt_length = config.prompt_length - - self.broadcast_to = P.BroadcastTo((self.batch_size, - self.config.qformer_config.query_length, - self.config.qformer_config.hidden_size)).shard(((1, 1, 1),)) - self.fill = P.Fill().shard(((dp, 1),)) - self.masked_fill = P.MaskedFill().shard(((dp, 1), ())) - self.ones = P.Ones().shard(((dp, 1),)) - self.concat_2d = P.Concat(axis=1).shard(((dp, 1), (dp, 1))) - self.concat_3d = P.Concat(axis=1).shard(((dp, 1, 1), (dp, 1, 1))) - self.not_equal = P.NotEqual().shard(((dp, 1), ())) - self.slice = P.StridedSlice().shard(((dp, 1),)) - self.cast = P.Cast() - self.loss_fct = CrossEntropyLoss(ignore_index=-100) - - register_pipeline_task() - - def init_batch_size(self, dp, micro_batch_interleave_num): - """ - init batch size - :param dp: data parallel config - :param micro_batch_interleave_num: micro batch interleave num - """ - batch_size = self.config.batch_size - parallel_mode = ms.get_auto_parallel_context("parallel_mode") - if parallel_mode in ["semi_auto_parallel", "auto_parallel"]: - full_batch = ms.get_auto_parallel_context("full_batch") - if full_batch: - self.batch_size = batch_size * dp * micro_batch_interleave_num - else: - card_num = int(os.getenv('RANK_SIZE', '1')) - self.batch_size = int(card_num * batch_size / micro_batch_interleave_num) - else: - self.batch_size = batch_size - - @staticmethod - def init_checkpoint(config): - """ init checkpoint """ - if config.vision_config.checkpoint_name_or_path: - vision_checkpoint = config.vision_config.checkpoint_name_or_path - else: - vision_checkpoint = 'not configured' - if config.text_config.checkpoint_name_or_path: - text_checkpoint = config.text_config.checkpoint_name_or_path - else: - text_checkpoint = 'not configured' - if config.qformer_config.checkpoint_name_or_path: - qformer_checkpoint = config.qformer_config.checkpoint_name_or_path - else: - qformer_checkpoint = 'not configured' - logger.info("training blip2 second stage, pretrained vision model: %s, pretrained llm model: %s, " - "pretrained qformer: %s", vision_checkpoint, text_checkpoint, qformer_checkpoint) - - def forward_qformer_and_proj(self, image: ms.Tensor): - """forward the image tensor to the qformer, then project the output to adapt the dimension""" - image_embeds = self.visual_encoder(image) - image_embeds = self.ln_vision(image_embeds) # [batch_size, vit_seq_length, vit_encoder_hidden_width] - image_atts = self.ones(image_embeds.shape[:-1], mstype.float32) # [batch_size, vit_seq_length] - - query_tokens = self.broadcast_to(self.query_tokens) # [batch_size, query_size, qformer_hidden_size] - - query_output = self.qformer.bert(query_embeds=query_tokens, - encoder_hidden_states=image_embeds, - encoder_attention_mask=image_atts, - use_cache=True) - - # [batch_size, query_size, qformer_hidden_size] -> [batch_size, query_size, llm_hidden_size] - return self.llm_proj(query_output[0]) - - -@MindFormerRegister.register(MindFormerModuleType.MODELS) -class VisualGLMImageToTextGeneration(VisualGLMModel): - """ - VisualGLMImageToTextGeneration rely on Blip2Llm, used for image to text genearation. 
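-    It reserves query_length placeholder positions between the pre-prompt and
-    post-prompt token ids, forwards the image through the ViT encoder and Q-Former,
-    and passes the projected image embeddings to the GLM generate interface to
-    produce the output text.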
- - Args: - config (VisualGLMConfig): The config of Blip2ImageToTextGeneration. - - Examples: - >>> from mindformers import Blip2ImageToTextGeneration - >>> model = Blip2ImageToTextGeneration.from_pretrained('itt_blip2_stage2_vit_g_llama_7b') - >>> type(model) - - """ - - def __init__(self, config: VisualGLMConfig, **kwargs): - super(VisualGLMImageToTextGeneration, self).__init__(config, **kwargs) - - self.llm_model.set_train(False) - self.one_prefix = ops.Ones() - self.expand_dims = P.ExpandDims() - - self.query_length = self.config.qformer_config.query_length - - def construct(self, image: ms.Tensor, pre_input_ids: ms.Tensor, post_input_ids: ms.Tensor): - """ VisualGLMImageToTextGeneration model network """ - if len(pre_input_ids.shape) == 1: - pre_input_ids = self.expand_dims(pre_input_ids, 0) - - if len(post_input_ids.shape) == 1: - post_input_ids = self.expand_dims(post_input_ids, 0) - - batch_size = image.shape[0] - prefix_ones = self.one_prefix((batch_size, self.query_length), mstype.int32) - - extend_text_input_ids = self.concat_2d([pre_input_ids, prefix_ones, post_input_ids]) - projected_qformer_output = self.forward_qformer_and_proj(image) - return extend_text_input_ids, projected_qformer_output, pre_input_ids, post_input_ids - - def generate_text_for_image(self, image: ms.Tensor, pre_input_ids: ms.Tensor, post_input_ids: ms.Tensor, **kwargs): - """generate text for image by calling llm generate""" - text_input_ids, projected_qformer_output, pre_input_ids, post_input_ids = self(image, pre_input_ids, - post_input_ids) - output_ids = self.llm_model.generate(input_ids=text_input_ids.asnumpy(), - image_embeds=projected_qformer_output, - pre_input_ids=pre_input_ids.asnumpy(), - post_input_ids=post_input_ids.asnumpy(), - **kwargs) - return output_ids - - -class VisualglmWithLora(VisualGLMModel): - """ visualglm net for lora finetune""" - - def __init__(self, config): - super(VisualglmWithLora, self).__init__(config) - num_layers = config.text_config.num_layers - pet_config = config.text_config.pet_config - if not isinstance(pet_config.layer_range, list): - pet_config.layer_range = [i for i in range(int(pet_config.layer_range))] - exclude = [str(i) + "$" for i in range(num_layers) if i not in pet_config.layer_range] - if exclude: - pet_config.exclude_layers += exclude - logger.info(f"pet_config: {pet_config}") - pet_config.target_modules = r"query_key_value$|dense" - self.llm_model = LoraAdapter.get_pet_model(self.llm_model, pet_config) - self.batch_size = config.batch_size - freeze_delta(self, config.text_config.pet_config.pet_type, - exclude=[r"*tk_delta_lora*"]) - self.one_prefix = ops.Ones() - self.expand_dims = P.ExpandDims() - self.query_length = self.config.qformer_config.query_length - - register_trainer_task() - - def construct(self, image: ms.Tensor, input_ids: ms.tensor, labels: ms.Tensor, position_id, - attention_mask): - """ model network """ - qformer_output = self.forward_qformer_and_proj(image) - batch_size = input_ids.shape[0] - seq_length = input_ids.shape[1] - - # [batch_size, max_txt_length, llm_hidden_size] - pre_input_id = self.slice(input_ids, (0, 0), (batch_size, 3), (1, 1)) - post_input_id = self.slice(input_ids, (0, 3 + 32), (batch_size, seq_length), (1, 1)) - pre_inputs_embedding = self.llm_model.to_text_embeddings(pre_input_id) - post_inputs_embedding = self.llm_model.to_text_embeddings(post_input_id) - pre_inputs_embedding = self.cast(pre_inputs_embedding, mstype.float32) - post_inputs_embedding = self.cast(post_inputs_embedding, mstype.float32) - 
llm_inputs_embeds = self.concat_3d([pre_inputs_embedding, qformer_output, post_inputs_embedding]) - llm_attention_mask = self.cast(attention_mask, mstype.int32) - loss = self.llm_model(llm_inputs_embeds, input_ids, labels.astype(mstype.int32), position_id, - llm_attention_mask) - return loss - - def forward(self, image: ms.Tensor, pre_input_ids: ms.Tensor, post_input_ids: ms.Tensor): - """ forward by vit and qformer """ - if len(pre_input_ids.shape) == 1: - pre_input_ids = self.expand_dims(pre_input_ids, 0) - - if len(post_input_ids.shape) == 1: - post_input_ids = self.expand_dims(post_input_ids, 0) - - batch_size = image.shape[0] - prefix_ones = self.one_prefix((batch_size, self.query_length), mstype.int32) - - extend_text_input_ids = self.concat_2d([pre_input_ids, prefix_ones, post_input_ids]) - projected_qformer_output = self.forward_qformer_and_proj(image) - return extend_text_input_ids, projected_qformer_output, pre_input_ids, post_input_ids - - def generate_text_for_image(self, image: ms.Tensor, pre_input_ids: ms.Tensor, post_input_ids: ms.Tensor, **kwargs): - """generate text for image by calling llm generate""" - text_input_ids, projected_qformer_output, pre_input_ids, post_input_ids = self.forward(image, pre_input_ids, - post_input_ids) - output_ids = self.llm_model.generate(input_ids=text_input_ids.asnumpy(), - image_embeds=projected_qformer_output, - pre_input_ids=pre_input_ids.asnumpy(), - post_input_ids=post_input_ids.asnumpy(), - **kwargs) - return output_ids diff --git a/research/visualglm/visualglm_base.py b/research/visualglm/visualglm_base.py deleted file mode 100644 index d8c185a8..00000000 --- a/research/visualglm/visualglm_base.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# This file was refer to project: -# https://github.com/salesforce/LAVIS/tree/main/lavis/models/blip2_models -# ============================================================================ -""" -visualglm Base Model -""" -import mindspore as ms -from mindspore.common.initializer import initializer, Normal - -from mindformers.models.modeling_utils import PreTrainedModel -from mindformers.models.glm import GLMConfig -from mindformers.modules.activation import GELU -from mindformers.modules.layers import LayerNorm -from qformer import BertLMHeadModel -from visualglm_glm import GLMForPreTrainingForBlip2 -from visualglm_vit import ViTModelForBlip2 -from visualglm_config import VisualGLMConfig - - -class VisualGLMPreTrainedModel(PreTrainedModel): - """ - An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained - models. - """ - - config_class = VisualGLMConfig - base_model_prefix = "visualglm" - - -class VisualGLMBase(VisualGLMPreTrainedModel): - """ - VisualGLM base model, all VisualGLM models inherit this class. 
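-    It provides the init_vision_encoder, init_qformer and init_llm helpers used by
-    the derived models to build the ViT encoder, the BERT-based Q-Former and the
-    GLM language model from the corresponding sub-configs.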
- """ - - def init_qformer(self): - """ - Init qformer for VisualGLM model - - Raises: - ValueError: qformer config wrong - - Returns: - qformer, query_tokens - """ - qformer_config = self.config.qformer_config - qformer_config.parallel_config = self.config.parallel_config - qformer = BertLMHeadModel(qformer_config) - if qformer is None: - raise ValueError("qformer configuration is wrong. \ - please check 'qformer_config' is set in Blip2Config") - query_tokens = ms.Parameter(initializer( - Normal(mean=0.0, sigma=qformer_config.initializer_range), - [1, qformer_config.query_length, qformer_config.hidden_size])) - return qformer, query_tokens - - def init_vision_encoder(self): - """ - init vision encoder for VisualGLM model - - Raises: - ValueError: vit config wrong - - Returns: - visual_encoder, ln_vision - """ - vision_config = self.config.vision_config - visual_encoder = None - if vision_config is not None: - visual_encoder = ViTModelForBlip2(vision_config) - if visual_encoder is None: - raise ValueError("visual_encoder configuration is wrong. \ - please check 'vision_config' is set in Blip2Config") - for block in visual_encoder.blocks: - mapping = block.output.mapping - if mapping.activation_flag and isinstance(mapping.activation, GELU): - mapping.activation = GELU(approximate=False) - - ln_vision = LayerNorm(visual_encoder.config.embed_dim) - return visual_encoder, ln_vision - - def init_llm(self): - """" - init llm model for VisualGLM model - - Raises: - ValueError: text config is wrong - - Returns: - llm model - - """ - llm_config = self.config.text_config - if not llm_config: - raise ValueError("llm configuration is wrong. \ - please check 'text_config' is set in Blip2Config") - - if isinstance(llm_config, GLMConfig): - llm_model = GLMForPreTrainingForBlip2(llm_config) - else: - raise ValueError("the glm-arch is support by the blip2") - return llm_model diff --git a/research/visualglm/visualglm_config.py b/research/visualglm/visualglm_config.py deleted file mode 100644 index efbca383..00000000 --- a/research/visualglm/visualglm_config.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - - -"""VisualGLM Config API""" -from typing import Optional, Union - -import mindspore.common.dtype as mstype - -from mindformers.models.glm import GLMConfig -from mindformers.models.vit import ViTConfig -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType -from mindformers.models.configuration_utils import PretrainedConfig -from mindformers.mindformer_book import MindFormerBook -from mindformers.modules.transformer import TransformerOpParallelConfig, TransformerRecomputeConfig - -from mindformers.models.blip2.qformer_config import QFormerConfig - -__all__ = ['VisualGLMConfig'] - -default_recompute_config = TransformerRecomputeConfig() -default_parallel_config = TransformerOpParallelConfig(recompute=default_recompute_config) - - -@MindFormerRegister.register(MindFormerModuleType.CONFIG) -class VisualGLMConfig(PretrainedConfig): - r""" - Config For BLIP2 Module - - Args: - model_type (Optional[int]): - model type for blip2 model, default is 'blip2'. - batch_size (Optional[int]): - batch size for input data, use in predict. - freeze_vision (Optional[bool]): - whether to freeze vit weights, default is True. - freeze_text (Optional[bool]): - whether to freeze LLM weights, default is True. - max_txt_len (Optional[int]): - max text length for glm model. - prompt (Optional[str]): - prompt for glm model. - prompt_length (Optional[int]): - prompt length for glm model. - checkpoint_name_or_path (Optional[str]): - checkpoint path or name used to load to the network. - dtype (Optional[str]): - layer digital type, default is "float32". - compute_dtype (Optional[str]): - Linear layer compute dtype, default is "float16". - layernorm_compute_type (Optional[str]): - layernorm compute dtype, default is "float32". - softmax_compute_type (Optional[str]): - softmax compute dtype, default is "float32". - vision_config (Optional[ViTConfig]): - config for ViTModel. - qformer_config (Optional[QFormerConfig]): - config for qformer. - text_config (Optional[glm]): - config for LLM model, like glm. - parallel_config(TransformerOpParallelConfig): - The parallel configure. Default `default_transformer_config`, - an instance of `TransformerOpParallelConfig` with default args. - is_training (Optional[bool]): whether the model is in training state. - Returns: - Class, Blip2Config. 
- """ - model_type = "visualglm" - - _support_list = MindFormerBook.get_config_support_list()['blip2'] - - def __init__(self, - model_type: str = "blip2", - batch_size: int = 8, - freeze_vision: bool = True, - freeze_text: bool = True, - max_txt_len: int = 32, - prompt: bool = False, - prompt_length: int = 0, - checkpoint_name_or_path: str = None, - dtype: str = "float32", - compute_dtype: str = "float16", - layernorm_compute_type: str = "float32", - softmax_compute_type: str = "float32", - vision_config: Optional[ViTConfig] = ViTConfig(), - qformer_config: Union[dict, QFormerConfig] = QFormerConfig(), - text_config: Union[dict, GLMConfig] = GLMConfig(), - parallel_config: TransformerOpParallelConfig = default_parallel_config, - is_training: bool = True, - micro_batch_interleave_num=1, - **kwargs): - super(VisualGLMConfig, self).__init__(**kwargs) - if isinstance(qformer_config, dict): - qformer_config = QFormerConfig(**qformer_config) - if isinstance(text_config, dict): - text_config = GLMConfig(**text_config) - self.model_type = model_type - self.batch_size = batch_size - self.freeze_vision = freeze_vision - self.freeze_text = freeze_text - self.max_txt_len = max_txt_len - self.checkpoint_name_or_path = checkpoint_name_or_path - self.prompt = prompt - self.prompt_length = prompt_length - - self.parallel_config = parallel_config - self.compute_dtype = mstype.float32 if compute_dtype == "float32" else mstype.float16 - self.layernorm_compute_type = mstype.float32 if layernorm_compute_type == "float32" else mstype.float16 - self.softmax_compute_type = mstype.float32 if softmax_compute_type == "float32" else mstype.float16 - self.dtype = mstype.float32 if dtype == "float32" else mstype.float16 - self.is_training = is_training - self.micro_batch_interleave_num = micro_batch_interleave_num - - self.vision_config = vision_config - self.qformer_config = qformer_config - - # self.text_config = text_config - self.text_config = text_config - - # first stage is without text config - if self.text_config is not None: - self.text_config.parallel_config = parallel_config - self.text_config.compute_dtype = self.compute_dtype - self.text_config.layernorm_compute_type = self.layernorm_compute_type - self.text_config.softmax_compute_type = self.softmax_compute_type - self.text_config.dtype = self.dtype - - parallel_config.pipeline_stage = 1 - - # pass configs to submodule config - self.qformer_config.parallel_config = parallel_config - self.qformer_config.compute_dtype = self.compute_dtype - self.qformer_config.layernorm_dtype = self.layernorm_compute_type - self.qformer_config.softmax_dtype = self.softmax_compute_type - self.qformer_config.dtype = self.dtype - - self.vision_config.parallel_config = parallel_config - self.vision_config.compute_dtype = self.compute_dtype - self.vision_config.layernorm_compute_type = self.layernorm_compute_type - self.vision_config.softmax_compute_type = self.softmax_compute_type - self.vision_config.dtype = self.dtype diff --git a/research/visualglm/visualglm_dataloader.py b/research/visualglm/visualglm_dataloader.py deleted file mode 100644 index a03f1d7c..00000000 --- a/research/visualglm/visualglm_dataloader.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""VisualGLM DataLoader""" - -import random -from typing import Callable, Union - -import numpy as np -from mindspore.dataset import GeneratorDataset - -from mindformers.dataset.dataloader.sft_dataloader import SFTDataSet -from mindformers.tools.register import MindFormerModuleType, MindFormerRegister - - -def custom_map_func(row_dict, **kwargs): - """Default data parsing function.Returns the first three values of `row_dict`.""" - kwargs.clear() - values = list(row_dict.values()) - if len(values) == 1: - return dict(img=values[0], prompt="", label="") - if len(values) == 2: - return dict(img=values[0], prompt=values[1], label="") - return dict(img=values[0], prompt=values[1], label=values[2]) - -@MindFormerRegister.register(MindFormerModuleType.DATASET_LOADER) -class VisualGLMDataLoader: - """VisualGLM DataLoader""" - def __new__(cls, - dataset_dir: str, - tokenizer: Union[str, dict, Callable], - column_names: str, - dataset_name: str = "", - file_format: str = None, - customized_reader: Callable = None, - customized_parser: Callable = None, - shuffle: bool = False, - scale: int = 1, - random_mapping: bool = False, - **kwargs): - r""" - VisualGLM DataLoader implementation. - Args: - dataset_dir (str): The directory path to parquet text with hdfs. - dataset_name (str): Dataset name. Currently, ["wikitext"] is supported. - file_format (str): Retrieves the end character of the desired file name. - customized_reader (Callable): User-defined functions for reading data. - The input parameter is the path of the dataset file. - The return value is a list of many sentences. - customized_parser (Callable): User-defined function for parsing data. - The input parameter is a dictionary that contains a single line of data. - There are three return values: prompt, answerh and label. If a value is not required, - an empty string is returned. - shuffle (Optional[bool]): Whether or not to perform shuffle on the dataset. - Random accessible input is required. - Default: True, expected order behavior shown in the table below. - - Return: - A GeneratorDataset object. - - Raises: - ValueError: Error input for dataset_dir. - TypeError: Type error for column_names. - - Examples: - >>> from visualglm_dataloader import VisualGLMDataLoader - >>> data_loader = VisualGLMDataLoader(dataset_dir="The required task dataset path", - ... dataset_name="alpaca", - ... file_format="json", - ... 
shuffle=True) - >>> data_loader = data_loader.batch(1) - >>> for item in data_loader: - >>> print(item) - >>> break - """ - del customized_parser - if random_mapping: - dataset = SFTRandomMappingDataSet(dataset_dir, column_names, tokenizer, dataset_name, file_format, - customized_reader, map_function=custom_map_func, scale=scale) - else: - dataset = SFTDataSet(dataset_dir, column_names=column_names, tokenizer=tokenizer, dataset_name=dataset_name, - file_format=file_format, read_function=customized_reader, - map_function=custom_map_func) - return GeneratorDataset(dataset, column_names=column_names, shuffle=shuffle, **kwargs) - - -class SFTRandomMappingDataSet(SFTDataSet): - """ - sftdataset with random mapping - """ - def __init__(self, dataset_dir, column_names, tokenizer, dataset_name=None, file_format=None, - customized_reader=None, map_function=custom_map_func, scale=1): - super().__init__(dataset_dir=dataset_dir, - column_names=column_names, - tokenizer=tokenizer, - dataset_name=dataset_name, - file_format=file_format, - read_function=customized_reader, - map_function=map_function - ) - - self.scale = scale - - def __len__(self): - return (self.table.shape[0]) * self.scale - - def __getitem__(self, index): - rng = random.Random(index) - rng = np.random.RandomState(seed=[rng.randint(0, 2 ** 32 - 1) for _ in range(16)]) - i = rng.randint(self.table.shape[0]) - return super().__getitem__(i) diff --git a/research/visualglm/visualglm_dataset.py b/research/visualglm/visualglm_dataset.py deleted file mode 100644 index 4c1b0ad5..00000000 --- a/research/visualglm/visualglm_dataset.py +++ /dev/null @@ -1,327 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""Causal Image Modeling Dataset.""" -import copy -import os -import re - -import numpy as np -from PIL import Image -import mindspore.common.dtype as mstype -import mindspore.dataset.transforms.c_transforms as C -from mindspore.dataset import vision -from mindspore.dataset.vision.utils import Inter - -from mindformers.dataset.base_dataset import BaseDataset -from mindformers.dataset.dataloader import build_dataset_loader -from mindformers.models.build_tokenizer import build_tokenizer -from mindformers.tools.logger import logger -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType -from mindformers.version_control import get_dataset_map - - -def get_input_data_batch_slice_map(input_ids, eod_token_id, dis, rank_id: int = 0): - """ - Generate position_id and attention_mask according to input_ids considering eod reset - - Args: - input_ids: the input token ids - eod_token_id: the id for - dis: the slice value for each rank - rank_id: the current rank id - Returns: - batch_input_ids: the input token ids - batch_position_ids: the position ids cosidering eod reset - batch_attention_mask: the attention mask considering eod reset - """ - rank = int(rank_id) - input_ids = input_ids[rank*dis: (rank + 1)*dis] - seq_length = input_ids.shape[1] - 1 - # Initialize position_ids and attention_mask - batch_input_ids = input_ids - batch_position_ids = np.ones((dis, seq_length)) - batch_attention_mask = np.ones((dis, seq_length, seq_length)) - - # Loop through batches - for bs_i in range(len(input_ids)): - # Get normal position_ids and attention_mask - local_ids = input_ids[bs_i] - batch_attention_mask[bs_i] = np.tril(np.ones(shape=(seq_length, seq_length))) - batch_position_ids[bs_i] = np.arange(seq_length) - # Find the index of - eod_index = batch_position_ids[bs_i, local_ids[:-1] == eod_token_id].astype(np.int32) - prev_index = 0 - for i in range(eod_index.size): - # Reset position_ids and attention_mask considering - index = eod_index[i] - batch_attention_mask[bs_i, (index + 1):, :(index + 1)] = 0 - batch_position_ids[bs_i, (index + 1):] -= (index + 1 - prev_index) - prev_index = index + 1 - return batch_input_ids, batch_position_ids, batch_attention_mask - - -@MindFormerRegister.register(MindFormerModuleType.DATASET) -class VisualGLMDataset(BaseDataset): - """ - Causal Language Model pretrain dataset. - output input_ids columns - - Args: - dataset_config (dict): Config for dataset. - - Returns: - A dataset for CausalLanguageModelDataset. - - Examples: - >>> from mindformers.tools.register import MindFormerConfig - >>> from mindformers import MindFormerBook - >>> from mindformers.dataset import CausalLanguageModelDataset - >>> from mindformers.dataset import build_dataset, check_dataset_config - >>> config_dict_list = MindFormerBook.get_trainer_support_task_list() - >>> config_path = config_dict_list['text_generation']['gpt2'] - >>> # Initialize a MindFormerConfig instance with a specific config file of yaml. 
- >>> config = MindFormerConfig(config_path) - >>> config.train_dataset.data_loader.dataset_dir = "The required task dataset path" - >>> # Note: - >>> # The detailed data setting could refer to - >>> # https://gitee.com/mindspore/mindformers/blob/dev/docs/model_cards/gpt2.md - >>> check_dataset_config(config) - >>> # use class to build dataset - >>> dataset_from_class = CausalLanguageModelDataset(config.train_dataset_task.dataset_config) - """ - - def __new__(cls, dataset_config: dict = None): - logger.info("Now Create VisualGLM Model Dataset.") - rank_id = int(os.getenv("RANK_ID", "0")) - device_num = int(os.getenv("RANK_SIZE", "1")) - dataset_config = copy.deepcopy(dataset_config) - cls.init_dataset_config(dataset_config) - rank_id, device_num = cls._check_device_rank_for_parallel(rank_id, device_num) - dataset_config.rank_id = rank_id - dataset_config.device_num = device_num - if dataset_config.data_loader.type != "MindDataset" and \ - dataset_config.data_loader.type != "TFRecordDataset": - dataset = cls._process_raw_text_data(dataset_config) - else: - dataset = cls._process_mindrecord_data(dataset_config) - - type_cast_op = C.TypeCast(mstype.int32) - if dataset_config.eod_reset: - if cls._is_semi_full_batch() or cls._is_data_parallel(): - rank_id = 0 - dis = dataset_config.batch_size - else: - # Each card slice a small batch from the full batch - dis = dataset_config.batch_size // device_num - if dataset_config.batch_size % device_num != 0: - raise ValueError( - f"batch size {dataset_config.batch_size} should be a multiple of device number {device_num}." - " You should change the args: per_batch_size.") - - dataset = dataset.batch(dataset_config.batch_size, - drop_remainder=dataset_config.drop_remainder, - output_columns=dataset_config.input_columns) - map_func = lambda input_ids: get_input_data_batch_slice_map(input_ids, - eod_token_id=dataset_config.eod_token_id, - rank_id=rank_id, - dis=dis) - dataset = get_dataset_map(dataset, map_func, - input_columns=dataset_config.input_columns, - output_columns=dataset_config.output_columns) - dataset = dataset.project(columns=dataset_config.output_columns) - - for input_arg in dataset_config.output_columns: - if "image" in input_arg: - continue - dataset = get_dataset_map(dataset, type_cast_op, - input_columns=input_arg) - else: - dataset = dataset.batch(dataset_config.batch_size, - drop_remainder=dataset_config.drop_remainder, - output_columns=dataset_config.input_columns, - num_parallel_workers=dataset_config.num_parallel_workers) - dataset = dataset.project(columns=dataset_config.input_columns) - for input_arg in dataset_config.input_columns: - if "image" in input_arg: - continue - dataset = get_dataset_map(dataset, type_cast_op, - input_columns=input_arg) - - dataset = dataset.repeat(dataset_config.repeat) - - return dataset - - @classmethod - def _prepare_for_model(cls, dataset, dataset_config): - """ preprocess for model """ - from mindformers import Blip2ImageProcessor - tokenizer_config = dataset_config.tokenizer - tokenizer = build_tokenizer(tokenizer_config) - image_processor = Blip2ImageProcessor(224, interpolation="bicubic") - image_processor.resize.resize = vision.transforms.Resize((224, 224), Inter.BICUBIC) - input_columns = dataset_config.input_columns - max_source_length = dataset_config.max_source_length - max_target_length = dataset_config.max_target_length - max_seq_length = max_source_length + max_target_length - - def sft_visualglm_map_func(img, prompt, label): - """Prepare input data for model fine-tuning or 
evaluation.""" - img = str(img) - prompt = str(prompt) - label = str(label) - - image = image_processor(Image.open(img).convert("RGB")) - input0 = tokenizer.encode("", add_special_tokens=False) - input1 = [tokenizer.pad_token_id] * 32 - input2 = tokenizer.encode("问:"+prompt+"\n答:", add_special_tokens=False) - a_ids = sum([input0, input1, input2], []) - b_ids = tokenizer.encode(text=label, add_special_tokens=False) - if len(a_ids) > max_source_length - 1: - a_ids = a_ids[: max_source_length - 1] - if len(b_ids) > max_target_length - 2: - b_ids = b_ids[: max_target_length - 2] - - input_ids = tokenizer.build_inputs_with_special_tokens(a_ids, b_ids) - input_id_len = len(input_ids) - context_length = input_ids.index(tokenizer.bos_token_id) - labels = [-100] * context_length + input_ids[context_length:] - pad_len = max_seq_length - input_id_len - input_ids = input_ids + [tokenizer.pad_token_id] * pad_len - labels = labels + [-100] * pad_len - - ignore_pad_token_for_loss = False - if ignore_pad_token_for_loss: - labels = [(l if l != tokenizer.pad_token_id else -100) for l in labels] - image = image.asnumpy() - image = image.squeeze(0) - position_id = cls._create_position_ids(np.array(input_ids)) - attention_mask = cls._get_masks(np.array(input_ids)) - - return tuple([image, input_ids, labels, position_id, attention_mask]) - - dataset = dataset.map(sft_visualglm_map_func, - input_columns=["img", "prompt", "label"], - output_columns=input_columns) - return dataset - - @classmethod - def _get_masks(cls, input_ids, bos_token_id=130004): - """generate mask from input id""" - batch_size = 1 - seq_length = input_ids.shape[0] - input_ids = [input_ids] - context_lengths = [list(seq).index(bos_token_id) for seq in input_ids] - attention_mask = np.tril(np.ones((batch_size, seq_length, seq_length))) - for i, context_length in enumerate(context_lengths): - attention_mask[i, :, :context_length] = 1 - attention_mask = np.expand_dims(attention_mask, axis=1) - attention_mask = np.array(attention_mask < 0.5, np.bool_).squeeze(0) - return attention_mask - - @classmethod - def _get_position_ids(cls, input_ids, mask_positions, use_gmasks=None, - bos_token_id=130004, position_encoding_2d=True): - """generate position ids from input id and mask positions""" - - seq_length = input_ids.shape[0] - if use_gmasks is None: - use_gmasks = [False] - mask = bos_token_id * np.ones(shape=(seq_length), dtype=np.int32) - mask = np.equal(input_ids, mask) - # 要求input_ids中有且仅有一个bos_token_id - context_lengths = np.argwhere(mask)[:, -1] - if position_encoding_2d: - position_ids = np.arange(seq_length, dtype=np.int64) - for i, context_length in enumerate(context_lengths): - position_ids[context_length:] = mask_positions[i] - block_position_ids = [np.concatenate(( - np.zeros(context_length, dtype=np.int64), - np.arange(seq_length - context_length, dtype=np.int64) + 1 - )) for context_length in context_lengths] - block_position_ids = np.stack(block_position_ids, axis=0).squeeze() - position_ids = np.stack((position_ids, block_position_ids), axis=0) - else: - position_ids = np.arange(seq_length, dtype=np.int64) - for i, context_length in enumerate(context_lengths): - if not use_gmasks[i]: - position_ids[context_length:] = mask_positions[i] - return position_ids - - @classmethod - def _create_position_ids(cls, input_ids, gmask_token_id=130001): - """generate position ids from input id""" - - seq_length = input_ids.shape[0] - seqs = input_ids - # 要求input_ids中, 每行有且仅有一个gMASK - use_gmasks = gmask_token_id * np.ones(shape=(seq_length), 
dtype=np.int32) - mask = np.equal(seqs, use_gmasks) - mask_positions = np.argwhere(mask)[:, -1] - - position_ids = cls._get_position_ids(input_ids, mask_positions=mask_positions, use_gmasks=use_gmasks) - return position_ids - - @classmethod - def _process_raw_text_data(cls, dataset_config): - """Process the text data""" - dataset_dir = dataset_config.data_loader.pop("dataset_dir") - - tokenizer_config = dataset_config.tokenizer - tokenizer = build_tokenizer(tokenizer_config) - - # 通过data_loader从数据集中加载数据 - dataset = build_dataset_loader( - dataset_config.data_loader, default_args={'dataset_dir': dataset_dir, - 'num_shards': dataset_config.device_num, - 'shard_id': dataset_config.rank_id, - 'column_names': dataset_config.data_loader.column_names, - 'tokenizer': tokenizer, - 'scale': dataset_config.data_loader.scale, - 'random_mapping': dataset_config.data_loader.random_mapping, - 'shuffle': dataset_config.data_loader.shuffle}) - - dataset = cls._prepare_for_model(dataset, dataset_config) - return dataset - - @classmethod - def _process_mindrecord_data(cls, dataset_config): - """Process the mindrecord data""" - dataset_files = [] - mind_compile = re.compile("mindrecord0*$") - if dataset_config.data_loader.dataset_dir: - data_dir = dataset_config.data_loader.pop("dataset_dir") - if os.path.isdir(data_dir): - for r, _, f in os.walk(data_dir): - for file in f: - if re.findall(mind_compile, file) or file.endswith(".tfrecord"): - dataset_files.append(os.path.join(r, file)) - dataset_files.sort() - else: - if re.findall(mind_compile, data_dir) or data_dir.endswith(".tfrecord"): - dataset_files = data_dir - elif dataset_config.data_loader.dataset_files: - dataset_files = dataset_config.data_loader.dataset_files - if isinstance(dataset_files, (list, tuple)): - dataset_files = list(dataset_files) - else: - raise ValueError(f"data_loader must contain dataset_dir or dataset_files," - f"but get {dataset_config.data_loader}.") - - dataset = build_dataset_loader( - dataset_config.data_loader, default_args={'dataset_files': dataset_files, - 'num_shards': dataset_config.device_num, - 'shard_id': dataset_config.rank_id, - 'columns_list': dataset_config.input_columns}) - return dataset diff --git a/research/visualglm/visualglm_glm.py b/research/visualglm/visualglm_glm.py deleted file mode 100644 index 05058fd2..00000000 --- a/research/visualglm/visualglm_glm.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""visualglm language model.""" -from mindspore import dtype as mstype -from mindspore import ops -from mindspore.ops import operations as P - -from mindformers import CrossEntropyLoss -from mindformers.models.glm.attention import default_dpmp_config -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType -from mindformers.models.glm.glm import GLMModel, GLMForPreTraining -from mindformers.models.glm.glm_config import GLMConfig - -from layers import ImageTextEmbeddingPreparationMixIn -from attention import SelfAttentionAdapter - - -class GLMModelForBlip2(GLMModel): - """ - The backbone of GLM network - - Args: - config (GLMConfig): The config of network. - op_parallel_config (optional): Operator parallel strategy. Default: `OpParallelConfig()`. - embed_parallel_config (optional): Operator parallel strategy. Default: `EmbeddingOpParallelConfig()`. - """ - - def __init__(self, config): - super().__init__(config) - - op_parallel_config = default_dpmp_config - if config.parallel_config: - op_parallel_config = config.parallel_config - - # adapter - self.modify_attention_fn(config, op_parallel_config) - - def modify_attention_fn(self, config, op_parallel_config): - """replace default attention func""" - for i in range(config.num_layers): - layer = self.layers[i] - layer_id = i + 1 - layer.attention = SelfAttentionAdapter( - config.hidden_size, - config.batch_size, - config.num_heads, - op_parallel_config, - config.attention_dropout_rate, - config.hidden_dropout_rate, - layer_id, - max_seq_len=config.seq_length, - hidden_size_per_attention_head=config.hidden_size_per_attention_head, - position_encoding_2d=config.position_encoding_2d, - bias=True, - params_dtype=config.param_init_type, - softmax_dtype=config.softmax_compute_type, - compute_dtype=config.compute_dtype, - use_past=config.use_past - ) - - def construct(self, input_embeddings, position_ids, attention_mask, init_reset=True, batch_valid_length=None): - """ - Get output logits - - Inputs: - input_ids (Tensor): The tokenized inputs with dtype int32. - input_mask (Tensor): The mask indicating whether each position is a valid input. - position_ids (Tensor): Used to identify each token's position in the list of tokens. - attention_mask (Tensor): Used when batching sequences together. - init_reset (bool, optional): Default: True. - batch_valid_length (Tensor, optional): Default: None. - - Returns: - logits (Tensor): The output logit of backbone. - table (Tensor): The embedding table for the vocabulary. - """ - if attention_mask is None: - attention_mask = ops.ones((1, 1), mstype.int32) - - hidden_states = input_embeddings - for i in range(self.num_layers): - layer_ret = self.layers[i](hidden_states, attention_mask, position_ids, init_reset, batch_valid_length) - - if isinstance(layer_ret, tuple): - layer_ret = layer_ret[0] - hidden_states = layer_ret - - # Final layer norm. - if self.use_final_layernorm: - logits = self.final_layernorm(hidden_states) - else: - logits = hidden_states - - return logits - - -@MindFormerRegister.register(MindFormerModuleType.MODELS) -class GLMForPreTrainingForBlip2(GLMForPreTraining, ImageTextEmbeddingPreparationMixIn): - r""" - Provide glm training loss or logits through network. - - Args: - config (GLMConfig): The config of GLMModel. 
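-            checkpoint_name_or_path is temporarily cleared while the parent
-            constructors run and restored afterwards, so the checkpoint is loaded
-            only once by this class.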
- - Examples: - >>> from mindformers import GLMForPreTraining - >>> model = GLMForPreTraining.from_pretrained("glm_6b") - >>> type(model) - - """ - - def __init__(self, config: GLMConfig): - checkpoint_name_or_path = config.checkpoint_name_or_path - config.checkpoint_name_or_path = "" - - GLMForPreTraining.__init__(self, config=config) - ImageTextEmbeddingPreparationMixIn.__init__(self, config=config) - - self.transformer = GLMModelForBlip2(config) - - self.config.checkpoint_name_or_path = checkpoint_name_or_path - - self.loss = CrossEntropyLoss(parallel_config=config.parallel_config) - self.cast_1d = P.Cast() - self.mul_1d = P.Mul().shard(((1,), (1,))) - self.reshape = P.Reshape() - self.not_equal_1d = P.NotEqual().shard(((1,), ())) - self.batch_size = config.batch_size - self.vocab_size = config.vocab_size - self.load_checkpoint(config) # todo lite推理注释,ms放开 - - def to_text_embeddings(self, text_input_ids): - """ - create text embeddings from input ids - :param text_input_ids: text input id - :return: text embedding - """ - input_embeds_raw = self.transformer.word_embeddings(text_input_ids) - input_embeds = input_embeds_raw[0] - input_embeds = self.transformer.embedding_dropout(input_embeds) - return input_embeds - - def prepare_inputs_for_generation(self, input_ids, **kwargs): - """prepare inputs for generation.""" - return self.prepare_image_text_embedding(input_ids, **kwargs) - - # pylint: disable=W0613 - def construct(self, input_embeddings=None, input_ids=None, labels=None, position_ids=None, attention_mask=None, - input_position=None, input_embeds=None, init_reset=True, batch_valid_length=None): - """ - Extract logits and calculate loss - - Inputs: - input_ids (Tensor): the tokenized inputs with dtype int32. - labels (Tensor): the indices of input sequence tokens in the vocabulary. - position_ids (Tensor): used to identify each token's position in the list of tokens. - attention_mask (Tensor): used when batching sequences together. - input_position(Tensor): Reserved param, not used. - input_embeds(Tensor): Reserved param, not used. - init_reset (bool, optional): Default: True. - batch_valid_length(Tensor, optional): Default: None. - - Returns: - Training phase: - loss: Training loss. - Other phase: - logits (Tensor): The output logit of backbone. 
- """ - - if input_embeddings is None and input_ids is not None: # for incremental infer - input_embeddings = self.to_text_embeddings(input_ids) - - output_states = self.transformer(input_embeddings, position_ids, attention_mask, init_reset, batch_valid_length) - logits = self.lm_head(output_states) - - seq_length = output_states.shape[1] - logits_shape = logits.shape - if not self.training: - logits = logits.reshape((-1, logits_shape[-1])) - # only gather in auto-aggressive generate or first iteration - if (not self.use_past or self.is_first_iteration) and input_position is not None: - logits = self.gather(logits, input_position, 0) - return (logits,) - - logits_reshape = logits.reshape((self.batch_size, seq_length, self.vocab_size)) - - shift_logits = logits_reshape[..., :-1, :] - shift_labels = labels[..., 1:] - - logits_view = shift_logits.view((-1, shift_logits.shape[-1])) - labels_view = shift_labels.view(-1) - - input_mask = self.cast_1d(self.not_equal_1d(shift_labels, -100), mstype.float32) - input_mask = self.reshape(input_mask, (-1,)) - - loss = self.loss(logits_view, labels_view, input_mask) - # loss = self.loss(logits_view, labels_view) - return loss diff --git a/research/visualglm/visualglm_lr_schedule.py b/research/visualglm/visualglm_lr_schedule.py deleted file mode 100644 index 61e2cba5..00000000 --- a/research/visualglm/visualglm_lr_schedule.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# This file was refer to project: -# https://github.com/huawei-noah/Pretrained-Language-Model/blob/master/PanGu-%CE%B1/utils.py -# https://github.com/huggingface/transformers/blob/main/src/transformers/optimization.py -# ============================================================================ -"""AnnealingLR LR Schedule.""" -import math -import mindspore.common.dtype as mstype -from mindspore.common.tensor import Tensor -from mindspore.nn.learning_rate_schedule import LearningRateSchedule -from mindspore.ops import operations as P -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType - -__all__ = ['AnnealingLR'] - - -@MindFormerRegister.register(MindFormerModuleType.LR) -class AnnealingLR(LearningRateSchedule): - """ AnnealingLR implementation for visualglm """ - DECAY_STYLES = ["linear", "cosine", "exponential", "constant", "None"] - - def __init__(self, learning_rate, warmup_steps, num_iters, total_steps, decay_style="cosine", last_iter=-1, - decay_ratio=0.1, auto_warmup_steps=100, auto_warmup_rate=0.05): - super(AnnealingLR, self).__init__() - self.total_steps = total_steps - self.start_lr = learning_rate - self.warmup_iter = Tensor(warmup_steps, mstype.float32) - self.init_step = last_iter - self.num_iters = Tensor(last_iter + 1, mstype.float32) - self.end_iter = Tensor(num_iters, mstype.float32) - self.decay_style = decay_style.lower() if isinstance(decay_style, str) else None - self.decay_ratio = 1 / decay_ratio - self.auto_warmup_steps = auto_warmup_steps - self.auto_warmup_rate = auto_warmup_rate - - self.cos = P.Cos() - self.min = P.Minimum() - - def construct(self, global_step): - """ method entrance """ - if global_step <= self.init_step + self.auto_warmup_steps: - auto_lr = float(self.start_lr) * self.auto_warmup_rate - schedule_lr = float(self.start_lr) * global_step / self.warmup_iter - return self.min(auto_lr, schedule_lr) - - if self.warmup_iter > 0 and global_step <= self.warmup_iter: - return float(self.start_lr) * global_step / self.warmup_iter - - if self.decay_style == self.DECAY_STYLES[0]: - return self.start_lr * ((self.end_iter - (global_step - self.warmup_iter)) / self.end_iter) - - if self.decay_style == self.DECAY_STYLES[1]: - tmp_decay_step_ratio = (global_step - self.warmup_iter) / self.end_iter - decay_step_ratio = self.min(1.0, tmp_decay_step_ratio) - return self.start_lr / self.decay_ratio * ( - (self.cos(math.pi * decay_step_ratio) + 1) * (self.decay_ratio - 1) / 2 + 1) - if self.decay_style == self.DECAY_STYLES[2]: - return self.start_lr - return self.start_lr diff --git a/research/visualglm/visualglm_processor.py b/research/visualglm/visualglm_processor.py deleted file mode 100644 index a3f31536..00000000 --- a/research/visualglm/visualglm_processor.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -""" visualglm processor implementation""" - -from typing import Optional, Union, List - -import PIL -import PIL.Image -import mindspore as ms -import numpy as np - -from mindformers.dataset.transforms.vision_transforms import ( - BatchPILize, - BatchResize, - BatchToTensor, - BatchNormalize -) -from mindformers.models.tokenization_utils_base import PreTrainedTokenizerBase -from mindformers.models.image_processing_utils import BaseImageProcessor -from mindformers.models.processing_utils import ProcessorMixin -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType - - -@MindFormerRegister.register(MindFormerModuleType.PROCESSOR) -class VisualGLMImageProcessor(BaseImageProcessor): - """ - VisualGLMImageProcessor. - - Args: - image_size (int): The target size. - - Examples: - >>> from mindformers import Blip2ImageProcessor - >>> from mindformers.tools.image_tools import load_image - >>> processor = Blip2ImageProcessor(image_size=224) - >>> image = load_image("https://ascend-repo-modelzoo.obs.cn-east-2." - "myhuaweicloud.com/XFormer_for_mindspore/clip/sunflower.png") - >>> processor(image) - Tensor(shape=[1, 3, 224, 224], dtype=Float32, value= - [[[[-1.55868769e+00, -1.52949083e+00, ... -1.48569560e+00, -1.48569560e+00], - [-1.54408932e+00, -1.52949083e+00, ... -1.50029397e+00, -1.50029397e+00], - [-1.52949083e+00, -1.52949083e+00, ... -1.50029397e+00, -1.50029397e+00], - ... - [-1.38067937e+00, -1.48021984e+00, ... -1.30957901e+00, -1.40911949e+00], - [-1.46599972e+00, -1.43755960e+00, ... -1.48021984e+00, -1.43755960e+00], - [-1.40911949e+00, -1.28113890e+00, ... -1.48021984e+00, -1.43755960e+00]]]]) - """ - - def __init__(self, - image_size: Optional[int] = 224, - interpolation: Optional[str] = 'bicubic', - mean=(0.48145466, 0.4578275, 0.40821073), - std=(0.26862954, 0.26130258, 0.27577711), - is_hwc=False, - **kwargs): - super().__init__(**kwargs) - self.image_size = image_size - if isinstance(image_size, int): - self.image_size = (image_size,) * 2 - self.interpolation = interpolation - self.mean = mean - self.std = std - self.is_hwc = is_hwc - self.resize = BatchResize(self.image_size, interpolation=self.interpolation) - - def preprocess(self, images: Union[ms.Tensor, PIL.Image.Image, - np.ndarray, List[PIL.Image.Image]], **kwargs): - r""" - Preprocess Required By Base Processor. - - Args: - images (ms.Tensor, PIL.Image, numpy.array, List[PIL.Image]): A batch of images. - - Return: - A 4-rank tensor for a batch of images. 
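-            A list of images is stacked along a new batch axis and a single image
-            gets a batch dimension of 1, so the output shape is always
-            (batch_size, 3, image_size, image_size).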
- """ - pilize = BatchPILize() - to_tensor = BatchToTensor() - normalize = BatchNormalize(self.mean, self.std, self.is_hwc) - - images = pilize(images) - images = self.resize(images) - images = to_tensor(images) - images = normalize(images) - - kwargs.pop("other", None) - if isinstance(images, list): - return ms.Tensor(np.row_stack([np.expand_dims(item, axis=0) for item in images])) - if len(images.shape) == 4: - return ms.Tensor(images) - return ms.Tensor(np.expand_dims(images, axis=0)) - - def _bhwc_check(self, image_batch: Union[ms.Tensor, PIL.Image.Image, - np.ndarray, List[PIL.Image.Image]]): - r"""Bhwc_check""" - if isinstance(image_batch, np.ndarray): - if image_batch.shape[-1] == 3: - return True - if isinstance(image_batch, ms.Tensor): - if image_batch.asnumpy().shape[-1] == 3: - return True - if isinstance(image_batch, (list, PIL.Image.Image)): - return True - return False - - -@MindFormerRegister.register(MindFormerModuleType.PROCESSOR) -class VisualGLMProcessor(ProcessorMixin): - r"""Blip2 Processor, - consists of a feature extractor (BaseFeatureEXtractor) for image input, - and a tokenizer (PreTrainedTokenizerBase) for text input. - - Args: - image_processor (BaseImageProcessor): Used for process image data. - tokenizer (PreTrainedTokenizerBase): Used for process text data. - max_length (Optional[int]): The length of text tokens. - padding (Optional[str]): The padding strategy of tokenizer, [None, "max_length"]. - return_tensors (Optional[str]): The type of returned tensors for tokenizer, [None, "ms"]. - - Examples: - >>> from mindformers import Blip2Processor - >>> from mindformers.tools.image_tools import load_image - >>> image = load_image("https://ascend-repo-modelzoo.obs.cn-east-2." - ... "myhuaweicloud.com/XFormer_for_mindspore/clip/sunflower.png") - >>> text = ["a boy", "a girl"] - >>> Blip2Processor.show_support_list() - INFO - support list of Blip2Processor is: - INFO - ['blip2_stage1_vit_g', 'blip2_stage1_classification'] - INFO - ------------------------------------- - >>> processor = Blip2Processor.from_pretrained('blip2_stage1_vit_g') - INFO - processor built successfully! - >>> processor(image, text) - {'image': Tensor(shape=[1, 3, 224, 224], dtype=Float32, value= - [[[[-1.55868769e+00, -1.52949083e+00, -1.55868769e+00 ... -1.48569560e+00, -1.48569560e+00], - [-1.54408932e+00, -1.52949083e+00, -1.54408932e+00 ... -1.50029397e+00, -1.50029397e+00], - [-1.52949083e+00, -1.52949083e+00, -1.52949083e+00 ... -1.50029397e+00, -1.50029397e+00], - ... - [-1.38067937e+00, -1.48021984e+00, -1.38067937e+00 ... -1.30957901e+00, -1.40911949e+00], - [-1.46599972e+00, -1.43755960e+00, -1.26691878e+00 ... -1.48021984e+00, -1.43755960e+00], - [-1.40911949e+00, -1.28113890e+00, -1.30957901e+00 ... -1.48021984e+00, -1.43755960e+00] - ]]]), - 'text': Tensor(shape=[2, 32], dtype=Int32, value= - [[ 101, 1037, 2879 ... 0, 0, 0], - [ 101, 1037, 2611 ... 
0, 0, 0]])} - """ - - attributes = ["tokenizer", "image_processor"] - image_processor_class = "AutoImageProcessor" - tokenizer_class = "AutoTokenizer" - - def __init__(self, image_processor, tokenizer, - max_length=32, padding='max_length', return_tensors='ms'): - super(VisualGLMProcessor, self).__init__( - image_processor=image_processor, - tokenizer=tokenizer, - max_length=max_length, - padding=padding, - return_tensors=return_tensors) - - def __call__(self, text_input=None, text_pair=None): - """call function""" - output = {} - if not self.tokenizer: - raise ValueError(f"For {self.__name__}, the `tokenizer` should not be None.") - if not isinstance(self.tokenizer, PreTrainedTokenizerBase): - raise TypeError(f"tokenizer should inherited from the PreTrainedTokenizerBase," - f" but got {type(self.tokenizer)}.") - if text_input: - # Format the input into a batch - if isinstance(text_input, str): - text_input = [text_input] - text_output = self.tokenizer(text_input, return_tensors=self.return_tensors, - max_length=self.max_length, - padding=self.padding)["input_ids"] - output['text'] = text_output - - if text_pair: - # Format the input into a batch - if isinstance(text_pair, str): - text_input = [text_pair] - text_output = self.tokenizer(text_pair, return_tensors=self.return_tensors, - max_length=self.tgt_max_length, - padding=self.padding)["input_ids"] - output['tgt_output'] = text_output - - return output diff --git a/research/visualglm/visualglm_qformer.py b/research/visualglm/visualglm_qformer.py deleted file mode 100644 index ffa6b8d6..00000000 --- a/research/visualglm/visualglm_qformer.py +++ /dev/null @@ -1,572 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# This file was refer to project: -# https://github.com/salesforce/LAVIS/tree/main/lavis/models/blip2_models -# ============================================================================ -""" -Visualglm Qformer, link to ViT. -the main model for image-text pretraining. -""" - -import os - -import mindspore as ms -import mindspore.common.dtype as mstype -import mindspore.nn as nn -import mindspore.numpy as np -import mindspore.ops as ops -from mindspore import Tensor -from mindspore.ops import operations as P - -from mindformers import CrossEntropyLoss -from mindformers.modules.layers import Linear -from mindformers.tools.logger import logger -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType -from visualglm_base import VisualGLMBase -from visualglm_config import VisualGLMConfig - - -def choose_idx_with_prob(weight: Tensor): - """ - choose idx depend on probability, replace torch.multinomial - """ - weight_acc = ops.cumsum(weight, -1) - rand_x = np.rand([1], dtype=weight_acc.dtype) * weight_acc[-1] - idx = np.argmax(weight_acc > rand_x) - return idx - -class AllGatherWithGrad(nn.Cell): - """ - AllGather Layer which does not cut gradients. 
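-    The forward pass all-gathers the input across all devices, while the custom
-    bprop reduce-scatters the incoming gradient so that each rank gets the gradient
-    slice belonging to its local shard, rather than having the gradient cut as with
-    the plain AllGather op.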
- """ - def __init__(self): - super(AllGatherWithGrad, self).__init__() - self.all_gather = ops.AllGather() - self.reduce_scatter = ops.ReduceScatter(ops.ReduceOp.SUM) - - def construct(self, x): - return self.all_gather(x) - - def bprop(self, x, out, dout): - x = x - out = out - return (self.reduce_scatter(dout),) - -@MindFormerRegister.register(MindFormerModuleType.MODELS) -class VisualGLMQformer(VisualGLMBase): - """ - VisualGLM first-stage model with Q-former and ViT. - Args: - config (VisualGLMConfig): The config of VisualGLMQformer. - - Returns: - Tensor, loss, logits. - - Examples: - >>> from mindformers.models.blip2 import Blip2Qformer - >>> model = VisualGLMQformer.from_pretrained("blip2_stage1_vit_g") - - """ - - def __init__(self, config: VisualGLMConfig, **kwargs): - super(VisualGLMQformer, self).__init__(config, **kwargs) - self.config = config if config is not None else VisualGLMConfig() - self.group_size = int(os.getenv('RANK_SIZE', '1')) - self.rank = int(os.getenv('RANK_ID', '0')) - self.visual_encoder, self.ln_vision = self.init_vision_encoder() - if config.freeze_vision: - for _, cell in self.visual_encoder.cells_and_names(): - params = cell.get_parameters() - for param in params: - param.requires_grad = False - self.visual_encoder.set_train(False) - logger.info("freeze vision encoder") - - qformer_config = self.config.qformer_config - - # note on Atlas 800T A2, function resize_token_embeddings() is not supported, - # thus in this case, a resized weight will be loaded, i.e: - # 1) vocab_size = vocab_size + special_token_nums, - # 2) special_token_nums = 0 - if not qformer_config.resize_token_embeddings: - qformer_config.vocab_size = qformer_config.vocab_size + qformer_config.special_token_nums - qformer_config.special_token_nums = 0 - - # init qformer - self.qformer, self.query_tokens = self.init_qformer() - - if qformer_config.resize_token_embeddings: - # note special token added: bos_token -> [DEC] - self.qformer.resize_token_embeddings(qformer_config.vocab_size + qformer_config.special_token_nums) - - params = self.qformer.get_parameters() - # modify layer names - for param in params: - if "_query" in param.name: - key_orig = param.name.replace("_query", "") - param.set_data(self.qformer.parameters_dict().get(key_orig)) - - # parallel settings - if config.parallel_config: - dp = config.parallel_config.data_parallel - mp = config.parallel_config.model_parallel - else: - dp = mp = 1 - - self.vision_proj = Linear(in_channels=qformer_config.hidden_size, - out_channels=qformer_config.head_embed_dim, - param_init_type=config.dtype, - compute_dtype=config.compute_dtype) - self.vision_proj.shard(strategy_matmul=((dp, mp), (1, mp))) - - self.text_proj = Linear(in_channels=qformer_config.hidden_size, - out_channels=qformer_config.head_embed_dim, - param_init_type=config.dtype, - compute_dtype=config.compute_dtype) - self.text_proj.shard(strategy_matmul=((dp, mp), (1, mp))) - - self.itm_head = Linear(in_channels=qformer_config.hidden_size, - out_channels=2, - param_init_type=config.dtype, - compute_dtype=config.compute_dtype) - self.itm_head.shard(strategy_matmul=((dp, mp), (1, mp))) - - self.gather = P.Gather() - self.matmul = P.BatchMatMul() - self.matmul.shard(((dp, mp, 1, 1), (dp, mp, 1, 1))) - self.concat = ops.concat - self.expand_dims = ops.expand_dims - self.transpose = ops.transpose - self.zeros = ops.zeros - self.ones = ops.ones - self.linspace = ops.linspace - self.floor = ops.floor - self.softmax = nn.Softmax(axis=1) - self.softmax.softmax.shard(((dp, mp, 1),)) - 
self.eye = ops.eye - self.masked_fill = ops.masked_fill - self.stack = ops.stack - self.broadcast_to = ops.broadcast_to - self.tile = P.Tile() - self.tile.shard(((dp, mp, 1, 1),)) - self.normalize = ops.L2Normalize(axis=-1, epsilon=1e-12) - - self.temp = ms.Parameter( - Tensor(0.07, dtype=config.compute_dtype), requires_grad=True) - - self.max_txt_len = config.max_txt_len - self.bos_token_id = qformer_config.bos_token_id - self.pad_token_id = qformer_config.pad_token_id - - if self.group_size > 1: - self.all_gather = ops.AllGather() - self.all_gather_with_grad = AllGatherWithGrad() - self.not_equal = P.NotEqual() - self.cast = P.Cast() - - self.itc_loss = CrossEntropyLoss(label_smoothing=0.1) - self.itm_loss = CrossEntropyLoss() - - def construct(self, image: ms.Tensor, text_input_ids: ms.Tensor, return_tuple: bool = False): - """ - forwarding image and text, compute itc, itm and lm losses. - - Args: - image (Tensor): - The indices of images. - text_input_ids (Tensor): - The indices of input sequence tokens in the vocabulary. - return_tuple (bool, defaults to False): - Whether to return the output separately. If set to True, - the loss, loss_itc, loss_itm and loss_lm will be returned as a tuple, - otherwise only the loss will be returned. - - Returns: - loss (Tensor) or loss_tuple (tuple): - if return_tuple is False, directly return the loss. - otherwise, loss, loss_itc, loss_itm and loss_lm will be - returned as a tuple. - """ - image_feats, image_embeds, past_key_values = self.forward_image( - image, use_cache=True) - image_feats = self.normalize(self.vision_proj(image_feats)) - - text_embeds, text_attention_mask = self.forward_text(text_input_ids) - text_feat = self.normalize(self.text_proj(text_embeds[:, 0, :])) - - image_feats = self.cast(image_feats, mstype.float16) - text_feat = self.cast(text_feat, mstype.float16) - - ### ============== Image-text Contrastive ===================### - # if/else branch: distribute setting - if self.group_size > 1: - # [batch_size*num_gpu, num_query_tokens, embed_dim] - image_feats_all = self.all_gather(image_feats) - # [batch_size*num_gpu, embed_dim] - text_feat_all = self.all_gather(text_feat) - else: - image_feats_all = image_feats - text_feat_all = text_feat - - batch_size = image.shape[0] - sim_q2t = [] - for i in range(self.group_size): - text_feat_part = text_feat_all[i * batch_size: (i + 1) * batch_size] - sim_temp = self.matmul(self.expand_dims(image_feats, 1), self.expand_dims( - self.expand_dims(text_feat_part, -1), 0)).squeeze(-1) - sim_q2t.append(sim_temp.max(-1)) - # query-text similarity: [batch_size, batch_size*num_gpu] - sim_q2t = self.concat(sim_q2t, axis=1) - - # image-text similarity: aggregate across all query tokens - sim_i2t = sim_q2t / self.temp - - sim_t2q = [] - # align with ops.matmul, x1 -> [batch_size, batch_size, 1, embed_dim] - text_feat = self.tile(self.expand_dims( - self.expand_dims(text_feat, 1), 1), (1, batch_size, 1, 1)) - for i in range(self.group_size): - image_feats_part = image_feats_all[i * batch_size: (i + 1) * batch_size] - # align with ops.matmul, x2 -> [batch_size, batch_size, embed_dim, num_query_tokens] - image_feats_part = self.tile(self.expand_dims( - self.transpose(image_feats_part, (0, 2, 1)), 0), (batch_size, 1, 1, 1)) - # compute similarity same as ops.matmul - sim_temp = self.matmul(text_feat, image_feats_part).squeeze(2) - sim_t2q.append(sim_temp.max(-1)) - # text-query similarity: [batch_size, batch_size*num_gpu] - sim_t2q = self.concat(sim_t2q, axis=1) - - # text-image similarity: aggregate 
across all query tokens - sim_t2i = sim_t2q / self.temp - - targets = self.floor(self.linspace(ms.Tensor(self.rank * batch_size, mstype.float32), - ms.Tensor(self.rank * batch_size + batch_size - 1, mstype.float32), - batch_size)).astype(mstype.int32) - - sim_i2t = self.cast(sim_i2t, mstype.float32) - sim_t2i = self.cast(sim_t2i, mstype.float32) - loss_itc = (self.itc_loss(sim_i2t, targets) + - self.itc_loss(sim_t2i, targets)) / 2 - - # ============== Image-text Matching =================== - # mask text-image similarity as weights - weights_t2i, weights_i2t = self.fill_masked_weight(sim_t2i, sim_i2t, batch_size) - - # choose negative image/text for each text/image - image_embeds_neg, text_ids_neg = self.choose_negative_targets(weights_t2i, - weights_i2t, - batch_size, - image_embeds, - text_input_ids) - - text_ids_all = self.concat( - [text_input_ids, text_input_ids, text_ids_neg], axis=0) # pos, pos, neg - - image_embeds_all = self.concat( - [image_embeds, image_embeds_neg, image_embeds], axis=0) # pos, neg, pos - - vl_embeddings = self.forward_text_and_image( - image_embeds_all, text_ids_all, vit_computed=True) - vl_output = self.itm_head(vl_embeddings) - logits = vl_output.mean(axis=1) - - itm_labels = self.concat( - [self.ones(batch_size, mstype.int32), self.zeros(2 * batch_size, mstype.int32)], - axis=0 - ) - loss_itm = self.itm_loss(logits, itm_labels) - - # ================= Image Captioning ======================== - decoder_input_ids = text_input_ids.copy().astype(mstype.float32) - decoder_input_ids[:, 0] = self.bos_token_id - decoder_input_ids = decoder_input_ids.astype(mstype.int32) - labels = decoder_input_ids.masked_fill( - decoder_input_ids == self.pad_token_id, -100 - ) - - query_tokens = self.broadcast_to(self.query_tokens, (image_embeds.shape[0], -1, -1)) - query_atts = self.ones(query_tokens.shape[:-1], mstype.float32) - - attention_mask = self.concat( - [query_atts, text_attention_mask.astype(mstype.float32)], axis=1) - lm_output = self.qformer( - decoder_input_ids, - attention_mask=attention_mask, - past_key_values=past_key_values, - labels=labels, - ) - - loss_lm = lm_output[0] - loss = loss_itc + loss_itm + loss_lm - - if return_tuple: - return ( - loss, - loss_itc, - loss_itm, - loss_lm - ) - return loss - - def forward_image(self, image, use_cache=False): - """ forawrd image through vit and the bert model. - - Args: - image (Tensor): input image - use_cache (bool, optional): whether to return past_key_values. - - Returns: - hidden_states, image_embeds_frozen, past_key_values (optional) - """ - image_embeds_frozen = self.ln_vision(self.visual_encoder(image)) - image_atts = self.ones( - image_embeds_frozen.shape[:-1], mstype.float32) - query_tokens = self.broadcast_to( - self.query_tokens, (image_embeds_frozen.shape[0], -1, -1)) - - query_output = self.qformer.bert( - query_embeds=query_tokens, - encoder_hidden_states=image_embeds_frozen, - encoder_attention_mask=image_atts, - use_cache=use_cache, - ) - if use_cache: - return query_output[0], image_embeds_frozen, query_output[1] - return query_output[0], image_embeds_frozen - - def forward_text(self, text_input_ids): - """ forawrd text_ids through the bert model. 
- - Args: - text_input_ids (Tensor): input text_ids - - Returns: - text embeddings and mask - """ - attention_mask = self.cast(self.not_equal( - text_input_ids, self.pad_token_id), mstype.float32) - text_output = self.qformer.bert( - text_input_ids, - attention_mask=attention_mask - ) - # text embeddings and mask - return text_output[0], attention_mask - - def forward_text_and_image(self, image_inputs, text_ids, vit_computed=False): - """ forward text and image at the same time to the bert model. - - Args: - image_inputs(Tensor): input image or image embeds (computed) - text_input_ids (Tensor): input text_ids - vit_computed (bool, optional): whether image embeds is computed - - Returns: - multimodal embeddings - """ - if not vit_computed: - image_embeds_frozen = self.ln_vision( - self.visual_encoder(image_inputs)) - else: - image_embeds_frozen = image_inputs - text_atts = self.cast(self.not_equal( - text_ids, self.pad_token_id), mstype.float32) - image_atts = self.ones( - image_embeds_frozen.shape[:-1], mstype.float32) - query_tokens = self.broadcast_to( - self.query_tokens, (image_embeds_frozen.shape[0], -1, -1)) - query_atts = self.ones(query_tokens.shape[:-1], mstype.float32) - attention_mask = self.concat([query_atts, text_atts], axis=1) - - output_itm = self.qformer.bert( - text_ids, - query_embeds=query_tokens, - attention_mask=attention_mask, - encoder_hidden_states=image_embeds_frozen, - encoder_attention_mask=image_atts - ) - # multimodal embeddings - multimodal_embeds = output_itm[0][:, : query_tokens.shape[1], :] - return multimodal_embeds - - def fill_masked_weight(self, sim_t2i, sim_i2t, batch_size): - """return masked weights based on similarity - - Args: - sim_t2i (Tensor): text-to-image similarity - sim_i2t (Tensor): image-to-text similarity - batch_size (int): current batch size - """ - weights_t2i = self.softmax(sim_t2i) + 1e-4 - diag_fill_mask_t2i = self.eye(weights_t2i.shape[0], batch_size, mstype.bool_) - filled_weights_t2i = self.masked_fill( - weights_t2i[:, self.rank * batch_size: self.rank * batch_size + batch_size], - diag_fill_mask_t2i, 0) - weights_t2i[:, self.rank * batch_size: self.rank * - batch_size + batch_size] = filled_weights_t2i - - weights_i2t = self.softmax(sim_i2t) + 1e-4 - diag_fill_mask_i2t = self.eye(weights_i2t.shape[0], batch_size, mstype.bool_) - filled_weights_i2t = self.masked_fill( - weights_i2t[:, self.rank * batch_size: self.rank * batch_size + batch_size], - diag_fill_mask_i2t, 0) - weights_i2t[:, self.rank * batch_size: self.rank * - batch_size + batch_size] = filled_weights_i2t - - return weights_t2i, weights_i2t - - def choose_negative_targets(self, - weights_t2i, - weights_i2t, - batch_size, - image_embeds, - text_input_ids): - """choose negative targets for each image/text. - - Args: - weights_t2i (Tensor): masked text-to-image weights - weights_i2t (Tensor): masked image-to-text weights - batch_size (int): current batch size - image_embeds (Tensor): image embeddings - text_input_ids (Tensor): text ids - - Returns: - image_embeds_neg (Tensor): negative image_embeds - text_ids_neg (Tensor): negative text ids - """ - if self.group_size > 1: - # do all_gather with grads, align with torch impl. 
- image_embeds_gathered = self.all_gather_with_grad(image_embeds) - text_ids_gathered = self.all_gather(text_input_ids) - else: - image_embeds_gathered = image_embeds - text_ids_gathered = text_input_ids - - # select a negative image for each text - image_embeds_neg_idx = self.zeros(batch_size, mstype.int32) - for i in range(batch_size): - image_embeds_neg_idx[i] = choose_idx_with_prob(weights_t2i[i]) - image_embeds_neg = self.gather(image_embeds_gathered, image_embeds_neg_idx, 0) - - # select a negative text for each image - text_ids_neg_idx = self.zeros(batch_size, mstype.int32) - for i in range(batch_size): - text_ids_neg_idx[i] = choose_idx_with_prob(weights_i2t[i]) - text_ids_neg = self.gather(text_ids_gathered, text_ids_neg_idx, 0) - - return image_embeds_neg, text_ids_neg - - def compute_itm(self, image_inputs, text_ids, vit_computed=False): - """ compute image-text matching scores for the model. - Args: - image_inputs (Tensor): input image or image embeds (computed) - text_ids (Tensor): input text_ids - vit_computed (bool, optional): whether image embeds is computed - - Returns: - itm_logit - """ - vl_embeddings = self.forward_text_and_image( - image_inputs, text_ids, vit_computed) - itm_logit = self.itm_head(vl_embeddings) - itm_logit = itm_logit[:, :, 1].mean(axis=1) - return itm_logit - - def get_image_feature(self, image, output_past_keys=False): - """extract image feature""" - forward_image_outputs = self.forward_image(image, output_past_keys) - image_features = ops.L2Normalize( - axis=-1, epsilon=1e-12)(self.vision_proj(forward_image_outputs[0])) - return image_features - - def get_text_feature(self, input_ids): - """extract text feature""" - forward_text_outputs = self.forward_text(input_ids) - text_features = ops.L2Normalize( - axis=-1, epsilon=1e-12)(self.text_proj(forward_text_outputs[0])) - return text_features - - def extract_features(self, samples, mode="multimodal"): - """ extract feature as well as embeds by given mode, - - Args: - samples (tuple of Tensors): image/text input - mode (str): [image, text, multimodal] - """ - image = samples.get("image") - text_ids = samples.get("text_input") - - # assert mode is one of "image", "text", "multimodal" - assert mode in [ - "image", - "text", - "multimodal", - ], "mode must be one of 'image', 'text', 'multimodal'" - - # initialize output - image_embeds, text_embeds, multimodal_embeds = None, None, None - image_features, text_features = None, None - - if mode == "image": - assert ( - image is not None - ), "Image is not provided for mode 'image' or 'multimodal'" - # return query features - forward_image_outputs = self.forward_image(image, use_cache=False) - image_embeds = forward_image_outputs[0] - image_features = ops.L2Normalize( - axis=-1, epsilon=1e-12)(self.vision_proj(image_embeds)) - - elif mode == "text": - assert text_ids is not None, "text input is None for mode 'text' or 'multimodal'" - forward_text_outputs = self.forward_text(text_ids) - text_embeds = forward_text_outputs[0] - text_features = ops.L2Normalize( - axis=-1, epsilon=1e-12)(self.text_proj(text_embeds)) - - elif mode == "multimodal": - # return multimodal query features - multimodal_embeds = self.forward_text_and_image( - image, text_ids, vit_computed=False) - return (image_embeds, - image_features, - text_embeds, - text_features, - multimodal_embeds) - - -@MindFormerRegister.register(MindFormerModuleType.MODELS) -class Blip2Classifier(VisualGLMQformer): - """ - Blip2Classifier rely on Blip2Qformer, used for zero-shot classification. 
- - Args: - config (VisualGLMConfig): The config of Blip2Qformer. - - Examples: - >>> from mindformers import Blip2Classifier - >>> model_type = 'blip2_stage1_classification' - >>> model = Blip2Classifier.from_pretrained(model_type) - >>> type(model) - - """ - - def __init__(self, config: VisualGLMConfig, **kwargs): - super(Blip2Classifier, self).__init__(config, **kwargs) - self.load_checkpoint(config) - - def construct(self, image: ms.Tensor, text_input_ids: ms.Tensor, return_tuple: bool = False): - image_features = self.get_image_feature(image)[:, 0] - text_features = self.get_text_feature(text_input_ids)[:, 0] - sims = ops.matmul(image_features, text_features.T) / self.temp - return sims, sims.T # no label as input (compare to CLIP) diff --git a/research/visualglm/visualglm_text_generation_pipeline.py b/research/visualglm/visualglm_text_generation_pipeline.py deleted file mode 100644 index 1afc17b0..00000000 --- a/research/visualglm/visualglm_text_generation_pipeline.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""visualglm Image to text generation Pipeline adaptor.""" -import os -import re -from collections import OrderedDict -from typing import Optional, Union - -import numpy as np -from PIL import Image -from mindspore import Tensor, Model - -from mindformers import AutoProcessor, AutoModel -from mindformers.mindformer_book import MindFormerBook -from mindformers.models import PreTrainedModel, BaseImageProcessor -from mindformers.pipeline.base_pipeline import Pipeline -from mindformers.tools.image_tools import load_image -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType - -__all__ = ['VisualGLMImageToTextGenerationPipeline', 'register_pipeline_task'] - - -def register_pipeline_task(): - """ register pipeline task for visualglm """ - MindFormerBook.get_pipeline_support_task_list()['visualglm_image_to_text_generation'] = OrderedDict([ - ("visualglm_6b", os.path.join( - MindFormerBook.get_project_path(), "research/visualglm/run_visualglm_6b_image_to_text_generation.yaml"))]) - MindFormerBook.get_trainer_support_task_list()['visualglm_image_to_text_generation'] = OrderedDict([ - ("visualglm_6b", os.path.join( - MindFormerBook.get_project_path(), "research/visualglm/run_visualglm_6b_image_to_text_generation.yaml"))]) - - -@MindFormerRegister.register(MindFormerModuleType.PIPELINE, alias="visualglm_image_to_text_generation") -class VisualGLMImageToTextGenerationPipeline(Pipeline): - r"""Visualglm pipeline for image to text generation - - Args: - model (Union[str, PreTrainedModel]): The model used to perform task, - the input could be a supported model name, or a model instance - inherited from PreTrainedModel. - image_processor (Optional[BaseImageProcessor]): The image_processor of model, - it could be None if the model do not need image_processor. 
- - Raises: - TypeError: If input model and image_processor's types are not corrected. - ValueError: If the input model is not in support list. - """ - _support_list = MindFormerBook.get_pipeline_support_task_list()['image_to_text_generation'].keys() - - def __init__(self, model: Union[str, PreTrainedModel, Model], - image_processor: Optional[BaseImageProcessor] = None, - tokenizer=None, - **kwargs): - - if isinstance(model, str): - if model in self._support_list: - if image_processor is None: - image_processor = AutoProcessor.from_pretrained(model).image_processor - if not isinstance(image_processor, BaseImageProcessor): - raise TypeError(f"image_processor should be inherited from" - f" BaseImageProcessor, but got {type(image_processor)}.") - model = AutoModel.from_pretrained(model) - else: - raise ValueError(f"{model} is not supported by ImageToTextGenerationPipeline," - f"please selected from {self._support_list}.") - - if not isinstance(model, (PreTrainedModel, Model)): - raise TypeError(f"model should be inherited from PreTrainedModel or Model, but got type {type(model)}.") - - if image_processor is None: - raise ValueError("ImageToTextGenerationPipeline" - " requires for a image_processor.") - self.hypothesis_template = kwargs.pop("hypothesis_template", "{}") - super().__init__(model.set_train(mode=False), image_processor=image_processor, tokenizer=tokenizer, **kwargs) - - def _sanitize_parameters(self, **pipeline_parameters): - r"""Sanitize Parameters - - Args: - pipeline_parameters (Optional[dict]): The parameter dict to be parsed. - """ - preprocess_params = {} - postprocess_params = {} - forward_params = {} - - post_list = ["top_k"] - pre_list = ["hypothesis_template", "max_length", "padding"] - forward_list = ['top_k', 'top_p', 'do_sample', 'eos_token_id', 'repetition_penalty', 'max_length', 'seed'] - for item in post_list: - if item in pipeline_parameters: - postprocess_params[item] = pipeline_parameters.get(item) - - for item in pre_list: - if item in pipeline_parameters: - preprocess_params[item] = pipeline_parameters.get(item) - - for item in forward_list: - if item in pipeline_parameters: - forward_params[item] = pipeline_parameters.get(item) - - return preprocess_params, forward_params, postprocess_params - - @staticmethod - def generate_glm_prompt(unhandled_prompts, history=None, english=False): - """ generate glm prompt from raw prompt """ - if history is None: - history = [] - post_prompts, image_positions = [], [] - for query in unhandled_prompts: - prompt = "" - if english: - for _, (old_query, response) in enumerate(history): - prompt += "Q:{}\nA:{}\n".format(old_query, response) - prompt += "Q:{}\nA:".format(query) - else: - for _, (old_query, response) in enumerate(history): - prompt += "问:{}\n答:{}\n".format(old_query, response) - prompt += "问:{}\n答:".format(query) - post_prompts.append(prompt) - pre_prompts = [""] * len(post_prompts) - image_positions = [len("")] * len(post_prompts) - return pre_prompts, post_prompts, image_positions - - def handle_prompt(self, prompt, image_size): - if not prompt: - raw_prompts = [""] * image_size - else: - raw_prompts = prompt.split(',') - - # handle prompt using chatglm type - pre_prompts, post_prompts, image_positions = self.generate_glm_prompt(raw_prompts) - - return pre_prompts, post_prompts, image_positions - - def preprocess(self, inputs: (Union[str, dict, Image.Image, Tensor, np.ndarray]), - **preprocess_params): - r"""The Preprocess For Task - - Args: - inputs (Union[url, dict, PIL.Image, tensor, numpy]): Inputs used to 
generate text, including image, - and prompt (if provided). - preprocess_params (dict): The parameter dict for preprocess. - - Return: - Processed image and prompt. - """ - if isinstance(inputs, dict): - image = inputs['image'] - prompt = inputs.get('prompt', None) - else: - image = inputs - prompt = "" - - if self._batch_size is None: - batch_size = 1 - else: - batch_size = self._batch_size - - image_size = 1 - print(f"batch_size: {self._batch_size}") - if isinstance(image, str): - image = image.split(',') - image_size = len(image) - if batch_size > 1: - diff = batch_size - image_size - if diff > 0: - extend_filepath = [image[-1]] * diff - image.extend(extend_filepath) - else: - image = image[:batch_size] - image_list = [load_image(filepath) for filepath in image] - else: - image_list = [image] - - pre_prompts, post_prompts, image_positions = self.handle_prompt(prompt, image_size) - if batch_size > 1: - diff = batch_size - image_size - if diff > 0: - extend_pre_prompt = [pre_prompts[-1]] * diff - extend_post_prompt = [post_prompts[-1]] * diff - extend_positions = [image_positions[-1]] * diff - pre_prompts.extend(extend_pre_prompt) - post_prompts.extend(extend_post_prompt) - image_positions.extend(extend_positions) - else: - pre_prompts = pre_prompts[:batch_size] - post_prompts = post_prompts[:batch_size] - - image_processed = self.image_processor(image_list) - - max_length = preprocess_params.pop("max_length", 32) - padding = preprocess_params.pop("padding", "max_length") - - pre_input_ids = self.tokenizer(pre_prompts, add_special_tokens=False, return_tensors="ms")["input_ids"] - post_input_ids = self.tokenizer(post_prompts, - max_length=max_length - len(pre_input_ids[0]), - padding=padding, - return_tensors="ms")["input_ids"] - - return {"image_processed": image_processed, "pre_input_ids": pre_input_ids, "post_input_ids": post_input_ids} - - def forward(self, model_inputs: dict, - **forward_params): - r"""The Forward Process of Model - - Args: - model_inputs (dict): The output of preprocess. - forward_params (dict): The parameter dict for model forward. 
- """ - del forward_params - image_processed = model_inputs["image_processed"] - pre_input_ids = model_inputs["pre_input_ids"] - post_input_ids = model_inputs["post_input_ids"] - - output_ids_per_image = self.network.generate_text_for_image(image_processed, pre_input_ids, post_input_ids) - return {"output_ids": output_ids_per_image} - - @staticmethod - def process_response(response_list): - """ get standard response """ - handled_response = [] - for response in response_list: - response = response.strip() - response = response.replace("[[训练时间]]", "2023年") - punkts = [ - [",", ","], - ["!", "!"], - [":", ":"], - [";", ";"], - [r"\?", "?"], - ] - for item in punkts: - response = re.sub(r"([\u4e00-\u9fff])%s" % item[0], r"\1%s" % item[1], response) - response = re.sub(r"%s([\u4e00-\u9fff])" % item[0], r"%s\1" % item[1], response) - response = response.split('答:')[-1].strip() - handled_response.append(response) - return handled_response - - def postprocess(self, model_outputs, **postprocess_params): - """ post process """ - del postprocess_params - output_ids = model_outputs["output_ids"] - outputs = self.tokenizer.decode(output_ids, skip_special_tokens=True) - outputs = self.process_response(outputs) - return outputs diff --git a/research/visualglm/visualglm_vit.py b/research/visualglm/visualglm_vit.py deleted file mode 100644 index e80bdefa..00000000 --- a/research/visualglm/visualglm_vit.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# This file was refer to project: -# https://github.com/salesforce/LAVIS/tree/main/lavis/models/blip2_models -# ============================================================================ -"""vit models for visualglm""" -import os -from collections import OrderedDict - -from mindspore import load_checkpoint -import mindspore.common.dtype as mstype - -from mindformers.mindformer_book import MindFormerBook -from mindformers.models.vit.vit import ViTModel, ViTConfig -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType -from mindformers.tools.logger import logger -from mindformers.tools.utils import try_sync_file - - -@MindFormerRegister.register(MindFormerModuleType.MODELS) -class ViTModelForBlip2(ViTModel): - """ - ViTModel For visualglm Models, loading a pretrained weight. - forward will return the penultimate output. - """ - _support_list = MindFormerBook.get_config_support_list()['vit'] - - def __init__(self, config: ViTConfig): - super(ViTModelForBlip2, self).__init__(config) - print(f"------------------vit checkpoint path: {config.checkpoint_name_or_path}") - self.load_checkpoint(config) - - def construct(self, image): - return self.construct_without_pool(image) - - def load_checkpoint(self, config: ViTConfig): - """ - load checkpoint for BertLMHeadModel. 
(we can use the param for BertModel on obs,
-        but we need to alter the names of some param)
-
-        Args:
-            config (ModelConfig): QFormerConfig instance, which could have attribute
-            "checkpoint_name_or_path (str)". set checkpoint_name_or_path to a supported
-            model name or a path to checkpoint, to load model weights.
-        """
-        checkpoint_name_or_path = config.checkpoint_name_or_path
-        # the relevant file will be downloaded from the Obs platform.
-        if not os.path.exists(checkpoint_name_or_path):
-            if checkpoint_name_or_path not in self._support_list:
-                raise ValueError(f"{checkpoint_name_or_path} is not a supported default model"
-                                 f" or a valid path to checkpoint,"
-                                 f" please select from {self._support_list}.")
-            # on Atlas 800T A2, load the 'resized' checkpoint.
-            if not config.resize_token_embeddings and not checkpoint_name_or_path.endswith("_resized"):
-                checkpoint_name_or_path = checkpoint_name_or_path + "_resized"
-            checkpoint_name = checkpoint_name_or_path
-            default_checkpoint_download_folder = os.path.join(
-                MindFormerBook.get_default_checkpoint_download_folder(),
-                checkpoint_name_or_path.split("_")[0])
-            if not os.path.exists(default_checkpoint_download_folder):
-                os.makedirs(default_checkpoint_download_folder, exist_ok=True)
-
-            ckpt_file = os.path.join(default_checkpoint_download_folder, checkpoint_name + ".ckpt")
-            if not os.path.exists(ckpt_file):
-                url = MindFormerBook.get_model_ckpt_url_list()[checkpoint_name_or_path][0]
-                succeed = download_with_progress_bar(url, ckpt_file)
-                if not succeed:
-                    logger.info("checkpoint download failed, and pretrained weights are unloaded.")
-                    return
-            try_sync_file(ckpt_file)
-            self.default_checkpoint_download_path = ckpt_file
-            logger.info("start to read the ckpt file: %s", os.path.getsize(ckpt_file))
-        else:
-            ckpt_file = checkpoint_name_or_path
-        param = load_checkpoint(ckpt_file)
-        try:
-            self.convert_vit_model_params(param)
-            logger.info("weights in %s are loaded", ckpt_file)
-        except RuntimeError:
-            logger.error("the given config and weights in %s are"
-                         " mismatched, and weights load failed", ckpt_file)
-
-    def convert_vit_model_params(self, vit_model_params: OrderedDict):
-        """
-        convert params from BertModel in MindFormers, some param names are altered.
-        """
-        param_dict = self.parameters_dict()
-        for name, data in param_dict.items():
-            if name.startswith('ln_vision'):
-                new_name = name
-            else:
-                new_name = 'visual_encoder.' + name
-            if new_name not in vit_model_params:
-                logger.warning("%s does not exist", new_name)
-                continue
-            new_data = vit_model_params[new_name]
-            new_data = new_data.astype(mstype.float32)
-            data.assign_value(new_data)
diff --git a/research/wizardcoder/convert_reversed.py b/research/wizardcoder/convert_reversed.py
deleted file mode 100644
index 22f92d9b..00000000
--- a/research/wizardcoder/convert_reversed.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================ -"""Convert checkpoint from mindspore""" -import argparse -import collections - -import torch -import mindspore as ms - -from mindformers.utils.convert_utils import ms2pt - -ms_name = [ - "backbone.blocks.{}.layernorm1.gamma", - "backbone.blocks.{}.layernorm1.beta", - "backbone.blocks.{}.layernorm2.gamma", - "backbone.blocks.{}.layernorm2.beta", - "backbone.blocks.{}.attention.projection.weight", # - "backbone.blocks.{}.attention.projection.bias", - "backbone.blocks.{}.attention.dense1.weight", - "backbone.blocks.{}.attention.dense1.bias", - "backbone.blocks.{}.attention.dense2.weight", - "backbone.blocks.{}.attention.dense2.bias", - "backbone.blocks.{}.attention.dense3.weight", - "backbone.blocks.{}.attention.dense3.bias", - "backbone.blocks.{}.output.mapping.weight", # - "backbone.blocks.{}.output.mapping.bias", - "backbone.blocks.{}.output.projection.weight", # - "backbone.blocks.{}.output.projection.bias", -] - -torch_name = [ - "transformer.h.{}.ln_1.weight", - "transformer.h.{}.ln_1.bias", - "transformer.h.{}.ln_2.weight", - "transformer.h.{}.ln_2.bias", - "transformer.h.{}.attn.c_proj.weight", # - "transformer.h.{}.attn.c_proj.bias", - "transformer.h.{}.attn.c_attn.weight.q", - "transformer.h.{}.attn.c_attn.bias.q", - "transformer.h.{}.attn.c_attn.weight.k", - "transformer.h.{}.attn.c_attn.bias.k", - "transformer.h.{}.attn.c_attn.weight.v", - "transformer.h.{}.attn.c_attn.bias.v", - "transformer.h.{}.mlp.c_fc.weight", # - "transformer.h.{}.mlp.c_fc.bias", - "transformer.h.{}.mlp.c_proj.weight", # - "transformer.h.{}.mlp.c_proj.bias" -] - -addition_mindspore = [ - "backbone.layernorm.gamma", - "backbone.layernorm.beta", - "backbone.embedding.word_embedding.embedding_table", - "backbone.embedding.position_embedding.embedding_table", - "head.head_weight", -] - -addition_torch = [ - "transformer.ln_f.weight", - "transformer.ln_f.bias", - "transformer.wte.weight", - "transformer.wpe.weight", - "lm_head.weight", -] - - -def generate_weight_map(total_layers, - mindspore_params_per_layer, - torch_params_per_layer, - mindspore_additional_params, - torch_additional_params): - """ - generate weight map - """ - map_dict = dict(zip(mindspore_additional_params, torch_additional_params)) - for i in range(total_layers): - for idx, ms_para in enumerate(mindspore_params_per_layer): - map_dict[ms_para.format(i)] = torch_params_per_layer[idx].format(i) - - return map_dict - -# pylint: disable=W0613 -def convert_ms_to_pt(input_path, output_path, dtype=None, **kwargs): - """ - convert ms to pt - """ - state_dict = {} - print(f"Trying to convert mindspore checkpoint in {input_path}.") - model_ms = ms.load_checkpoint(input_path) - - assert len(ms_name) == len(torch_name) - assert len(addition_mindspore) == len(addition_torch) - total_layers, flag = divmod(len(model_ms) - len(addition_mindspore), len(ms_name)) - if flag: - raise Exception("The weight names don't match.") - weight_map = generate_weight_map(total_layers, ms_name, torch_name, addition_mindspore, addition_torch) - - attention_dict = collections.defaultdict(lambda: {}) - for name, value in model_ms.items(): - value = ms2pt(value, dtype) - if name.endswith('weight') and ('mapping' in name or 'projection' in name): - value = value.transpose(0, 1) - name = weight_map[name] - - if name.endswith('.q'): - name = name.rstrip('.q') - attention_dict[name]['q'] = value - continue - if name.endswith('.k'): - name = name.rstrip('.k') - attention_dict[name]['k'] = value - 
continue - if name.endswith('.v'): - name = name.rstrip('.v') - attention_dict[name]['v'] = value - continue - - state_dict[name] = value - - for name, value_dict in attention_dict.items(): - state_dict[name] = torch.cat((value_dict['q'], value_dict['k'], value_dict['v']), 0) - - torch.save(state_dict, output_path) - print(f"Convert finished, the output is saved to {output_path}.") - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="WizardCoder convert script") - parser.add_argument("--mindspore_path", - type=str, - default="wizardcoder.ckpt", - help="The input mindspore checkpoint path.") - parser.add_argument("--torch_path", - type=str, - default='wizardcoder.bin', - help="The output torch checkpoint path.") - opt = parser.parse_args() - - convert_ms_to_pt(opt.mindspore_path, opt.torch_path) diff --git a/research/wizardcoder/convert_weight.py b/research/wizardcoder/convert_weight.py deleted file mode 100644 index 1437bef4..00000000 --- a/research/wizardcoder/convert_weight.py +++ /dev/null @@ -1,212 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Convert checkpoint from torch/huggingface""" -import argparse -import os - -import numpy as np -import mindspore as ms -from transformers import GPTBigCodeForCausalLM - -from mindformers.utils.convert_utils import pt2ms - -ms.set_context(device_target="CPU") - -ms_name = [ - "backbone.blocks.{}.layernorm1.gamma", - "backbone.blocks.{}.layernorm1.beta", - "backbone.blocks.{}.layernorm2.gamma", - "backbone.blocks.{}.layernorm2.beta", - "backbone.blocks.{}.attention.projection.weight", - "backbone.blocks.{}.attention.projection.bias", - "backbone.blocks.{}.attention.dense1.weight", - "backbone.blocks.{}.attention.dense1.bias", - "backbone.blocks.{}.attention.dense2.weight", - "backbone.blocks.{}.attention.dense2.bias", - "backbone.blocks.{}.attention.dense3.weight", - "backbone.blocks.{}.attention.dense3.bias", - "backbone.blocks.{}.output.mapping.weight", - "backbone.blocks.{}.output.mapping.bias", - "backbone.blocks.{}.output.projection.weight", - "backbone.blocks.{}.output.projection.bias", -] - -torch_name = [ - "transformer.h.{}.ln_1.weight", - "transformer.h.{}.ln_1.bias", - "transformer.h.{}.ln_2.weight", - "transformer.h.{}.ln_2.bias", - "transformer.h.{}.attn.c_proj.weight", - "transformer.h.{}.attn.c_proj.bias", - "transformer.h.{}.attn.c_attn.weight.q", - "transformer.h.{}.attn.c_attn.bias.q", - "transformer.h.{}.attn.c_attn.weight.k", - "transformer.h.{}.attn.c_attn.bias.k", - "transformer.h.{}.attn.c_attn.weight.v", - "transformer.h.{}.attn.c_attn.bias.v", - "transformer.h.{}.mlp.c_fc.weight", - "transformer.h.{}.mlp.c_fc.bias", - "transformer.h.{}.mlp.c_proj.weight", - "transformer.h.{}.mlp.c_proj.bias" -] - -addition_mindspore = [ - "backbone.layernorm.gamma", - "backbone.layernorm.beta", - "backbone.embedding.word_embedding.embedding_table", - 
"backbone.embedding.position_embedding.embedding_table", - "head.head_weight", -] - -addition_torch = [ - "transformer.ln_f.weight", - "transformer.ln_f.bias", - "transformer.wte.weight", - "transformer.wpe.weight", - "lm_head.weight", -] - - -def generate_params_dict(total_layers, - mindspore_params_per_layer, - torch_params_per_layer, - mindspore_additional_params, - torch_additional_params): - """ - Generate the total parameter mapping of mindspore and pytorch. - - Args: - total_layers(int): The total layers of the net. - mindspore_params_per_layer(list): The list of params per layer for the net of mindspore. - torch_params_per_layer(list): The list of params per layer for the net of pytorch. - mindspore_additional_params(list): The list of params outside the layer for the net of mindspore - torch_additional_params(list): The list of params outside the layer for the net of pytorch. - - Returns: - A list of tuple. The first element is the parameter name of mindspore, - the another is the parameter name of pytorch. - """ - mapped_params = list(zip(mindspore_params_per_layer, torch_params_per_layer)) - ms_extend_param_list = [] - torch_extend_param_list = [] - for i in range(total_layers): - for ms_para, torch_para in mapped_params: - src = ms_para.format(i) - tgt = torch_para.format(i) - - ms_extend_param_list.append(src) - torch_extend_param_list.append(tgt) - - mapped_params = list(zip(mindspore_additional_params, torch_additional_params)) - for ms_para, torch_para in mapped_params: - ms_extend_param_list.append(ms_para) - torch_extend_param_list.append(torch_para) - - return list(zip(ms_extend_param_list, torch_extend_param_list)) - - -def print_dict(input_dict): - """ - Print the keys and values of input dict - - Args: - input_dict(dict): input dict with key and value. - - Returns: - None - """ - for k, v in input_dict.items(): - print(f"Param: {k} with shape {v.shape}") - - -def convert_pt_to_ms(input_path, output_path, dtype=None, **kwargs): - """ - convert pt to ms - """ - layers = kwargs.pop('layers', 40) - input_dir = os.path.dirname(input_path) - model = GPTBigCodeForCausalLM.from_pretrained(input_dir).to('cpu') - weight_dict = model.state_dict() - print_dict(weight_dict) - - mapped_params = generate_params_dict(total_layers=layers, - mindspore_params_per_layer=ms_name, - torch_params_per_layer=torch_name, - mindspore_additional_params=addition_mindspore, - torch_additional_params=addition_torch) - split_torch_attention(weight_dict) - - new_ckpt_list = [] - # Currently, the ms_extend_param the torch_extend_param is the full parameters. - for src, tgt in mapped_params: - value = pt2ms(weight_dict[tgt], dtype) - # split the attention layer for q, k, v - - # Disable transpose - if tgt.endswith('weight') and ('c_proj' in tgt or 'c_fc' in tgt): - print("----Transpose tgt:", tgt) - value = ms.Tensor(value.transpose([1, 0])) - - print(f"Mapping table Mindspore:{src:<30} \t Torch:{tgt:<30} with shape {value.shape}") - new_ckpt_list.append({"data": value, "name": src}) - - ms.save_checkpoint(new_ckpt_list, output_path) - print(f"Convert finished, the output is saved to {output_path}") - - -def split_torch_attention(state): - """ - split the torch attention parameter - - Args: - state(dict): The loaded state dict. The key is parameter name and value is the numpy array. 
- - Returns: - None - """ - s = list(state.keys()) - for name in s: - if name.endswith('attn.c_attn.weight') or name.endswith('attn.c_attn.bias'): - value = state.pop(name) - print("The real value shape is:", value.shape) - q, k, v = np.split(value, [6144, 6272], 0) - print("---q shape:", q.shape) - print("---k shape:", k.shape) - print("---v shape:", v.shape) - state[name + '.q'] = q - state[name + '.k'] = k - state[name + '.v'] = v - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="WizardCoder convert script" - "Examples:" - "python research/wizardcoder/convert_weight.py --layers 40 " - "--torch_path /xxx/pytorch_model.bin --mindspore_path /xxx/ms.ckpt") - parser.add_argument('--layers', - type=int, - default=40, - help="The number of layers of the model to be converted.") - parser.add_argument("--torch_path", - type=str, - default='/home/wizardcoder/pytorch_models_60step/hf.bin', - help="The torch checkpoint path.") - parser.add_argument("--mindspore_path", - type=str, - default="/home/wizardcoder/mindspore_models_rank_60step/rank_0/wizardcoder.ckpt", - help="Use device nums, default is 128.") - - opt = parser.parse_args() - convert_pt_to_ms(opt.torch_path, opt.mindspore_path, layers=opt.layers) diff --git a/research/wizardcoder/finetune_wizardcoder_15b_bf16.yaml b/research/wizardcoder/finetune_wizardcoder_15b_bf16.yaml deleted file mode 100644 index e2a2018d..00000000 --- a/research/wizardcoder/finetune_wizardcoder_15b_bf16.yaml +++ /dev/null @@ -1,214 +0,0 @@ -seed: 0 -run_mode: 'finetune' -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: "" -src_strategy_path_or_dir: '' -auto_trans_ckpt: False -only_save_strategy: False -resume_training: False - -# context -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - max_call_depth: 10000 - max_device_memory: "59GB" - save_graphs: False # 存图命令,可以查看网络结构等 - save_graphs_path: "./graph" - device_id: 6 - -# aicc -remote_save_url: "Please input obs url on AICC platform." - -# runner -runner_config: - epochs: 1 - batch_size: 1 - sink_mode: True - sink_size: 2 -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 2048 - scale_factor: 2 - scale_window: 1000 - use_clip_grad: True - -# parallel -use_parallel: True -parallel: - parallel_mode: 1 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: False - loss_repeated_mean: True - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" -parallel_config: - data_parallel: 1 - model_parallel: 4 - pipeline_stage: 2 - optimizer_shard: True - micro_batch_num: 8 - vocab_emb_dp: False - gradient_aggregation_group: 4 -micro_batch_interleave_num: 1 - -# moe -moe_config: - expert_num: 1 - capacity_factor: 1.05 - aux_loss_factor: 0.05 - num_experts_chosen: 1 - -# recompute -recompute_config: - recompute: True - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: False - -# autotune -auto_tune: True -filepath_prefix: './autotune' -autotune_per_step: 10 - -# profile -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: True -profile_communication: True -profile_memory: True - -# Trainer -trainer: - type: CausalLanguageModelingTrainer - model_name: 'wizardcoder' -# if True, do evaluate during the training process. if false, do nothing. 
-# note that the task trainer should support _evaluate_in_training function. -do_eval: False - -# train dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 1 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -# model -model: - model_config: - type: WizardCoderConfig - seq_length: 2048 - n_position: 8192 - vocab_size: 49153 - hidden_size: 6144 - num_layers: 40 - num_heads: 48 - expand_ratio: 4 - hidden_act: "gelu" - dropout_prob: 0.0 - hidden_dropout_prob: 0.0 - attention_probs_dropout_prob: 0.0 - initializer_range: 0.02 - eos_token: 0 - pad_token: 49152 - param_init_type: "bfloat16" - layernorm_dtype: "float32" - softmax_dtype: "float16" - compute_dtype: "bfloat16" - use_past: False - use_seq_parallel: True - use_select_recompute: True - checkpoint_name_or_path: "wizardcoder_15B.ckpt" - eos_token_id: 0 - repetition_penalty: 1 - max_decode_length: 1024 - top_k: 5 - top_p: 1 - do_sample: False - use_flash_attention: False - batch_size: 1 - arch: - type: WizardCoderLMHeadModel - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 0.000002 - lr_end: 0.0000005 - warmup_steps: 80 - total_steps: -1 # -1 means it will load the total steps of the dataset -layer_scale: False -layer_decay: 0.65 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.999 - eps: 0.00000001 - weight_decay: 0 -lr_scale: False -lr_scale_factor: 256 - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "wizardcoder" - save_checkpoint_steps: 10000 - keep_checkpoint_max: 2 - integrated_save: False - async_save: False - - type: ObsMonitor -eval_callbacks: - - type: ObsMonitor - -# metric -metric: - type: PerplexityMetric - -# processor -processor: - return_tensors: ms - tokenizer: - unk_token: '<|endoftext|>' - bos_token: '<|endoftext|>' - eos_token: '<|endoftext|>' - pad_token: '[PAD]' - vocab_file: 'vocab.json' - merge_file: 'merges.txt' - type: WizardCoderTokenizer - type: WizardCoderProcessor diff --git a/research/wizardcoder/finetune_wizardcoder_15b_fp16.yaml b/research/wizardcoder/finetune_wizardcoder_15b_fp16.yaml deleted file mode 100644 index f239ed5c..00000000 --- a/research/wizardcoder/finetune_wizardcoder_15b_fp16.yaml +++ /dev/null @@ -1,214 +0,0 @@ -seed: 0 -run_mode: 'finetune' -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: "" -src_strategy_path_or_dir: '' -auto_trans_ckpt: False -only_save_strategy: False -resume_training: False - -# context -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - max_call_depth: 10000 - max_device_memory: "59GB" - save_graphs: False # 存图命令,可以查看网络结构等 - save_graphs_path: "./graph" - device_id: 6 - -# aicc -remote_save_url: "Please input obs url on AICC platform." 
- -# runner -runner_config: - epochs: 1 - batch_size: 1 - sink_mode: True - sink_size: 2 -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 2048 - scale_factor: 2 - scale_window: 1000 - use_clip_grad: True - -# parallel -use_parallel: True -parallel: - parallel_mode: 1 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: False - loss_repeated_mean: True - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" -parallel_config: - data_parallel: 1 - model_parallel: 4 - pipeline_stage: 2 - optimizer_shard: True - micro_batch_num: 8 - vocab_emb_dp: False - gradient_aggregation_group: 4 -micro_batch_interleave_num: 1 - -# moe -moe_config: - expert_num: 1 - capacity_factor: 1.05 - aux_loss_factor: 0.05 - num_experts_chosen: 1 - -# recompute -recompute_config: - recompute: True - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: False - -# autotune -auto_tune: True -filepath_prefix: './autotune' -autotune_per_step: 10 - -# profile -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: True -profile_communication: True -profile_memory: True - -# Trainer -trainer: - type: CausalLanguageModelingTrainer - model_name: 'wizardcoder' -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False - -# train dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 1 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -# model -model: - model_config: - type: WizardCoderConfig - seq_length: 2048 - n_position: 8192 - vocab_size: 49153 - hidden_size: 6144 - num_layers: 40 - num_heads: 48 - expand_ratio: 4 - hidden_act: "gelu" - dropout_prob: 0.0 - hidden_dropout_prob: 0.0 - attention_probs_dropout_prob: 0.0 - initializer_range: 0.02 - eos_token: 0 - pad_token: 49152 - param_init_type: "float16" - layernorm_dtype: "float32" - softmax_dtype: "float16" - compute_dtype: "float16" - use_past: False - use_seq_parallel: True - use_select_recompute: True - checkpoint_name_or_path: "wizardcoder_15B.ckpt" - eos_token_id: 0 - repetition_penalty: 1 - max_decode_length: 1024 - top_k: 5 - top_p: 1 - do_sample: False - use_flash_attention: False - batch_size: 1 - arch: - type: WizardCoderLMHeadModel - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 0.000002 - lr_end: 0.0000005 - warmup_steps: 80 - total_steps: -1 # -1 means it will load the total steps of the dataset -layer_scale: False -layer_decay: 0.65 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.999 - eps: 0.00000001 - weight_decay: 0 -lr_scale: False -lr_scale_factor: 256 - -# callbacks -callbacks: - 
- type: MFLossMonitor - - type: CheckpointMonitor - prefix: "wizardcoder" - save_checkpoint_steps: 10000 - keep_checkpoint_max: 2 - integrated_save: False - async_save: False - - type: ObsMonitor -eval_callbacks: - - type: ObsMonitor - -# metric -metric: - type: PerplexityMetric - -# processor -processor: - return_tensors: ms - tokenizer: - unk_token: '<|endoftext|>' - bos_token: '<|endoftext|>' - eos_token: '<|endoftext|>' - pad_token: '[PAD]' - vocab_file: 'vocab.json' - merge_file: 'merges.txt' - type: WizardCoderTokenizer - type: WizardCoderProcessor diff --git a/research/wizardcoder/infer_bf16_npu.py b/research/wizardcoder/infer_bf16_npu.py deleted file mode 100644 index 7c80fbdc..00000000 --- a/research/wizardcoder/infer_bf16_npu.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""do infer using wizardcoder""" -import os -import argparse - -from mindspore import log as logger - -from mindformers import MindFormerConfig -from mindformers.tools.utils import str2bool -from mindformers.core.context import build_context -from wizardcoder_config import WizardCoderConfig -from wizardcoder import WizardCoderLMHeadModel -from wizardcoder_tokenizer import WizardCoderTokenizer - - -def load_model_and_tokenizer(args): - """load model and tokenizer using args.""" - config = MindFormerConfig(os.path.realpath(args.config_path)) - config.context.device_id = args.device_id - build_context(config) - wizard_config = WizardCoderConfig.from_pretrained(os.path.realpath(args.config_path)) - wizard_config.use_past = args.use_past - wizard_config.batch_size = args.batch_size - tokenizer = WizardCoderTokenizer(config.processor.tokenizer.vocab_file, - config.processor.tokenizer.merge_file) - model = WizardCoderLMHeadModel(wizard_config) - return model, tokenizer - - -def main(args): - """do infer""" - model, tokenizer = load_model_and_tokenizer(args) - # test 4 cases: - prompts = [ - [ - 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nCreate a Python script for this problem:\n\nWrite a python function to find the volume of a triangular prism.\nTest examples:\nassert find_Volume(10,8,6) == 240\nassert find_Volume(3,2,2) == 6\nassert find_Volume(1,2,1) == 1\n\n### Response:'], - [ - 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nCreate a Python script for this problem:\n\nWrite a function to find sequences of lowercase letters joined with an underscore.\nTest examples:\nassert text_lowercase_underscore("aab_cbbbc")==(\'Found a match!\')\nassert text_lowercase_underscore("aab_Abbbc")==(\'Not matched!\')\nassert text_lowercase_underscore("Aaab_abbbc")==(\'Not matched!\')\n\n### Response:'], - [ - 'Below is an instruction that describes a task. 
Write a response that appropriately completes the request.\n\n### Instruction:\nCreate a Python script for this problem:\n\nWrite a function to check if the given tuple list has all k elements.\nTest examples:\nassert check_k_elements([(4, 4), (4, 4, 4), (4, 4), (4, 4, 4, 4), (4, )], 4) == True\nassert check_k_elements([(7, 7, 7), (7, 7)], 7) == True\nassert check_k_elements([(9, 9), (9, 9, 9, 9)], 7) == False\n\n### Response:'], - [ - 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nCreate a Python script for this problem:\n\nWrite a function to find the n-th rectangular number.\nTest examples:\nassert find_rect_num(4) == 20\nassert find_rect_num(5) == 30\nassert find_rect_num(6) == 42\n\n### Response:'] - ] - - for idx, prompt in enumerate(prompts): - logger.info(f'==============================Start Case{idx} infer============================') - prompt = prompt * args.batch_size - logger.info(f"prompt: {[prompt]}") - output = model.generate(input_ids=tokenizer(prompt)["input_ids"], use_past=args.use_past, - max_length=args.max_length) - output_decode = tokenizer.decode(output[0]) - logger.info(f"output: {[output_decode]}") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--config_path', default='run_wizardcoder_15b.yaml', type=str, - help='config') - parser.add_argument('--max_length', default=2048, type=int, - help='max length') - parser.add_argument('--batch_size', default=1, type=int, - help='batch_size') - parser.add_argument('--device_id', default=0, type=int, - help='device_id') - parser.add_argument('--use_past', default=True, type=str2bool, - help="use past") - args_ = parser.parse_args() - - main(args_) diff --git a/research/wizardcoder/inference_wizardcoder_pytorch.py b/research/wizardcoder/inference_wizardcoder_pytorch.py deleted file mode 100644 index 8a1f29f9..00000000 --- a/research/wizardcoder/inference_wizardcoder_pytorch.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""test wizardcoder pytorch""" -import sys -import time -import argparse -import torch -from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig - -if torch.cuda.is_available(): - device = "cuda" -else: - device = "cpu" - - -def evaluate(prompts, tokenizer, model, max_length=1024, **kwargs): - """evaluate function""" - start_time_with_tokenizer = time.time() - inputs = tokenizer(prompts, return_tensors="pt", max_length=max_length, truncation=True, padding=True) - input_ids = inputs["input_ids"].to(device) - generation_config = GenerationConfig( - temperature=1, - top_p=1, - top_k=1, - do_sample=False, - num_beams=1, - eos_token_id=tokenizer.eos_token_id, - pad_token_id=tokenizer.pad_token_id, - max_length=max_length, - **kwargs - ) - start_time_no_tokenizer = time.time() - with torch.no_grad(): - generation_output = model.generate( - input_ids=input_ids, - generation_config=generation_config, - return_dict_in_generate=True, - output_scores=True - ) - seq = generation_output.sequences - end_time_no_tokenizer = time.time() - output = tokenizer.batch_decode(seq, skip_special_tokens=True) - end_time_with_tokenizer = time.time() - elapsed_time_with_tokenizer = end_time_with_tokenizer - start_time_with_tokenizer - elapsed_time_no_tokenizer = end_time_no_tokenizer - start_time_no_tokenizer - generate_length = sum([len(item) for item in seq]) - sum([len(ids) for ids in input_ids]) - return output, generate_length, elapsed_time_with_tokenizer, elapsed_time_no_tokenizer - - -def generate_prompt(input_query): - return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request. - -### Instruction: -Create a Python script for this problem: -{input_query} - -### Response:""" - - -def main(args, with_prompt=True, load_8bit: bool = False): - - tokenizer = AutoTokenizer.from_pretrained(args.base_model) - model = AutoModelForCausalLM.from_pretrained( - args.base_model, - load_in_8bit=load_8bit, - torch_dtype=torch.float16, - device_map="auto", - ) - model.config.pad_token_id = tokenizer.pad_token_id - - if not load_8bit: - model.half() - - model.eval() - if torch.__version__ >= "2" and sys.platform != "win32": - model = torch.compile(model) - - input_data = [["使用python编写快速排序代码"] * args.batch_size] - for _, instruction in enumerate(input_data): - print('\n开始推理.......') - if with_prompt: - prompt = instruction - else: - prompt = generate_prompt(instruction) - decode_output, generate_length, time_with_tokenizer, time_no_tokenizer = \ - evaluate(prompt, tokenizer, model, max_length=args.seq_length) - print("output: \n", decode_output[0]) - speed_with_tokenizer = generate_length / time_with_tokenizer - speed_no_tokenizer = generate_length / time_no_tokenizer - print("\n generate length: ", generate_length, - " elapsed_time_with_tokenizer: ", time_with_tokenizer, - " elapsed_time_no_tokenizer: ", time_no_tokenizer, - " speed_with_tokenizer: ", speed_with_tokenizer, - " speed_no_tokenizer: ", speed_no_tokenizer) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--base_model', default='', type=str, - help='base model') - parser.add_argument('--seq_length', default=2048, type=int, - help='batch_size') - parser.add_argument('--batch_size', default=1, type=int, - help='batch_size') - - opt = parser.parse_args() - main(opt) diff --git a/research/wizardcoder/mbpp_gen_online.py b/research/wizardcoder/mbpp_gen_online.py deleted 
file mode 100644 index 4e8c347f..00000000 --- a/research/wizardcoder/mbpp_gen_online.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""mbpp evaluate using online method""" - -import json -import argparse -from tqdm import tqdm -import mindspore as ms -from mindformers.generation import GenerationConfig - -from wizardcoder_config import WizardCoderConfig -from wizardcoder_tokenizer import WizardCoderTokenizer -from wizardcoder import WizardCoderLMHeadModel - - -def read_mbpp(path): - """read mbpp file""" - mbpp_problems = {} - with open(path, "r", encoding="utf-8") as in_file: - for line in in_file: - item = json.loads(line.strip()) - mbpp_problems[item["task_id"]] = item - return mbpp_problems - - -def generate_prompt(input_problem): - """construct the prompt""" - query_prompt = \ - f"""Below is an instruction that describes a task. Write a response that appropriately completes the request. - -### Instruction: -Create a Python script for this problem: -{input_problem} - -### Response:""" - return query_prompt - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--batch_size', default=1, type=int, - help='batch_size') - parser.add_argument('--seq_length', default=2048, type=int, - help='batch_size') - parser.add_argument('--tokenizer_path', default='/path/mindspore_models/', type=str, - help='tokenizer_path') - parser.add_argument('--model_path', default='/path/mindspore_models/wizardcoder.ckpt', type=str, - help='wizardcoder_model_path') - parser.add_argument('--device_id', default=0, type=int, - help='set device id.') - parser.add_argument('--start_index', default=0, type=int, - help='start_index') - parser.add_argument('--end_index', default=0, type=int, - help='end_index') - parser.add_argument('--output_path', default="", type=str, - help='output_path') - parser.add_argument('--mbpp_path', default="", type=str, - help='mbpp path') - args = parser.parse_args() - - ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend", device_id=args.device_id) - print(args.start_index, args.end_index) - tokenizer = WizardCoderTokenizer( - vocab_file=args.tokenizer_path + "vocab.json", - merge_file=args.tokenizer_path + "merges.txt" - ) - gen_config = GenerationConfig( - do_sample=False, - use_past=True, - max_length=args.seq_length, - eos_token_id=0, - pad_token_id=49152 - ) - wizardcoder_config = WizardCoderConfig( - batch_size=args.batch_size, - seq_length=args.seq_length, - n_position=8192, - vocab_size=49153, - hidden_size=6144, - num_layers=40, - num_heads=48, - eos_token_id=0, - pad_token_id=49152, - checkpoint_name_or_path=args.model_path, - use_past=True # False为自回归推理,True为增量推理 - ) - model = WizardCoderLMHeadModel(config=wizardcoder_config) - print("model create success!") - problems = read_mbpp(args.mbpp_path) - task_ids = sorted(problems.keys())[args.start_index: args.end_index] - prompts = [] 
- for task_id in task_ids: - prompt = f"\n{problems[task_id]['text']}\nTest examples:" - if task_id == 493: - # The test examples are too long. We choose to only include the function name. - test_example = problems[task_id]['test_list'][0] - prompt += f"\ncalculate_polygons(startx, starty, endx, endy, radius)" - else: - for test_example in problems[task_id]['test_list']: - prompt += f"\n{test_example}" - prompts.append(prompt) - num_samples = len(prompts) - print(num_samples) - for i in tqdm(range(num_samples), ncols=0, total=num_samples): - output_file = args.output_path + '/{}.json'.format(args.start_index + i) - - prompt = prompts[i].replace(' ', '\t') - prompt_batch = [tokenizer(generate_prompt(prompt))['input_ids']] - print(prompt_batch) - output = model.generate(input_ids=prompt_batch, generation_config=gen_config, max_length=args.seq_length) - decode_output = tokenizer.decode(output) - print(decode_output) - with open(output_file, "w", encoding='utf-8') as f: - json.dump({"output": decode_output}, f) diff --git a/research/wizardcoder/mbpp_process.py b/research/wizardcoder/mbpp_process.py deleted file mode 100644 index 65ce9823..00000000 --- a/research/wizardcoder/mbpp_process.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""mbpp process script""" - -import glob -import argparse -import json -from tqdm import tqdm - - -def read_file_method(opt): - """read file method""" - file_tuple = [(int(file.split("/")[-1].split(".")[0]), file) for file in glob.glob(opt.path + '/*.json')] - sorted_files = sorted(file_tuple, key=lambda x: x[0]) - gen_files = [item[1] for item in sorted_files] - return gen_files - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--path', type=str, help="") - parser.add_argument('--out_path', type=str, help="") - args = parser.parse_args() - - files = read_file_method(args) - print("{} files in {}".format(len(files), args.path)) - - res = [] - count = 0 - for code_file in tqdm(files, total=len(files)): - with open(code_file, "r", encoding="utf-8") as f: - data = json.load(f) - completion = data["output"][0] - if '```python' in completion: - def_line = completion.index('```python') - completion = completion[def_line:].strip() - completion = completion.replace('```python', '') - try: - next_line = completion.index('\n```') - completion = completion[:next_line].strip() - except ValueError: - print("completion: ", completion) - count += 1 - if "__name__ == \"__main__\"" in completion: - next_line = completion.index('if __name__ == "__main__":') - completion = completion[:next_line].strip() - - if "# Example usage" in completion: - next_line = completion.index('# Example usage') - completion = completion[:next_line].strip() - - if "# Test examples" in completion: - next_line = completion.index('# Test examples') - completion = completion[:next_line].strip() - - res.append([completion]) - print("count: ", count) - print("save to {}".format(args.out_path)) - with open(args.out_path, "w", encoding="utf-8") as fout: - json.dump(res, fout) diff --git a/research/wizardcoder/predict_wizardcoder_15b_fp16.yaml b/research/wizardcoder/predict_wizardcoder_15b_fp16.yaml deleted file mode 100644 index f0d9f4c0..00000000 --- a/research/wizardcoder/predict_wizardcoder_15b_fp16.yaml +++ /dev/null @@ -1,214 +0,0 @@ -seed: 0 -run_mode: 'finetune' -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: "" -src_strategy_path_or_dir: '' -auto_trans_ckpt: False -only_save_strategy: False -resume_training: False - -# context -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - max_call_depth: 10000 - max_device_memory: "59GB" - save_graphs: False # 存图命令,可以查看网络结构等 - save_graphs_path: "./graph" - device_id: 6 - -# aicc -remote_save_url: "Please input obs url on AICC platform." 
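# Note: the high-level inference guide in wizardcoder.md expects runner_config.batch_size
# below to stay at 1 for single-prompt prediction with this config.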
- -# runner -runner_config: - epochs: 1 - batch_size: 1 - sink_mode: True - sink_size: 2 -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 2048 - scale_factor: 2 - scale_window: 1000 - use_clip_grad: True - -# parallel -use_parallel: False -parallel: - parallel_mode: 1 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: False - loss_repeated_mean: True - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" -parallel_config: - data_parallel: 1 - model_parallel: 8 - pipeline_stage: 1 - optimizer_shard: True - micro_batch_num: 1 - vocab_emb_dp: True # 默认按mp切,设为True是按照dp切 - gradient_aggregation_group: 4 -micro_batch_interleave_num: 1 # 多副本并行,设为2有效果 - -# moe -moe_config: - expert_num: 1 - capacity_factor: 1.05 - aux_loss_factor: 0.05 - num_experts_chosen: 1 - -# recompute -recompute_config: - recompute: True - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: False - -# autotune -auto_tune: True -filepath_prefix: './autotune' -autotune_per_step: 10 - -# profile -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: True -profile_communication: True -profile_memory: True - -# Trainer -trainer: - type: CausalLanguageModelingTrainer - model_name: 'wizardcoder' -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False - -# train dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: True - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 1 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -# model -model: - model_config: - type: WizardCoderConfig - seq_length: 2048 - n_position: 8192 - vocab_size: 49153 - hidden_size: 6144 - num_layers: 40 - num_heads: 48 - expand_ratio: 4 - hidden_act: "gelu" - dropout_prob: 0.0 - hidden_dropout_prob: 0.1 - attention_probs_dropout_prob: 0.1 - initializer_range: 0.02 - eos_token: 0 - pad_token: 49152 - param_init_type: "float16" - layernorm_dtype: "float32" - softmax_dtype: "float16" - compute_dtype: "float16" - use_past: False - use_seq_parallel: True - use_select_recompute: True - checkpoint_name_or_path: "wizardcoder.ckpt" - eos_token_id: 0 - repetition_penalty: 1 - max_decode_length: 1024 - top_k: 5 - top_p: 1 - do_sample: True - use_flash_attention: False - batch_size: 1 - arch: - type: WizardCoderLMHeadModel - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 0.000002 - lr_end: 0.0000005 - warmup_steps: 80 - total_steps: -1 # -1 means it will load the total steps of the dataset -layer_scale: False -layer_decay: 0.65 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.999 - eps: 0.00000001 - weight_decay: 0 -lr_scale: False -lr_scale_factor: 
256 - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "wizardcoder" - save_checkpoint_steps: 2000 - keep_checkpoint_max: 2 - integrated_save: False - async_save: False - - type: ObsMonitor -eval_callbacks: - - type: ObsMonitor - -# metric -metric: - type: PerplexityMetric - -# processor -processor: - return_tensors: ms - tokenizer: - unk_token: '<|endoftext|>' - bos_token: '<|endoftext|>' - eos_token: '<|endoftext|>' - pad_token: '[PAD]' - vocab_file: 'vocab.json' - merge_file: 'merges.txt' - type: WizardCoderTokenizer - type: WizardCoderProcessor diff --git a/research/wizardcoder/pretrain_wizardcoder_15b_bf16.yaml b/research/wizardcoder/pretrain_wizardcoder_15b_bf16.yaml deleted file mode 100644 index 9966f94a..00000000 --- a/research/wizardcoder/pretrain_wizardcoder_15b_bf16.yaml +++ /dev/null @@ -1,215 +0,0 @@ -seed: 0 -run_mode: 'train' -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: "" -src_strategy_path_or_dir: '' -auto_trans_ckpt: False -only_save_strategy: False -resume_training: False - -# context -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - max_call_depth: 10000 - max_device_memory: "59GB" - save_graphs: False # 存图命令,可以查看网络结构等 - save_graphs_path: "./graph" - device_id: 6 - -# aicc -remote_save_url: "Please input obs url on AICC platform." - -# runner -runner_config: - epochs: 1 - batch_size: 1 - sink_mode: True - sink_size: 2 - gradient_accumulation_steps: 1 -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 1 # fp16 2048 bf16 修改为1 - scale_factor: 2 - scale_window: 1000 - use_clip_grad: True - -# parallel -use_parallel: False -parallel: - parallel_mode: 1 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: False - loss_repeated_mean: True - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" -parallel_config: - data_parallel: 1 - model_parallel: 4 - pipeline_stage: 2 - optimizer_shard: True - micro_batch_num: 8 - vocab_emb_dp: False # 默认按mp切,设为True是按照dp切 - gradient_aggregation_group: 4 -micro_batch_interleave_num: 1 # 多副本并行,设为2有效果 - -# moe -moe_config: - expert_num: 1 - capacity_factor: 1.05 - aux_loss_factor: 0.05 - num_experts_chosen: 1 - -# recompute -recompute_config: - recompute: True - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: False - -# autotune -auto_tune: True -filepath_prefix: './autotune' -autotune_per_step: 10 - -# profile -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: True -profile_communication: True -profile_memory: True - -# Trainer -trainer: - type: CausalLanguageModelingTrainer - model_name: 'wizardcoder' -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. 
-do_eval: False - -# train dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 1 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -# model -model: - model_config: - type: WizardCoderConfig - seq_length: 2048 - n_position: 8192 - vocab_size: 49153 - hidden_size: 6144 - num_layers: 40 - num_heads: 48 - expand_ratio: 4 - hidden_act: "gelu" - dropout_prob: 0.0 - hidden_dropout_prob: 0.0 - attention_probs_dropout_prob: 0.0 - initializer_range: 0.02 - eos_token: 0 - pad_token: 49152 - param_init_type: "bfloat16" - layernorm_dtype: "float32" - softmax_dtype: "float16" - compute_dtype: "bfloat16" - use_past: False - use_seq_parallel: True - use_select_recompute: True - checkpoint_name_or_path: "wizardcoder_15B.ckpt" - eos_token_id: 0 - repetition_penalty: 1 - max_decode_length: 1024 - top_k: 5 - top_p: 1 - do_sample: False - use_flash_attention: False - batch_size: 1 - arch: - type: WizardCoderLMHeadModel - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR # 直线找下 - learning_rate: 0.000002 - lr_end: 0.0000005 - warmup_steps: 100 # - total_steps: -1 # -1 means it will load the total steps of the dataset -layer_scale: False -layer_decay: 0.65 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.999 - eps: 0.00000001 - weight_decay: 0 -lr_scale: False -lr_scale_factor: 256 - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "wizardcoder" - save_checkpoint_steps: 10000 - keep_checkpoint_max: 2 - integrated_save: False - async_save: False - - type: ObsMonitor -eval_callbacks: - - type: ObsMonitor - -# metric -metric: - type: PerplexityMetric - -# processor -processor: - return_tensors: ms - tokenizer: - unk_token: '<|endoftext|>' - bos_token: '<|endoftext|>' - eos_token: '<|endoftext|>' - pad_token: '[PAD]' - vocab_file: 'vocab.json' - merge_file: 'merges.txt' - type: WizardCoderTokenizer - type: WizardCoderProcessor diff --git a/research/wizardcoder/run_wizardcoder.py b/research/wizardcoder/run_wizardcoder.py deleted file mode 100644 index 0c3f238a..00000000 --- a/research/wizardcoder/run_wizardcoder.py +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""InternLM Train/Finetune/Eval/Predict scripts.""" -import os -import argparse - -from mindformers import Trainer, MindFormerConfig -from mindformers import init_context, ContextConfig, ParallelContextConfig -from mindformers.tools.utils import str2bool -from mindformers.core.context import build_context - -# pylint: disable=W0611 -import wizardcoder -import wizardcoder_modules -from wizardcoder_tokenizer import WizardCoderTokenizer - - -def context_init(use_parallel=False, optimizer_parallel=False, device_id=0): - """init context for mindspore.""" - context_config = ContextConfig(mode=0, device_target="Ascend", device_id=device_id) - parallel_config = None - if use_parallel: - parallel_config = ParallelContextConfig(parallel_mode='SEMI_AUTO_PARALLEL', - gradients_mean=False, - enable_parallel_optimizer=optimizer_parallel, - full_batch=True) - init_context(use_parallel=use_parallel, - context_config=context_config, - parallel_config=parallel_config) - - -def generate_prompt(instruction): - """the prompt used for wizardcoder, which is corresponding to the training process""" - return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request. - -### Instruction: -{instruction} - -### Response:""" - - -def main(task='text_generation', - config='run_wizardcoder.yaml', - run_mode='train', - pet_method='', - use_parallel=False, - ckpt=None, - train_dataset='', - eval_dataset='', - predict_data='', - max_length=512, - vocab_file=None, - merge_file=None, - op=True, - device_id=0): - """main function.""" - # 环境初始化 - if os.path.exists(config) and config.endswith(('.yaml', '.yml')): - config = MindFormerConfig(os.path.realpath(config)) - config.use_parallel = use_parallel - if vocab_file is not None: - config.processor.tokenizer.vocab_file = vocab_file - if vocab_file is not None: - config.processor.tokenizer.merge_file = merge_file - config.context.device_id = device_id - build_context(config) - else: - context_init(use_parallel, op, device_id) - - if ckpt is not None and ckpt != '': - config.load_checkpoint = ckpt - - # 定义任务,预先准备好相应数据集 - if run_mode == 'train': - task = Trainer(args=config, - task=task, - train_dataset=train_dataset, - pet_method=pet_method) - task.train(train_checkpoint=config.load_checkpoint, auto_trans_ckpt=config.auto_trans_ckpt) - - elif run_mode == 'finetune': - task = Trainer(args=config, - task=task, - train_dataset=train_dataset, - pet_method=pet_method) - task.finetune(finetune_checkpoint=config.load_checkpoint, auto_trans_ckpt=config.auto_trans_ckpt) - - elif run_mode == 'eval': - task = Trainer(args=config, - task=task, - eval_dataset=eval_dataset, - pet_method=pet_method) - task.evaluate(eval_checkpoint=config.load_checkpoint) - - elif run_mode == 'predict': - task = Trainer(args=config, - task=task) - prompt = generate_prompt(predict_data) - result = task.predict(input_data=prompt, - predict_checkpoint=ckpt, max_length=int(max_length)) - print(result) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('--task', default='text_generation', type=str, - help='set task type.') - parser.add_argument('--config', default='run_wizardcoder.yaml', type=str, - help='set task type.') - parser.add_argument('--run_mode', default='train', type=str, - help='set run mode for model.') - parser.add_argument('--pet_method', default='', type=str, - help='set pet method for low parameter finetune.') - 
parser.add_argument('--use_parallel', default=False, type=str2bool, - help='open parallel for model.') - parser.add_argument('--load_checkpoint', default='', type=str, - help='load_checkpoint') - parser.add_argument('--resume', default=False, type=str2bool, - help='whether resume training.') - parser.add_argument('--train_dataset', default='', type=str, - help='set train dataset.') - parser.add_argument('--eval_dataset', default='', type=str, - help='set eval dataset.') - parser.add_argument('--predict_data', default='', type=str, - help='input predict data.') - parser.add_argument('--predict_length', default=512, type=int, - help='max length for predict output.') - parser.add_argument('--optimizer_parallel', default=False, type=str2bool, - help='whether use optimizer parallel. Default: False') - parser.add_argument('--device_id', default=1, type=int, - help='ID of the target device, the value must be in [0, device_num_per_host-1]') - parser.add_argument('--vocab_file', default=None, type=str, - help='tokenizer model') - parser.add_argument('--merge_file', default=None, type=str, - help='tokenizer model') - args = parser.parse_args() - - main(task=args.task, - config=args.config, - run_mode=args.run_mode, - pet_method=args.pet_method, - use_parallel=args.use_parallel, - ckpt=args.load_checkpoint, - train_dataset=args.train_dataset, - eval_dataset=args.eval_dataset, - predict_data=args.predict_data, - max_length=args.predict_length, - op=args.optimizer_parallel, - device_id=args.device_id, - vocab_file=args.vocab_file, - merge_file=args.merge_file) diff --git a/research/wizardcoder/run_wizardcoder.yaml b/research/wizardcoder/run_wizardcoder.yaml deleted file mode 100644 index f0d9f4c0..00000000 --- a/research/wizardcoder/run_wizardcoder.yaml +++ /dev/null @@ -1,214 +0,0 @@ -seed: 0 -run_mode: 'finetune' -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: "" -src_strategy_path_or_dir: '' -auto_trans_ckpt: False -only_save_strategy: False -resume_training: False - -# context -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - max_call_depth: 10000 - max_device_memory: "59GB" - save_graphs: False # 存图命令,可以查看网络结构等 - save_graphs_path: "./graph" - device_id: 6 - -# aicc -remote_save_url: "Please input obs url on AICC platform." 
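# Note: when use_parallel is enabled, data_parallel * model_parallel * pipeline_stage in
# parallel_config below is expected to match the number of launched NPUs (e.g. 1 * 8 * 1 = 8
# for the single-node 8-card setup used in wizardcoder.md).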
- -# runner -runner_config: - epochs: 1 - batch_size: 1 - sink_mode: True - sink_size: 2 -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 2048 - scale_factor: 2 - scale_window: 1000 - use_clip_grad: True - -# parallel -use_parallel: False -parallel: - parallel_mode: 1 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel - gradients_mean: False - loss_repeated_mean: True - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" -parallel_config: - data_parallel: 1 - model_parallel: 8 - pipeline_stage: 1 - optimizer_shard: True - micro_batch_num: 1 - vocab_emb_dp: True # 默认按mp切,设为True是按照dp切 - gradient_aggregation_group: 4 -micro_batch_interleave_num: 1 # 多副本并行,设为2有效果 - -# moe -moe_config: - expert_num: 1 - capacity_factor: 1.05 - aux_loss_factor: 0.05 - num_experts_chosen: 1 - -# recompute -recompute_config: - recompute: True - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: False - -# autotune -auto_tune: True -filepath_prefix: './autotune' -autotune_per_step: 10 - -# profile -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: True -profile_communication: True -profile_memory: True - -# Trainer -trainer: - type: CausalLanguageModelingTrainer - model_name: 'wizardcoder' -# if True, do evaluate during the training process. if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False - -# train dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: True - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 1 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids", "labels"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -# model -model: - model_config: - type: WizardCoderConfig - seq_length: 2048 - n_position: 8192 - vocab_size: 49153 - hidden_size: 6144 - num_layers: 40 - num_heads: 48 - expand_ratio: 4 - hidden_act: "gelu" - dropout_prob: 0.0 - hidden_dropout_prob: 0.1 - attention_probs_dropout_prob: 0.1 - initializer_range: 0.02 - eos_token: 0 - pad_token: 49152 - param_init_type: "float16" - layernorm_dtype: "float32" - softmax_dtype: "float16" - compute_dtype: "float16" - use_past: False - use_seq_parallel: True - use_select_recompute: True - checkpoint_name_or_path: "wizardcoder.ckpt" - eos_token_id: 0 - repetition_penalty: 1 - max_decode_length: 1024 - top_k: 5 - top_p: 1 - do_sample: True - use_flash_attention: False - batch_size: 1 - arch: - type: WizardCoderLMHeadModel - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 0.000002 - lr_end: 0.0000005 - warmup_steps: 80 - total_steps: -1 # -1 means it will load the total steps of the dataset -layer_scale: False -layer_decay: 0.65 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.999 - eps: 0.00000001 - weight_decay: 0 -lr_scale: False -lr_scale_factor: 
256 - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "wizardcoder" - save_checkpoint_steps: 2000 - keep_checkpoint_max: 2 - integrated_save: False - async_save: False - - type: ObsMonitor -eval_callbacks: - - type: ObsMonitor - -# metric -metric: - type: PerplexityMetric - -# processor -processor: - return_tensors: ms - tokenizer: - unk_token: '<|endoftext|>' - bos_token: '<|endoftext|>' - eos_token: '<|endoftext|>' - pad_token: '[PAD]' - vocab_file: 'vocab.json' - merge_file: 'merges.txt' - type: WizardCoderTokenizer - type: WizardCoderProcessor diff --git a/research/wizardcoder/wizardcoder.md b/research/wizardcoder/wizardcoder.md deleted file mode 100644 index ddb1ab4e..00000000 --- a/research/wizardcoder/wizardcoder.md +++ /dev/null @@ -1,514 +0,0 @@ -# WizardCoder - -## 模型描述 - -WizardCoder是由WizardLM团队推出了一个新的指令微调代码大模型,打破了闭源模型的垄断地位,超越了闭源大模型Anthropic Claude和谷歌的Bard。WizardCoder大幅度地提升了开源模型的SOTA水平,创造了惊人的进步,提高了22.3%的性能,成为了开源领域的新时代引领者。 -WizardCoder完全开源可商用,基于 Transformer 结构,上下文窗口长度为 2048,参数量为150亿。 本仓库提供了WizardCoder-15B预训练模型。 - -## 仓库介绍 - -`WizardCoder` 基于 `mindformers` 实现,主要涉及的文件有: - -1. 模型具体实现:`research/wizardcoder` - - ```bash - wizardcoder - ├── wizardcoder_tokenizer.py # tokenizer - ├── wizardcoder.py # 15B模型实现 - └── wizardcoder_modules.py # self-attention模块实现 - ``` - -2. 模型配置:`research/wizardcoder` - - ```bash - wizardcoder - └── run_wizardcoder.yaml # 15B全量微调Atlas 800T A2启动配置 - ``` - -3. 数据处理脚本和任务启动脚本:`research/wizardcoder` - - ```bash - wizardcoder - ├── wizardcoder_preprocess.py # wizardcoder数据集预处理脚本 - └── run_wizardcoder.py # wizardcoder高阶接口使用脚本 - ``` - -### 环境要求 - -- 硬件: Atlas 800T A2 - -### 支持源码编译安装,用户可以执行下述的命令进行包的安装: - -```shell -#!/bin/bash -git clone -b dev https://gitee.com/mindspore/mindformers.git -cd mindformers -bash build.sh -pip install -r requirements.txt -``` - -设置环境变量 - -```shell -#!/bin/bash -export ASCEND_CUSTOM_PATH=/path/cann/ascend-toolkit -export ASCEND_HOME_PATH=$ASCEND_CUSTOM_PATH - -#导入CANN基本环境变量 -source $ASCEND_CUSTOM_PATH/set_env.sh -export LD_LIBRARY_PATH=$ASCEND_CUSTOM_PATH/latest/lib64:$ASCEND_CUSTOM_PATH/latest/opp/built-in/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64:$LD_LIBRARY_PATH - -#配置整网ND消除格式转换算子 -export MS_ENABLE_FORMAT_MODE=1 - -#REF模式和CELL共享 -export MS_DISABLE_REF_MODE=1 - -#内存优化:配置atomic内存单独清零 -export MS_GE_ATOMIC_CLEAN_POLICY=1 - -#内存优化:配置内存扩展模式(实现纯静态图之间内存复用) -export GE_USE_STATIC_MEMORY=2 -``` - -**注:** `ASCEND_CUSTOM_PATH`的`path`替换为CANN包真实地址 - -### 模型权重下载与转换(mindformers权重或huggingface权重选择使用即可) - -#### mindformers权重直接使用 - -本仓库提供已经转换完成的预训练权重用于训练/微调/推理,用户可自行从下方链接拉取后直接使用,Base用于微调。 - -```shell -#!/bin/bash -mkdir -p ckpt/rank_0 -cd ./ckpt/rank_0 -wget https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/MindFormers/wizardcode/wizardcoder_15B.ckpt -cd ../.. 
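# The rank_0 sub-directory created above matches what the training steps later in this
# guide pass via --load_checkpoint ./ckpt (with auto_trans_ckpt: True); verify the download:
ls -lh ckpt/rank_0/wizardcoder_15B.ckpt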
-``` - -#### huggingface权重转换后使用 - -从huggingface下载预训练权重后根据以下步骤进行权重转换,需要下载整个工程,huffingface权重的链接如下: - -```shell -#!/bin/bash -mkdir -p ckpt/rank_0 -cd ./ckpt -wget https://huggingface.co/WizardLM/WizardCoder-15B-V1.0/resolve/main/added_tokens.json -wget https://huggingface.co/WizardLM/WizardCoder-15B-V1.0/resolve/main/config.json -wget https://huggingface.co/WizardLM/WizardCoder-15B-V1.0/resolve/main/generation_config.json -wget https://huggingface.co/WizardLM/WizardCoder-15B-V1.0/resolve/main/merges.txt -wget https://huggingface.co/WizardLM/WizardCoder-15B-V1.0/resolve/main/pytorch_model.bin -wget https://huggingface.co/WizardLM/WizardCoder-15B-V1.0/resolve/main/special_tokens_map.json -wget https://huggingface.co/WizardLM/WizardCoder-15B-V1.0/resolve/main/tokenizer.json -wget https://huggingface.co/WizardLM/WizardCoder-15B-V1.0/resolve/main/tokenizer_config.json -wget https://huggingface.co/WizardLM/WizardCoder-15B-V1.0/resolve/main/vocab.json -cd .. -``` - -**注**: 请安装torch=1.11.0和transformers=4.30.2版本; 进行模型转换后,需要重新根据本项目[requirements.txt](../../requirements.txt)恢复tokenizers版本 - -```shell -#!/bin/bash -pip install torch==1.11.0 -i https://pypi.tuna.tsinghua.edu.cn/simple -pip install transformers==4.30.2 -i https://pypi.tuna.tsinghua.edu.cn/simple - -# 后续转换任务完成后 -pip install -r requirement.txt -``` - -下载完成后,运行`/research/wizardcoder/convert_weight.py`转换脚本,将huggingface的权重转换为完整的ckpt权重。 - -```shell -#!/bin/bash -python ./research/wizardcoder/convert_weight.py \ ---torch_path ./ckpt/pytorch_model.bin \ ---mindspore_path ./ckpt/rank_0/wizardcoder_15b.ckpt -``` - -```text -# 参数说明 -torch_path: huggingface权重保存目录路径下任意权重bin文件,根据文件路径读取目录下全部权重 -mindspore_path: mindspore格式的权重保存文件名,如'saved_dir/wizardcoder.ckpt' -``` - -## WizardCoder-15B - -### 训练和微调性能 - -| config | task | Datasets | SeqLength | metric | phase | score | performance(tokens/s/p) | -| ------------------------------------------------------------ | --------------------- | --------- | --------- | ------ | ----------------- | --------- | ------------ | -| [wizardcoder_15b](./run_wizardcoder.yaml) | text_generation | alpaca | 2048 | - | [train](#预训练) | - | 798.7 | -| [wizardcoder_15b](./run_wizardcoder.yaml) | text_generation | alpaca | 2048 | - | [finetune](#微调) | - | 798.7 | - -```shell -#!/bin/bash -pip install mindspore==2.2.0 -pip install mindpet==1.0.2 -``` - -### 预训练 - -#### 数据集准备-预训练数据集 - -当前提供Alpaca数据集的预处理和预训练样例,用于对wizardcoder-15B模型进行预训练。数据集的官方下载链接如下: - -```shell -#!/bin/bash -mkdir dataset -cd ./dataset -wget https://github.com/tatsu-lab/stanford_alpaca/blob/main/alpaca_data.json -cd .. -``` - -修改`research/wizardcoder/wizardcoder_preprocess.py`,进行数据预处理、Mindrecord数据生成,将带有prompt模板的数据转换为mindrecord格式。 - -```python -# 原代码 research/wizardcoder/wizardcoder_preprocess.py, 138行 -# tokenize_qa()的入参if_jsonl需要设置为False -def tokenize_qa(tokenizer, file_path, max_length, if_jsonl=False): - ... -``` - -```shell -#!/bin/bash -python research/wizardcoder/wizardcoder_preprocess.py \ ---input_glob ./dataset/alpaca_data.json \ ---vocab_file ./ckpt/vocab.json \ ---merge_file ./ckpt/merges.txt \ ---output_file ./dataset/alpaca_data.mindrecord \ ---seq_length 2048 -``` - -#### 预训练启动 - -- step 1. 
修改`research/wizardcoder/run_wizardcoder.yaml`中相关配置 - -```text -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: './ckpt' # 添加预训练权重路径 -auto_trans_ckpt: True -only_save_strategy: False -resume_training: False -use_parallel: True -run_mode: 'train' -# dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "{path}/adgen.mindrecord" # 修改训练数据集路径 - shuffle: True - input_columns: ["input_ids", "labels"] -``` - -- step 2. 启动微调任务,按照以下步骤启动: - --[x] 1: 根据服务器节点数等信息,修改相应的配置。 - -```shell -#!/bin/bash -# 以wizardcoder模型为例,默认配置单机8卡,如果节点数有变,需要修改相应的配置。 -# 配置文件路径:./research/wizardcoder/run_wizardcoder.yaml -parallel_config: - data_parallel: 1 - model_parallel: 4 - pipeline_stage: 2 - optimizer_shard: True - micro_batch_num: 8 - vocab_emb_dp: True - gradient_aggregation_group: 4 -``` - --[x] 2: 执行运行脚本。 - -```shell -#!/bin/bash -bash scripts/msrun_launcher.sh \ -"python research/wizardcoder/run_wizardcoder.py \ ---config research/wizardcoder/run_wizardcoder.yaml \ ---load_checkpoint ./ckpt \ ---use_parallel True \ ---run_mode train \ ---train_data ./dataset/alpaca_data.mindrecord" 8 -``` - -```text -# 参数说明 -config: 配置文件路径 -load_checkpoint: 权重文件夹路径 -run_mode: 运行模式,训练时设置为train -train_data: 训练数据集路径 -``` - -### 微调 - -#### 数据集准备-SFT微调数据集 - -当前提供codealpaca数据集的预处理和微调样例,用于对wizardcoder-15B模型进行微调。数据集下载链接如下: - -```shell -#!/bin/bash -mkdir finetune_dataset -cd ./finetune_dataset -wget https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k/resolve/main/code_alpaca_20k.json -cd .. -``` - -执行`research/wizardcoder/wizardcoder_preprocess.py`,进行数据预处理、Mindrecord数据生成,将带有prompt模板的数据转换为mindrecord格式。 - -```python -# 原代码 research/wizardcoder/wizardcoder_preprocess.py, 138行 -# tokenize_qa()的入参if_jsonl需要设置为False -def tokenize_qa(tokenizer, file_path, max_length, if_jsonl=False): - ... -``` - -```bash -# 脚本路径:research/wizardcoder/wizardcoder_preprocess.py -python research/wizardcoder/wizardcoder_preprocess.py \ ---input_glob ./finetune_dataset/code_alpaca_20k.json \ ---vocab_file ./ckpt/vocab.json \ ---merge_file ./ckpt/merges.txt \ ---output_file ./finetune_dataset/code_alpaca.mindrecord \ ---seq_length 2048 -``` - -#### 全参微调 - -全参微调需要多卡启动,以`CodeAlpaca-20k`数据集为例,给出了默认配置文件`research/wizardcoder/run_wizardcoder.yaml`。 - -- step 1. 修改`research/wizardcoder/run_wizardcoder.yaml`中相关配置 - -```text -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: './output/transformed_checkpoint/' # 添加预训练权重路径 -auto_trans_ckpt: False -only_save_strategy: False -resume_training: False -use_parallel: True -run_mode: 'finetune' -# dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "./finetune_dataset/code_alpaca.mindrecord" # 修改训练数据集路径 - shuffle: True - input_columns: ["input_ids", "labels"] -# 指令微调时(如code_alpaca数据集),input_columns: ["input_ids", "labels"] -``` - -- step 2. 
启动微调任务,按照以下步骤启动: - --[x] 1: 根据服务器节点数等信息,修改相应的配置。 - -```shell -# 以wizardcoder模型为例,默认配置单机8卡,如果节点数有变,需要修改相应的配置。 -# 配置文件路径:./research/wizardcoder/run_wizardcoder.yaml -parallel_config: - data_parallel: 1 - model_parallel: 4 - pipeline_stage: 2 - optimizer_shard: True - micro_batch_num: 8 - vocab_emb_dp: True - gradient_aggregation_group: 4 -``` - --[x] 2: 执行运行脚本。 - -```shell -bash scripts/msrun_launcher.sh \ -"python research/wizardcoder/run_wizardcoder.py \ ---config research/wizardcoder/run_wizardcoder.yaml \ ---load_checkpoint ./output/transformed_checkpoint/ \ ---use_parallel True \ ---run_mode finetune \ ---train_data ./finetune_dataset/code_alpaca.mindrecord" 8 -``` - -```text -# 参数说明 -config: 配置文件路径 -load_checkpoint: 权重文件夹路径 -run_mode: 运行模式,微调时设置为finetune -train_data: 训练数据集路径 -``` - -### 快速推理 - -**注:** 推理部分需要更新如下环境变量 - -```shell -#!/bin/bash -unset MS_DISABLE_REF_MODE=1 -export MS_ENABLE_REF_MODE=1 -``` - -#### 基于高阶接口的推理 - -- step 1. 配置文件设置,添加tokenizer路径`vocab_file`和`merge_file`,并设置`batch_size`值为`1` - -在使用Trainer接口进行推理时,若用户自行下载wizardcoder权重,请在启动前先在配置文件中将vocab.json和merges.txt的路径自行配置,配置项为vocab_file和merge_file。 - -```yaml -# research/wizardcoder/run_wizardcoder.yaml -# runner config -runner_config: - epochs: 1 - batch_size: 1 # batch_size设为1 - sink_mode: True - sink_size: 2 -... -processor: - return_tensors: ms - tokenizer: - unk_token: '<|endoftext|>' - bos_token: '<|endoftext|>' - eos_token: '<|endoftext|>' - pad_token: '[PAD]' - vocab_file: 'vocab.json' # 添加vocab.json的路径 - merge_file: 'merges.txt' # 添加merges.txt的路径 - type: WizardCoderTokenizer -``` - -相关文件的下载链接如下:[vocab.json](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/MindFormers/wizardcode/vocab.json) 和 [merges.txt](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/MindFormers/wizardcode/merges.txt) - -- step 2. 
Trainer接口启动推理 - -wizardcoder的高阶接口使用脚本已集成在run_wizardcoder.py脚本中,运行此脚本命令示例: - -其中`ckpt_path_or_dir`为模型文件地址,如:{path}/wizardcoder.ckpt - -```shell -python run_wizardcoder.py \ ---config "run_wizardcoder.yaml" \ ---run_mode predict \ ---use_parallel False \ ---load_checkpoint ckpt_path_or_dir \ ---predict_data '使用python编写快速排序代码' \ ---device_id 0 - -# output: 快速排序(QuickSort)是一种非常高效的排序算法,它是选择排序算法的一个非常有效的改进版本。它的基本思想是通过一趟排序将待排记录分隔成独立的两部分,其中一部分记录的元素值比另一部分的元素值小,然后再按此方法对子部分继续进行排序,直到整个序列有序。\n\nPython中的快速排序算法可以实现如下:\n\n```\ndef quicksort(arr):\n if len(arr) <= 1:\n return arr\n else:\n pivot = arr[0]\n left = [x for x in arr[1:] if x <= pivot]\n middle = pivot\n right = [x for x in arr[1:] if x > pivot]\n return quicksort(left) + [middle] + quicksort(right)\n```\n\n这个函数接收一个列表作为输入,并返回一个排序后的列表。\n\n该函数首先检查输入列表的长度,如果长度为0或1,直接返回列表。否则,选取第一项作为分区点(pivot),然后将列表中所有小于等于这个分区点的元素放入左子列表,大于分区点的元素放入右子列表。最后,递归地调用左子列表和右子列表的排序函数。\n\n这样,当递归到最底层的时候,每个子列表中只包含一个元素,这时候就不用再递归了。最后,将两个子列表连接起来,并加上分区点,得到一个排序后的列表。 -``` - -#### Pipeline推理(单卡) - -在使用Pipeline接口进行推理时,用户自行下载Wizardcoder-15B权重和tokenizer文件,在启动前自行配置路径 -WizardCoderConfig的入参use_past=False为自回归推理,use_past=True为增量推理 - -**注:** 使用如下脚本推理,其中`wizardcoder_model_path`是权重存放的地址,`tokenizer_path`是存放vocab.json和merges.txt的目录地址 - -```text -import os -import sys - -sys.path.append(os.path.abspath("../..")) -sys.path.insert(0, os.getcwd().split('research')[0]) -from mindspore import context -from mindformers.pipeline import pipeline - -from wizardcoder_config import WizardCoderConfig -from wizardcoder import WizardCoderLMHeadModel -from wizardcoder_tokenizer import WizardCoderTokenizer - -context.set_context(device_id=0, mode=0) - -# init model - -wizardcoder_model_path = "/path/wizardcoder_15b.ckpt" # 添加模型文件地址 -wizardcoder_config = WizardCoderConfig( - batch_size=1, - seq_length=2048, - n_position=8192, - vocab_size=49153, - hidden_size=6144, - num_layers=40, - num_heads=48, - eos_token_id=0, - pad_token_id=49152, - checkpoint_name_or_path=wizardcoder_model_path, - use_past=True # False为自回归推理,True为增量推理 -) -wizardcoder_model = WizardCoderLMHeadModel(config=wizardcoder_config) -wizardcoder_model.add_flags_recursive(is_first_iteration=True) - -# init tokenizer - -tokenizer_path = "/path/Wizardcoder-15B/tokenizer_path/" # Wizardcoder-15B tokenizer path -tokenizer = WizardCoderTokenizer( - vocab_file=tokenizer_path + "vocab.json", # tokenizer_path为存放vocab.json和merges.txt的地址 - merge_file=tokenizer_path + "merges.txt" -) -pipeline_task = pipeline(task="text_generation", model=wizardcoder_model, tokenizer=tokenizer) -input_data = "使用python编写快速排序代码" -pipeline_result = pipeline_task([input_data], - do_sample=False, - max_length=2048) -print(pipeline_result) - - -# output: [{'text_generation_text': ['使用python编写快速排序代码,并分析其时间复杂度。\r\n\r\n快速排序是一种分治算法,它的基本思想是:通过一趟排序将待排记录分隔成独立的两部分,其中一部分记录的关键字均比另一部分的关键字小,则可分别对这两部分记录继续进行排序,以达到整个序列有序。\r\n\r\n快速排序的步骤如下:\r\n\r\n1. 从数列中挑出一个元素,称为 “基准”(pivot)\r\n2. 重新排序数列,所有元素比基准值小的摆放在基准前面,所有元素比基准值大的摆在基准的后面(相同的数可以到任一边)。在这个分区退出之后,该基准就处于数列的中间位置。这个称为分区(partition)操作。\r\n3. 递归地(recursive)把小于基准值元素的子数列和大于基准值元素的子数列排序。\r\n\r\n快速排序的时间复杂度为O(nlogn),最坏情况下的时间复杂度为O(n^2),平均情况下的时间复杂度为O(nlogn)。\r\n\r\n下面是Python代码实现的快速排序:\r\n\r\n```python\r\ndef quick_sort(arr):\r\n if len(arr) <= 1:\r\n return arr\r\n else:\r\n pivot = arr[0]\r\n left = []\r\n right = []\r\n for i in range(1, len(arr)):\r\n if arr[i] < pivot:\r\n left.append(arr[i])\r\n else:\r\n right.append(arr[i])\r\n return quick_sort(left) + [pivot] + quick_sort(right)\r\n```\r\n\r\n该代码的基本思路是:\r\n\r\n1. 如果数组的长度小于等于1,则直接返回数组。\r\n2. 
选择数组的第一个元素作为基准值。\r\n3. 遍历数组,将比基准值小的元素放到左边,将比基准值大的元素放到右边。\r\n4. 递归地对左边和右边的子数组进行排序。\r\n5. 将左边子数组、基准值、右边子数组合并成一个新的数组。\r\n\r\n下面是该代码的时间复杂度分析:\r\n\r\n- 最坏情况下的时间复杂度:当数组的长度为n,且每次选择的基准值都为数组的第一个元素时,每次递归都需要进行n-1次,因此最坏情况下的时间复杂度为O(n^2)。\r\n- 平均情况下的时间复杂度:每次选择的基准值都为数组的中间元素,每次递归都需要进行logn次,因此平均情况下的时间复杂度为O(nlogn)。\r\n- 最优情况下的时间复杂度:当数组的长度为n,且每次选择的基准值都为数组的第一个元素时,每次递归都需要进行logn次,因此最优情况下的时间复杂度为O(nlogn)。']}] - -``` - -#### Pipeline推理(单机多卡) - -以单机4卡分布式推理为例,设置dp=1, mp=4 - -- step 1. yaml配置 - -修改run_wizardcoder.yaml中的配置项,只需要修改如下yaml中的vocab_file和merge_file地址 - -```yaml -use_parallel: True # 单机多卡或多机多卡必须设为True -parallel_config: - data_parallel: 1 - model_parallel: 4 - pipeline_stage: 1 -model: - model_config: - use_seq_parallel: False - use_past: True # False为自回归推理,True为增量推理 - checkpoint_name_or_path: "" -processor: - tokenizer: - vocab_file: '/tokenizer_path/vocab.json' - merge_file: '/tokenizer_path/merges.txt' - type: WizardCoderTokenizer - type: WizardCoderProcessor -... -``` - -- step 2. 切分权重 - -```text - └── distribute_model_ckpt_path - └── rank_0 - └── checkpoint_0.ckpt - └── rank_1 - └── checkpoint_1.ckpt - └── rank_2 - └── checkpoint_2.ckpt - └── rank_3 - └── checkpoint_3.ckpt -``` - -- step 3. 推理脚本 - -```bash -bash scripts/msrun_launcher.sh "research/wizardcoder/run_wizardcoder.py \ ---config research/wizardcoder/run_wizardcoder.yaml \ ---load_checkpoint ./output/transformed_checkpoint/ \ ---use_parallel True \ ---run_mode predict \ ---predict_data '使用python编写快速排序代码,并分析其时间复杂度' \ ---vocab_file vocab.json \ ---merge_file merges.txt" 8 -``` - -推理结果 - -```text -{'text_generation_text': ['使用python编写快速排序代码,并分析其时间复杂度。\r\n\r\n快速排序是一种分治算法,它的基本思想是:通过一趟排序将待排记录分隔成独立的两部分,其中一部分记录的关键字均比另一部分的关键字小,则可分别对这两部分记录继续进行排序,以达到整个序列有序。\r\n\r\n快速排序的步骤如下:\r\n\r\n1. 从数列中挑出一个元素,称为 “基准”(pivot)\r\n2. 重新排序数列,所有元素比基准值小的摆放在基准前面,所有元素比基准值大的摆在基准的后面(相同的数可以到任一边)。在这个分区退出之后,该基准就处于数列的中间位置。这个称为分区(partition)操作。\r\n3. 递归地(recursive)把小于基准值元素的子数列和大于基准值元素的子数列排序。\r\n\r\n快速排序的时间复杂度为O(nlogn),最坏情况下的时间复杂度为O(n^2),平均情况下的时间复杂度为O(nlogn)。\r\n\r\n下面是Python代码实现的快速排序:\r\n\r\n```python\r\ndef quick_sort(arr):\r\n if len(arr) <= 1:\r\n return arr\r\n else:\r\n pivot = arr[0]\r\n left = []\r\n right = []\r\n for i in range(1, len(arr)):\r\n if arr[i] < pivot:\r\n left.append(arr[i])\r\n else:\r\n right.append(arr[i])\r\n return quick_sort(left) + [pivot] + quick_sort(right)\r\n```\r\n\r\n该代码的基本思路是:\r\n\r\n1. 如果数组的长度小于等于1,则直接返回数组。\r\n2. 选择数组的第一个元素作为基准值。\r\n3. 遍历数组,将比基准值小的元素放到左边,将比基准值大的元素放到右边。\r\n4. 递归地对左边和右边的子数组进行排序。\r\n5. 将左边子数组、基准值、右边子数组合并成一个新的数组。\r\n\r\n下面是该代码的时间复杂度分析:\r\n\r\n- 最坏情况下的时间复杂度:当数组的长度为n,且每次选择的基准值都为数组的第一个元素时,每次递归都需要进行n-1次,因此最坏情况下的时间复杂度为O(n^2)。\r\n- 平均情况下的时间复杂度:每次选择的基准值都为数组的中间元素,每次递归都需要进行logn次,因此平均情况下的时间复杂度为O(nlogn)。\r\n- 最优情况下的时间复杂度:当数组的长度为n,且每次选择的基准值都为数组的第一个元素时,每次递归都需要进行logn次,因此最优情况下的时间复杂度为O(nlogn)。']} -``` diff --git a/research/wizardcoder/wizardcoder.py b/research/wizardcoder/wizardcoder.py deleted file mode 100644 index 7927a02d..00000000 --- a/research/wizardcoder/wizardcoder.py +++ /dev/null @@ -1,417 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -"""WizardCoder model""" -import copy - -import numpy as np -import mindspore.nn as nn -import mindspore.common.dtype as mstype -from mindspore.common.tensor import Tensor -from mindspore.common.initializer import initializer -from mindspore.ops import operations as P - -from mindformers.models.utils import lazy_inline -from mindformers.modules.transformer.moe import default_moe_config -from mindformers.modules.layers import LayerNorm -from mindformers.version_control import get_dropout -from mindformers.core.loss import CrossEntropyLoss -from mindformers.modules.transformer import AttentionMask -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType -from mindformers.models.modeling_utils import PreTrainedModel -from mindformers.tools.logger import logger -from wizardcoder_config import WizardCoderConfig -from wizardcoder_modules import WizardCoderTransformerDecoderLayer, WizardCoderVocabEmbedding - - -__all__ = ['WizardCoderLMHeadModel'] - - -class WizardCoderPreTrainedModel(PreTrainedModel): - """ - An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained - models. - """ - - config_class = WizardCoderConfig - base_model_prefix = "wizardcoder" - - -@MindFormerRegister.register(MindFormerModuleType.MODELS) -class WizardCoderLMHeadModel(WizardCoderPreTrainedModel): - r""" - Provide wizardcoder training loss or logits through network. - Args: - config (WizardCoderConfig): The config of WizardCoderModel. - - Returns: - Tensor, the loss or logits of the network. - """ - @lazy_inline - def __init__(self, config: WizardCoderConfig = None): - config = config if config is not None else WizardCoderConfig() - super(WizardCoderLMHeadModel, self).__init__(config, auto_prefix=True) - self.use_past = config.use_past - self.eos_token = self.config.eos_token - self.pad_token = self.config.pad_token - self.eos_token_tensor = Tensor((np.ones((1, 1)) * self.eos_token).astype(np.int32)) - self.seq_length = config.seq_length - - parallel_config = self.config.parallel_config - dp, mp = parallel_config.data_parallel, parallel_config.model_parallel - self.stridedslice = P.StridedSlice().shard(((dp, 1),)) - self.not_equal = P.NotEqual().shard(((dp, 1), ())) - - # AttentionMask default compute_dtype is fp16 - # if assign compute_dtype to fp32, loss will error - # if assign compute_dtype to bf16, lower_triangle_mask is fp32, will error - self.get_attention_mask = AttentionMask( - seq_length=config.seq_length, parallel_config=parallel_config.dp_mp_config).to_float(config.compute_dtype) - - self.backbone = WizardCoderModel(config) - self.dtype = config.compute_dtype - self.head = WizardCoderHead(vocab_size=config.vocab_size, - compute_dtype=self.dtype, - parallel_config=self.config.parallel_config) - - if parallel_config.pipeline_stage > 1: - self.head.pipeline_stage = parallel_config.pipeline_stage - 1 - self.backbone.embedding.word_embedding.embedding_table.add_pipeline_stage(self.head.pipeline_stage) - - vocab_size = config.vocab_size - loss_parallel_config = copy.deepcopy(parallel_config) - - if vocab_size % mp != 0: - logger.warning("The vocab size of WizardCoder Loss is: %s, it is not divide by model_parallel: %s", - vocab_size, mp) - logger.warning("Now, the model_parallel num of WizardCoder Loss will be changed: mp = 1") - 
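            # vocab_size is 49153 in the wizardcoder configs, which is odd and therefore cannot be
            # sharded evenly across model_parallel = 4 or 8, so the loss falls back to mp = 1 here.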
loss_parallel_config.model_parallel = 1 - - self.loss = CrossEntropyLoss(parallel_config=loss_parallel_config, eps_const=1e-24) - self.reshape = P.Reshape() - self.shape = P.Shape() - self.cast = P.Cast() - self.load_checkpoint(config) - self.add = P.Add().shard(((dp, 1), ())) - self.mul = P.Mul().shard(((dp, 1), (dp, 1))) - self.tile = P.Tile() - self.gather = P.Gather() - self.concat = P.Concat(axis=-1) - self.ones = P.Ones() - self.compute_dtype = config.compute_dtype - - def prepare_inputs_for_generation(self, input_ids, **kwargs): - input_position = kwargs.get("current_index", None) - if input_position is not None: - input_position = Tensor(input_position, mstype.int32) - return { - "input_ids": Tensor(input_ids, mstype.int32), - "input_position": input_position - } - - def prepare_inputs_for_predict_layout(self, input_ids, **kwargs): - """Get Wizardcoder model input tuple for transform ckpt.""" - input_ids = Tensor(input_ids, mstype.int32) - labels = Tensor(kwargs["labels"]) if "labels" in kwargs else None - return input_ids, labels, None, None, None, None - - def add_flags_custom(self, is_first_iteration): - """Add customized attributes for specific cells in the model.""" - self.add_flags(is_first_iteration=is_first_iteration) - self.backbone.add_flags(is_first_iteration=is_first_iteration) - for layer in self.backbone.blocks: - layer.add_flags(is_first_iteration=is_first_iteration) - layer.attention.add_flags(is_first_iteration=is_first_iteration) - - def construct(self, input_ids, labels=None, input_mask=None, input_position=None, - init_reset=True, batch_valid_length=None): - r""" - construct function for Language Modeling - - Args: - input_ids (Tensor): the indices of input sequence tokens in the vocabulary. - labels (Tensor): the indices of labels in the vocabulary. - - Returns: - logits (Tensor) or loss (mstype.float32): if is_training is False, directly return the logits, - otherwise, return the computed loss. 
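        Note: input_mask is recomputed from pad_token inside this method; input_position is
            used to gather the logits of the current decode position during prediction;
            init_reset and batch_valid_length carry the incremental (use_past) inference state
            and are normalized to Tensors only when use_past is enabled.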
- """ - batch_size, seq_length = self.shape(input_ids) - if self.use_past: - if not isinstance(init_reset, Tensor): - init_reset = Tensor([init_reset], mstype.bool_) - if not isinstance(batch_valid_length, Tensor): - batch_valid_length = self.ones((batch_size, 1), mstype.int32) - - if self.training: - tokens = self.stridedslice(input_ids, (0, 0), (batch_size, seq_length - 1), (1, 1)) - else: - tokens = input_ids - - input_mask = self.cast(self.not_equal(tokens, self.pad_token), self.dtype) - attention_mask = self.get_attention_mask(input_mask) - # if do not cast to bf16, loss will error - attention_mask = self.cast(attention_mask, self.dtype) - - # [batch_size, seq_length, vocab_size] - output_states, table = self.backbone(tokens, attention_mask, input_position, init_reset=init_reset, - batch_valid_length=batch_valid_length) - logits = self.head(output_states, table) - - if not self.training: - logits = self.reshape(logits, (-1, logits.shape[-1])) - if (not self.use_past or self.is_first_iteration) and input_position is not None: - logits = self.gather(logits, input_position, 0) - # makes cast effective to avoid allgather issue in Mindspore1.10 - input_mask = self.add(input_mask, 1) - # cast logits from bf16 to fp32 is caused by bf16 cannot asnumpy in text_generator.py - logits = self.cast(logits, mstype.float32) - return logits, tokens, input_mask - - if labels is None: - labels = self.stridedslice(input_ids, (0, 1), (batch_size, seq_length), (1, 1)) - else: - if self.training: - labels = self.stridedslice(labels, (0, 1), (batch_size, seq_length), (1, 1)) - label_mask = self.cast(self.not_equal(labels, -100), self.dtype) - input_mask = self.mul(input_mask, label_mask) - - labels = self.reshape(labels, (-1,)) - input_mask = self.reshape(input_mask, (-1,)) - # cast input_mask from bf16 to fp32 is caused by loss_reduce is fp32 in loss.py, - # if you do not change it, it will error in pynative mode, but it will run success in graph mode. 
- loss = self.loss(logits, labels, self.cast(input_mask, mstype.float32)) - - return loss - - -class WizardCoderEmbeddingLayer(nn.Cell): - r"""The Embedding Layer of WizardCoder network.""" - - def __init__(self, config: WizardCoderConfig = None): - super(WizardCoderEmbeddingLayer, self).__init__() - parallel_config = copy.deepcopy(config.parallel_config) - embedding_mp = config.parallel_config.embedding_dp_mp_config.model_parallel - vocab_size = config.vocab_size - if vocab_size % embedding_mp != 0: - logger.warning("The vocab size of embedding layer is: %s, it is not divide by model_parallel: %s", - vocab_size, embedding_mp) - logger.warning("Now, model_parallel will be changed: mp = 1") - parallel_config.embedding_dp_mp_config.model_parallel = 1 - - self.word_embedding = WizardCoderVocabEmbedding(vocab_size=vocab_size, - embedding_size=config.embedding_size, - param_init=initializer('normal', - [vocab_size, config.embedding_size], - dtype=config.param_init_type), - parallel_config=parallel_config.embedding_dp_mp_config) - self.word_embedding.embedding_table.parallel_optimizer = True - new_parallel_config = copy.deepcopy(parallel_config) - new_parallel_config.vocab_emb_dp = True - - self.position_embedding = WizardCoderVocabEmbedding(vocab_size=config.n_position, - embedding_size=config.embedding_size, - param_init=initializer('normal', - [config.n_position, - config.embedding_size], - dtype=config.param_init_type), - parallel_config=new_parallel_config.embedding_dp_mp_config) - dp = parallel_config.data_parallel - self.add = P.Add().shard(((dp, 1, 1), (dp, 1, 1))) - self.dropout = get_dropout(config.dropout_prob) - self.dropout.dropout.shard(((dp, 1, 1),)) - - def construct(self, input_ids, input_position): - """The forward compute of Embedding Layer.""" - word_embedding, word_table = self.word_embedding(input_ids) - position_embedding, _ = self.position_embedding(input_position) - embedding = self.add(word_embedding, position_embedding) - embedding = self.dropout(embedding) - return embedding, word_table - - -def set_parallel_configure_for_layer(network, layer_id, offset, parallel_config, layers, use_select_recompute): - r""" - Default setting for the pipeline is: `(layer_id + offset) // (layers / pipeline_stage)`. - - Args: - network(Cell) - Represents the transformer block - parallel_config(dict) - Parallel Config - layer_id(int) - Means the layer index for the current module, counts from zero. - offset(int) - Means the layer_index needs a offset, if there are other modules in the net. - layers(int) - The total layers used for the model. 
- """ - pp = parallel_config.pipeline_stage - pp_dis = max(int(np.ceil((layers - 1) / pp)), 1) - pp_remainder = layers % pp - if pp_remainder > 0 and pp_dis != 1: - if layer_id < (pp - pp_remainder) * (pp_dis - 1): - pp_dis = pp_dis - 1 - else: - layer_id = layer_id + pp - pp_remainder - - pp_id = min((layer_id + offset) // pp_dis, pp - 1) - network.pipeline_stage = pp_id - - # Used for optimizer's fusion tag - dis = max(int((layers + 1) / parallel_config.gradient_aggregation_group), 1) - if pp > 1: - network.set_comm_fusion(2) - else: - network.set_comm_fusion(int((layer_id + offset) / dis) + 1) - if not use_select_recompute: - if isinstance(parallel_config.recompute, bool): - if parallel_config.recompute: - network.recompute() - else: - if parallel_config.recompute.recompute: - network.recompute(recompute_slice_activation=parallel_config.recompute.recompute_slice_activation) - - else: - network.attention.set_select_recompute() - - -class WizardCoderModel(WizardCoderPreTrainedModel): - """ - The backbone of WizardCoder network - - Args: - config(WizardCoderConfig): the config of network - - Inputs: - input_ids: the tokenized inputs with datatype int32 - input_mask: the mask indicating whether each position is a valid input - - Returns: - output_state: Tensor, the output logit of backbone - present_layer: Tensor, the current feature map - embedding_table: Tensor, the embedding table for the vocabulary - """ - - def __init__(self, config): - super(WizardCoderModel, self).__init__(config) - - self.embedding = WizardCoderEmbeddingLayer(config) - self.embedding.pipeline_stage = 0 - self.cast_rec = P.Cast() - self.reshape_rec = P.Reshape() - self.config = config - self.is_first_iteration = True - self.use_past = config.use_past - - self.layernorm = LayerNorm((config.embedding_size,), param_init_type=config.layernorm_dtype) - if config.parallel_config.pipeline_stage > 1: - self.layernorm.set_comm_fusion(2) - else: - self.layernorm.set_comm_fusion(config.parallel_config.gradient_aggregation_group) - self.layernorm.shard(((config.parallel_config.data_parallel, 1),)) - self.layernorm.pipeline_stage = config.parallel_config.pipeline_stage - 1 - - if config.use_select_recompute: - self.layernorm.layer_norm.add_prim_attr("recompute_comm_op", True) - - if not hasattr(config.parallel_config, "moe_config"): - config.parallel_config.moe_config = default_moe_config - moe_config = config.parallel_config.moe_config - - self.blocks = nn.CellList() - for i in range(config.num_layers): - block = WizardCoderTransformerDecoderLayer( - hidden_size=config.embedding_size, - batch_size=config.batch_size, - ffn_hidden_size=config.embedding_size * config.expand_ratio, - seq_length=config.seq_length, - num_heads=config.num_heads, - attention_dropout_rate=config.attention_probs_dropout_prob, - hidden_dropout_rate=config.hidden_dropout_prob, - hidden_act=config.hidden_act, - use_past=config.use_past, - compute_dtype=config.compute_dtype, - param_init_type=config.param_init_type, - layernorm_compute_type=config.layernorm_dtype, - softmax_compute_type=config.softmax_dtype, - parallel_config=config.parallel_config.dp_mp_config, - use_seq_parallel=config.use_seq_parallel, - use_flash_attention=config.use_flash_attention, - moe_config=moe_config) - set_parallel_configure_for_layer( - block, layer_id=i, layers=config.num_layers, - offset=0, parallel_config=config.parallel_config, - use_select_recompute=config.use_select_recompute) - self.blocks.append(block) - - self.tile = 
P.Tile().shard(((config.parallel_config.data_parallel,),)) - self.dtype = config.compute_dtype - self.num_layers = config.num_layers - self.input_position = Tensor(np.arange(config.seq_length), mstype.int32) - self.bias = Tensor(np.arange(config.batch_size) * self.config.seq_length, mstype.int32) - self.shape = P.Shape() - self.reshape = P.Reshape() - self.sub = P.Sub() - - def construct(self, input_ids, attention_mask, input_position=None, init_reset=False, batch_valid_length=None): - """wizardcoder model""" - batch_size, seq_length = self.shape(input_ids) - if input_position is None or self.is_first_iteration: - if batch_size == 1: - input_position = self.reshape_rec(self.input_position, (1, seq_length)) - else: - input_position = self.tile(self.input_position, (batch_size, 1)) - else: - bias = Tensor(np.arange(batch_size) * self.config.seq_length, mstype.int32) - input_position = self.sub(input_position, bias) - input_position = self.reshape(input_position, (batch_size, 1)) - input_embedding, embedding_table = self.embedding(input_ids, input_position) - - hidden_states = self.cast_rec(input_embedding, self.dtype) - hidden_shape = self.shape(hidden_states) - hidden_states = self.reshape_rec(hidden_states, (-1, hidden_shape[-1])) - - for i in range(self.num_layers): - hidden_states = self.blocks[i](hidden_states, attention_mask, init_reset=init_reset, - batch_valid_length=batch_valid_length) - output_state = self.layernorm(hidden_states) - return output_state, embedding_table - - -class WizardCoderHead(nn.Cell): - r"""Head for wizardcoder to get the logits of each token in the vocab.""" - - def __init__(self, - vocab_size, - compute_dtype, - parallel_config=None): - super().__init__() - copied_parallel_config = copy.deepcopy(parallel_config) - mp = copied_parallel_config.model_parallel - if vocab_size % mp != 0: - logger.warning("The vocab size of WizardCoderHead MatMul is: %s, it is not divide by model_parallel: %s", - vocab_size, mp) - logger.warning("Now, the model_parallel num of WizardCoderHead MatMul will be changed: mp = 1") - copied_parallel_config.model_parallel = 1 - - if copied_parallel_config.pipeline_stage > 1: - copied_parallel_config.vocab_emb_dp = False - dp, mp = copied_parallel_config.data_parallel, copied_parallel_config.model_parallel - if copied_parallel_config.vocab_emb_dp: - self.matmul = P.MatMul(transpose_b=True).shard(((dp, 1), (1, 1))) - else: - self.matmul = P.MatMul(transpose_b=True).shard(((dp, 1), (mp, 1))) - self.dtype = compute_dtype - self.cast = P.Cast() - - def construct(self, state, table): - logits = self.matmul(self.cast(state, self.dtype), self.cast(table, self.dtype)) - return logits diff --git a/research/wizardcoder/wizardcoder_config.py b/research/wizardcoder/wizardcoder_config.py deleted file mode 100644 index fd7e09b5..00000000 --- a/research/wizardcoder/wizardcoder_config.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""Wizardcoder Config API.""" - -from mindformers.modules.transformer.moe import MoEConfig -from mindformers.modules.transformer.transformer import default_transformer_config, default_moe_config, \ - TransformerOpParallelConfig - -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType -from mindformers.models.utils import convert_mstype -from mindformers.models.configuration_utils import PretrainedConfig - -__all__ = ['WizardCoderConfig'] - - -@MindFormerRegister.register(MindFormerModuleType.CONFIG) -class WizardCoderConfig(PretrainedConfig): - """ - wizardcoder config class which defines the model size - """ - - model_type = "wizardcoder" - - def __init__(self, - dropout_prob: float = 0.1, - batch_size: int = None, - seq_length: int = 1024, - n_position: int = 8192, - vocab_size: int = 50257, - hidden_size: int = 768, - num_layers: int = 12, - num_heads: int = 12, - expand_ratio: int = 4, - hidden_dropout_prob: float = 0.1, - attention_probs_dropout_prob: float = 0.1, - initializer_range: float = 0.02, - eos_token: int = 0, - pad_token: int = 49152, - param_init_type: str = "float16", - layernorm_dtype: str = "float32", - softmax_dtype: str = "float16", - compute_dtype: str = "float16", - hidden_act: str = 'gelu', - parallel_config: TransformerOpParallelConfig = default_transformer_config, - use_past: bool = False, - use_seq_parallel: bool = False, - use_select_recompute: bool = False, - checkpoint_name_or_path: str = "", - moe_config: MoEConfig = default_moe_config, - top_p=0.95, - top_k=1, - repetition_penalty=1, - max_length=20, - do_sample=False, - start_token_id=1, - eos_token_id=2, - is_encoder_decoder=False, - use_flash_attention=False, - **kwargs): - super(WizardCoderConfig, self).__init__(**kwargs) - self.dropout_prob = dropout_prob - self.batch_size = batch_size - self.seq_length = seq_length - self.n_position = n_position - self.vocab_size = vocab_size - self.embedding_size = hidden_size - self.num_layers = num_layers - self.num_heads = num_heads - self.expand_ratio = expand_ratio - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.initializer_range = initializer_range - self.param_init_type = convert_mstype(param_init_type) - self.layernorm_dtype = convert_mstype(layernorm_dtype) - self.softmax_dtype = convert_mstype(softmax_dtype) - self.compute_dtype = convert_mstype(compute_dtype) - self.parallel_config = parallel_config - self.use_past = use_past - self.use_seq_parallel = use_seq_parallel - self.checkpoint_name_or_path = checkpoint_name_or_path - self.moe_config = moe_config - self.eos_token = eos_token - self.pad_token = pad_token - self.hidden_act = hidden_act - self.use_select_recompute = use_select_recompute - # Basic the configuration for the generation - self.top_p = top_p - self.top_k = top_k - self.repetition_penalty = repetition_penalty - self.max_length = max_length - self.start_token_id = start_token_id - self.eos_token_id = eos_token_id - self.is_encoder_decoder = is_encoder_decoder - self.do_sample = do_sample - self.use_flash_attention = use_flash_attention diff --git a/research/wizardcoder/wizardcoder_modules.py b/research/wizardcoder/wizardcoder_modules.py deleted file mode 100644 index 047bc62f..00000000 --- a/research/wizardcoder/wizardcoder_modules.py +++ /dev/null @@ -1,599 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 
2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Wizardcoder modules.""" -import numpy as np -import mindspore.common.dtype as mstype -from mindspore.common.tensor import Tensor -from mindspore.common.parameter import Parameter -from mindspore.ops import operations as P -from mindspore.ops import functional as F -from mindspore.parallel._utils import _get_parallel_mode -from mindspore.context import ParallelMode -from mindspore import log as logger - -from mindformers.modules.flash_attention import FlashAttention - -from mindformers.modules.transformer.moe import default_moe_config -from mindformers.modules.transformer import TransformerEncoderLayer, MultiHeadAttention, \ - VocabEmbedding, TransformerOpParallelConfig, EmbeddingOpParallelConfig -from mindformers.modules.transformer.op_parallel_config import default_dpmp_config -from mindformers.modules.layers import Linear, LayerNorm - -default_transformer_config = TransformerOpParallelConfig() -default_embedding_parallel_config = EmbeddingOpParallelConfig() - - -class WizardCoderVocabEmbedding(VocabEmbedding): - def __init__(self, vocab_size, embedding_size, parallel_config=default_embedding_parallel_config, - param_init='normal'): - super(WizardCoderVocabEmbedding, self).__init__(vocab_size, embedding_size, parallel_config, param_init) - dp, mp = parallel_config.data_parallel, parallel_config.model_parallel - if parallel_config.vocab_emb_dp: - self.gather = P.Gather().shard(((mp, 1), (dp, 1))) - logger.info(f"Using {dp} data parallel for the embedding lookup.") - - -class MultiQueryAttention(MultiHeadAttention): - r""" - This is an implementation of multi query attention. 
- Supported Platforms: - ``Ascend`` - """ - - def __init__(self, batch_size, - src_seq_length, - tgt_seq_length, - hidden_size, - num_heads, - compute_dtype, - softmax_compute_type, - param_init_type, - hidden_dropout_rate=0.1, - attention_dropout_rate=0.1, - use_past=False, - use_seq_parallel=False, - use_flash_attention=True, - parallel_config=default_dpmp_config): - super(MultiQueryAttention, self).__init__(batch_size, - src_seq_length, - tgt_seq_length, - hidden_size, - num_heads, - hidden_dropout_rate, - attention_dropout_rate, - compute_dtype, - softmax_compute_type, - param_init_type, - use_past, - parallel_config) - if not self._is_ascend: - raise ValueError("For 'MultiQueryAttention', now only support Ascend") - self.compute_dtype = compute_dtype - self.is_parallel_mode = _get_parallel_mode() in ( - ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL) - dp, mp = parallel_config.data_parallel, parallel_config.model_parallel - if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,): - if use_seq_parallel: - self.projection.shard(strategy_bias=((dp, 1), (1,)), - strategy_matmul=((dp, mp), (mp, 1)), - out_strategy_matmul=((dp * mp, 1),)) - logger.info("Enabling matmul recompuation when seq parallel enabled") - self.projection.matmul.add_prim_attr("recompute", True) - self.projection.matmul.add_prim_attr("recompute_comm_op", True) - else: - if use_seq_parallel: - self.dropout.dropout.shard(((dp * mp, 1),)) - self.projection.shard( - strategy_bias=((dp * mp, 1), (1,)), - strategy_matmul=((dp, mp), (mp, 1)), - out_strategy_matmul=((dp * mp, 1),)) - logger.info("Enabling matmul recompuation when seq parallel enabled") - self.projection.matmul.add_prim_attr("recompute", True) - self.projection.matmul.add_prim_attr("recompute_comm_op", True) - - self.batch_matmul = P.BatchMatMul().shard(((dp, mp, 1, 1), (dp, 1, 1, 1))) - - self.kv_heads = 1 - self.kv_dim = self.kv_heads * self.size_per_head - - self.transpose_one_head = P.Transpose().shard(((dp, 1, 1, 1),)) - self.tile_for_batch_matmul = P.Tile().shard(((dp, mp, 1, 1),)) - self.real_div_one_head = P.RealDiv().shard(((dp, 1, 1, 1), ())) - # Query - self.dense1 = Linear(hidden_size, - hidden_size, - compute_dtype=compute_dtype, - param_init_type=param_init_type) - self.dense1.shard(strategy_matmul=((dp, 1), (mp, 1)), - strategy_bias=((dp, mp), (mp,))) - old_mp = parallel_config.model_parallel - parallel_config.model_parallel = 1 - # Key - self.dense2 = Linear(hidden_size, - self.kv_dim, - compute_dtype=compute_dtype, - param_init_type=param_init_type) - self.dense2.shard(strategy_matmul=((parallel_config.data_parallel, 1), (parallel_config.model_parallel, 1)), - strategy_bias=((parallel_config.data_parallel, parallel_config.model_parallel), - (parallel_config.model_parallel,))) - self.dense2.weight.parallel_optimizer = False - - # Value - self.dense3 = Linear(hidden_size, - self.kv_dim, - compute_dtype=compute_dtype, - param_init_type=param_init_type) - self.dense3.shard(strategy_matmul=((parallel_config.data_parallel, 1), (parallel_config.model_parallel, 1)), - strategy_bias=((parallel_config.data_parallel, parallel_config.model_parallel), - (parallel_config.model_parallel,))) - - self.dense3.weight.parallel_optimizer = False - parallel_config.model_parallel = old_mp - self.cast_rec = P.Cast() - self.reshape_rec = P.Reshape() - self.flash_attention_flag = use_flash_attention - if self.flash_attention_flag: - self.flash_attention = FlashAttention(self.size_per_head, attention_dropout_rate, prev_block_num=65536, - next_block_num=0, 
tiling_stgy_name="sparse", - dp=parallel_config.data_parallel, mp=parallel_config.model_parallel) - self.flash_attention.shard(((parallel_config.data_parallel, parallel_config.model_parallel, 1, 1), - (parallel_config.data_parallel, parallel_config.model_parallel, 1, 1), - (parallel_config.data_parallel, parallel_config.model_parallel, 1, 1), - (parallel_config.data_parallel, 1, 1), - (parallel_config.data_parallel, parallel_config.model_parallel, 1, 1))) - self.flash_attention.drop_gen_mask.recompute(False) - self.flash_attention.fill_v2.recompute(False) - self.flash_attention.flash_attention.recompute(False) - self.squeeze = P.Squeeze(1) - logger.info("dp_num = {}, mp_num = {}".format(parallel_config.data_parallel, parallel_config.model_parallel)) - logger.info("Using FlashAttention in this round of operation = ", self.flash_attention_flag) - self.reshape = P.Reshape() - self.cast = P.Cast() - self.shape = P.Shape() - self.get_dtype = P.DType() - - def set_select_recompute(self): - """operator select recompute""" - self.batch_matmul.recompute() - self.real_div.recompute() - self.real_div_one_head.recompute() - self.sub.recompute() - self.add.recompute() - self.prob_dropout.dropout.recompute() - self.softmax_3d.softmax.recompute() - self.softmax.softmax.recompute() - self.cast_rec.recompute() - self.mul.recompute() - self.reshape_rec.recompute() - - def construct(self, query_tensor, key_tensor, value_tensor, attention_mask, key_past=None, - value_past=None, batch_valid_length=None): - """Forward process of the MultiQueryAttention""" - self._check_inputs(query_tensor, key_tensor, value_tensor, attention_mask, key_past, - value_past, batch_valid_length) - ori_shape = self.shape(query_tensor) - batch_size = self._get_batch_size_from_query(query_tensor) - query_tensor, key_tensor, value_tensor = self._convert_to_2d_tensor(query_tensor, - key_tensor, - value_tensor) - ori_dtype = self.get_dtype(query_tensor) - query_tensor = self.cast(query_tensor, self.dtype) - key_tensor = self.cast(key_tensor, self.dtype) - value_tensor = self.cast(value_tensor, self.dtype) - # multi query attention: query, key, value are derived from the same inputs - query = self.dense1(query_tensor) - key = self.dense2(key_tensor) - value = self.dense3(value_tensor) - # the returned shape is [bs, num_heads, seq_length, size_per_head] - query = self.transpose( - self.reshape( - query, - (batch_size, self._get_seq_length_under_incremental(self.src_seq_length), - self.n_head, self.size_per_head)), - (0, 2, 1, 3)) - # the returned shape is [bs, size_per_head, seq_length, num_heads] - if self.flash_attention_flag: - key = self.transpose_one_head( - self.reshape( - key, - (batch_size, self._get_seq_length_under_incremental(self.tgt_seq_length), - self.kv_heads, self.size_per_head)), - (0, 2, 1, 3)) - else: - key = self.transpose_one_head( - self.reshape( - key, - (batch_size, self._get_seq_length_under_incremental(self.tgt_seq_length), - self.kv_heads, self.size_per_head)), - (0, 2, 3, 1)) - # the returned shape is [bs, num_heads, seq_length, size_per_head] - value = self.transpose_one_head( - self.reshape( - value, - (batch_size, self._get_seq_length_under_incremental(self.tgt_seq_length), - self.kv_heads, self.size_per_head)), - (0, 2, 1, 3)) - - # support input shape is [bs, seq, seq] or [bs, heads, seq, seq] - if attention_mask is not None and self.flash_attention_flag is False and len(self.shape(attention_mask)) == 3: - attention_mask = self.expand_dims(attention_mask, 1) - if attention_mask is not None and 
self.flash_attention_flag is True and len(self.shape(attention_mask)) == 4: - attention_mask = self.squeeze(attention_mask) - # key and value for current token(s) - key_present = key - value_present = value - if self.use_past: - # The first graph with the input size of (bs, seq_length) - if self.is_first_iteration: - # Get the valid input length without padding - valid_length_vector = self.cast(self.less(self.range, batch_valid_length.view(-1, 1, 1)), self.dtype) - # Cover the key and value numbers corresponding to the padding position - key_present = self.mul1(key, self.expand_dims(valid_length_vector, 2)) - value_present = self.mul1(value, self.expand_dims(valid_length_vector, 3)) - # The second graph with the inpus size of (bs, 1) - # the shape of query is (bs, num_heads, 1, size_per_head) - # the shape of key is (bs, num_heads, size_per_head, 1) - # the shape of value is (bs, num_heads, 1, size_per_head) - else: - # Get the current token position index - valid_length = batch_valid_length - 1 - valid_length = self.reshape(valid_length, (-1, 1, 1)) - valid_length_vector = self.cast(self.equal(valid_length, self.range), self.dtype) - # Pad the key and value to seq_length with only the position index not zero - current_key = self.mul1(self.tile(key, (1, 1, 1, self.seq_length)), - self.expand_dims(valid_length_vector, 2)) - current_value = self.mul1(self.tile(value, (1, 1, self.seq_length, 1)), - self.expand_dims(valid_length_vector, 3)) - # Concat the previous saved state and current state - key = self.add(key_past, current_key) - value = self.add(value_past, current_value) - # Update key_present and value_present for state update - key_present = key - value_present = value - attention_mask = self.reshape(self.attention_mask, (self.seq_length, self.seq_length, 1, 1)) - - layer_present = (key_present, value_present) - # multi head attention considering attention mask - # the return shape is [bs * seq_length, hidden_size] - if self.flash_attention_flag: - key = self.tile_for_batch_matmul(key, (1, self.n_head, 1, 1)) - value = self.tile_for_batch_matmul(value, (1, self.n_head, 1, 1)) - attention = self.flash_attention(query, key, value, attention_mask) - attention = self._merge_heads(attention) - else: - attention = self._attn(query, key, value, attention_mask) - # Output - output = self.projection(attention) - output = self.dropout(output) - output = self.reshape(output, ori_shape) - output = self.cast(output, ori_dtype) - return output, layer_present - - def _softmax(self, attention_scores): - """ - For the consideration of the performance, do softmax according to different situations - :param attention_scores: a 3d tensor before softmax - :return: the attention scores. 
- """ - - if self._is_ascend and self.softmax_dtype == mstype.float16 or not self._is_ascend: - attention_probs = self.softmax(attention_scores) - else: - shape = self.shape(attention_scores) - # attention probs - attention_probs = self.softmax_3d( - self.reshape_rec(attention_scores, - (shape[0], -1, shape[-1]))) - attention_probs = self.reshape_rec(attention_probs, shape) - return attention_probs - - def _attn(self, query, key, value, attention_mask): - """ - Get the weighted score along the seq_length - - Inputs: - query: the query matrix - key: the key matrix - value: the value matrix - attention_mask: the attention mask matrix with shape (batch_size, - 1, seq_length, seq_length) - Outputs: - weighted_values: Tensor, the weighted sum scores - """ - # Normalize query and key before MatMul, default off - # Attention score [bs, num_heads, seq_length, seq_length] - - factor = self.cast(self.scale_factor, self.get_dtype(query)) - query = self.real_div(query, factor) - key = self.real_div_one_head(key, factor) - query = self.cast(query, self.compute_dtype) - key = self.cast(key, self.compute_dtype) - score = self.batch_matmul(query, key) - - ori_dtype = self.get_dtype(score) - attention_scores = self.cast_rec(score, self.softmax_dtype) - - # for input size of (bs, 1) namely the second graph, - # the shape of attention_mask matrix should be (bs, 1, 1, seq_length) - if attention_mask is not None: - if self.use_past and not self.is_first_iteration: - # Calculate the current total token - bs, *_ = self.shape(query) - tmp = self.not_equal(self.slice(key, (0, 0, 0, 0), (bs, 1, 1, self.seq_length), (1, 1, 1, 1)), 0) - current_index = self.reducesum(self.cast(tmp, mstype.float32), (1, 2, 3)) - # Get the precise position index - index = self.sub1(self.cast(current_index, mstype.int32), 1) - index = self.reshape(index, (-1, 1, 1)) - # Calculate the attention_mask matrix via the position index - attention_mask = self.cast(self.tensor_le(self.range, index), mstype.int32) - attention_mask = self.expand_dims(attention_mask, 2) - # Minus 10000 for the position where masked to exclude them from softmax - multiplu_out = self.sub( - self.cast(F.tuple_to_array((1.0,)), self.get_dtype(attention_scores)), - self.cast_rec(attention_mask, self.get_dtype(attention_scores))) - - adder = self.mul(multiplu_out, self.multiply_data) - attention_scores = self.add(adder, attention_scores) - - # attention probs - attention_probs = self._softmax(attention_scores) - attention_probs = self.cast_rec(attention_probs, ori_dtype) - - attention_probs = self.prob_dropout(attention_probs) - - # Weighted sum output [bs, num_heads, seq_length, size_per_head] - attention_probs = self.cast(attention_probs, self.compute_dtype) - value = self.cast(value, self.compute_dtype) - weighted_values = self.batch_matmul(attention_probs, value) - attention_merge = self._merge_heads(weighted_values) - return attention_merge - - -class WizardCoderTransformerDecoderLayer(TransformerEncoderLayer): - r"""WizardCoder Transformer Decoder Layer. - - Args: - batch_size(int): The batch size of the input tensor when do increnmental prediction. Should be a positive - value. When do training or prediction, the argument will not work and the user can just pass None to - the argument. - hidden_size(int): The hidden size of the input. - ffn_hidden_size(int): The hidden size of bottleneck in the feedforward layer. - num_heads(int): The number of the heads. - seq_length(int): The input sequence length. 
- attention_dropout_rate(float): The dropout rate of the attention scores. Default:0.1. - hidden_dropout_rate(float): The dropout rate of the final output of the layer. Default:0.1. - post_layernorm_residual(bool): Do residuals adds before the layernorm. Default False. - layernorm_compute_type(dtype.Number): The computation type of the layernorm. - Should be mstype.float32 or mstype.float16. Default mstype.float32. - softmax_compute_type(dtype.Number): The computation type of the softmax in the attention. - Should be mstype.float32 or mstype.float16. Default mstype.float32. - param_init_type(dtype.Number): The parameter initialization type of the module. - Should be mstype.float32 or mstype.float16. Default mstype.float32. - hidden_act (str, nn.Cell): The activation of the internal feedforward layer. Supports 'relu', - 'relu6', 'tanh', 'gelu', 'fast_gelu', 'elu', 'sigmoid', 'prelu', 'leakyrelu', 'hswish', - 'hsigmoid', 'logsigmoid' and so on. User can provide custom activition to the argument. - If user wants to run the net in the parallel mode, the custom activation must also provide - the `activation_shard` function. Please see the examples of the - class:`mindformers.modules.transformer.FeedForward`. Default: gelu. - use_past(bool): Use the past state to compute, used for incremental prediction. For example, if we have two - words and want to generate the ten more words. We just need to compute the two words' state only once, - and generate the next word one by one. When use_past is True, there are two steps to run the prediction. - In the first step, set the is_first_iteration to be True by - `model.add_flags_recursive(is_first_iteration=True)`, and pass the full inputs. Then, set the - is_first_iteration to be False by `model.add_flags_recursive(is_first_iteration=False)`. - At this moment, pass the single step's input tensor, and loop it. Default False. - moe_config(MoEConfig): The configuration of MoE (Mixture of Expert). Default is an instance of MoEConfig - with default values. Please see `MoEConfig`. - parallel_config(OpParallelConfig, MoEParallelConfig): The parallel configure. When MoE is applied, - MoEParallelConfig is effective, otherwise OpParallelConfig is effective. Default `default_dpmp_config`, - an instance of `OpParallelConfig` with default args. - - Inputs: - - **x** (Tensor) - Float Tensor, shape should be [batch_size, seq_length, hidden_size] or - [batch_size * seq_length, hidden_size], if the use_past is False or is_first_iteration=True. Otherwise, - should be [batch_size, 1, hidden_size] - - **input_mask** (Tensor) - Float Tensor, If the use_past is False or is_first_iteration=True, - the attention mask matrix should ba [batch_size, seq_length, seq_length], or None. None means there will - be no mask in softmax computation. Otherwise, should be [batch_size, 1, hidden_size] - - **init_reset** (Tensor) - A bool tensor with shape [1], used to clear the past key parameter and - past value parameter used in the incremental prediction. Only valid when use_past is True. Default True. - - **batch_valid_length** (Tensor) - Int32 tensor with shape [batch_size] the past calculated the index. - Used for incremental prediction when the use_past is True. Default None. - - Outputs: - Tuple, a tuple contains(`output`, `layer_present`). - - - **output** (Tensor) - The float tensor of the output of the layer with - shape (batch_size, seq_length, hidden_size) or (batch_size * seq_length, hidden_size), if the use_past is - False or is_first_iteration=True. 
Otherwise, it will be (batch_size, 1, hidden_size) - - - **layer_present** (Tuple) - A tuple of the Tensor of the projected key and value vector with - ((batch_size, num_heads, size_per_head, seq_length), - (batch_size, num_heads, seq_length, size_per_head)). - - Supported Platforms: - ``Ascend`` ``GPU`` - """ - - def __init__(self, - batch_size, - hidden_size, - ffn_hidden_size, - num_heads, - seq_length, - compute_dtype, - layernorm_compute_type, - softmax_compute_type, - param_init_type, - attention_dropout_rate=0.1, - hidden_dropout_rate=0.1, - post_layernorm_residual=False, - hidden_act='gelu', - use_past=False, - use_seq_parallel=False, - use_flash_attention=True, - moe_config=default_moe_config, - parallel_config=default_dpmp_config): - super(WizardCoderTransformerDecoderLayer, self).__init__( - batch_size=batch_size, - hidden_size=hidden_size, - ffn_hidden_size=ffn_hidden_size, - num_heads=num_heads, - seq_length=seq_length, - attention_dropout_rate=attention_dropout_rate, - hidden_dropout_rate=hidden_dropout_rate, - post_layernorm_residual=post_layernorm_residual, - layernorm_compute_type=layernorm_compute_type, - softmax_compute_type=softmax_compute_type, - param_init_type=param_init_type, - compute_dtype=compute_dtype, - hidden_act=hidden_act, - use_past=use_past, - moe_config=moe_config, - parallel_config=parallel_config - ) - self.is_first_iteration = True - self.layernorm1 = LayerNorm((hidden_size,), param_init_type=layernorm_compute_type) - self.layernorm2 = LayerNorm((hidden_size,), param_init_type=layernorm_compute_type) - dp, mp = parallel_config.data_parallel, parallel_config.model_parallel - if _get_parallel_mode() not in (ParallelMode.AUTO_PARALLEL,): - if use_seq_parallel: - self.add.shard(((dp * mp, 1), (dp * mp, 1))) - self.layernorm1.shard(((dp * mp, 1),)) - self.layernorm2.shard(((dp * mp, 1),)) - if not self.use_moe: - self.output.projection.shard( - strategy_bias=((dp * mp, 1), (1,)), - strategy_matmul=((dp, mp), (mp, 1)), - out_strategy_matmul=((dp * mp, 1),)) - self.output.dropout.dropout.shard(((dp * mp, 1),)) - self.output.projection.matmul.add_prim_attr("recompute_comm_op", True) - self.layernorm1.layer_norm.add_prim_attr("recompute_comm_op", True) - self.layernorm2.layer_norm.add_prim_attr("recompute_comm_op", True) - attention_parallel_config = parallel_config.dpmp if self.use_moe else parallel_config - self.attention = MultiQueryAttention(batch_size=batch_size, - src_seq_length=seq_length, - tgt_seq_length=seq_length, - hidden_size=hidden_size, - num_heads=num_heads, - hidden_dropout_rate=hidden_dropout_rate, - attention_dropout_rate=attention_dropout_rate, - compute_dtype=compute_dtype, - softmax_compute_type=softmax_compute_type, - param_init_type=param_init_type, - use_past=use_past, - use_seq_parallel=use_seq_parallel, - use_flash_attention=use_flash_attention, - parallel_config=attention_parallel_config) - - self.dtype = compute_dtype - self.reshape = P.Reshape() - self.shape = P.Shape() - self.cast = P.Cast() - self.depend = P.Depend() - if self.use_past: - size_per_head = hidden_size // num_heads - self.key_shape = (batch_size, 1, size_per_head, seq_length) - self.value_shape = (batch_size, 1, seq_length, size_per_head) - # parameters saving key and value states - self.key_past = Parameter(Tensor(np.zeros(shape=self.key_shape), self.dtype), name="key_past") - self.value_past = Parameter(Tensor(np.zeros(shape=self.value_shape), self.dtype), name="value_past") - - def construct(self, x, input_mask=None, init_reset=True, batch_valid_length=None): - 
"""forward process""" - self._check_input(x, input_mask, init_reset, batch_valid_length) - x_shape = self.shape(x) - x = self.reshape(x, (-1, x_shape[-1])) - if self.post_layernorm_residual: - input_x = x - else: - input_x = self.layernorm1(x) - input_x = self.cast(input_x, self.dtype) - - # indicate whether reset saved states - key_reset = None - value_reset = None - - if self.use_past and self.is_first_iteration: - # reset states, init_reset True for reuse and False for reset - self.assign(self.key_past, self.mul(self.key_past, self.cast(init_reset, self.dtype))) - key_reset = self.key_past - self.assign(self.value_past, self.mul(self.value_past, self.cast(init_reset, self.dtype))) - value_reset = self.value_past - # add dependency for desired execution order - input_x = self.depend(input_x, key_reset) - input_x = self.depend(input_x, value_reset) - attention, layer_present = self.attention(input_x, input_x, input_x, input_mask, - self.key_past, self.value_past, batch_valid_length) - - # For post-layernorm the inputs for residual path are output of self-attention and output of layernorm - if self.post_layernorm_residual: - x = self.add(input_x, attention) - # For pre-layernorm the inputs for residual path are output of self-attention and input of this layer - else: - x = self.cast(x, self.dtype) - x = self.add(x, attention) - - output_x = self.layernorm2(x) - output_x = self.cast(output_x, self.dtype) - aux_loss = None - # feedforwad construct dtype should be set as bf16 or fp32 - if self.use_moe: - mlp_logit, aux_loss = self.output(output_x) - else: - mlp_logit = self.output(output_x) - - - value_update = None - key_update = None - if self.use_past: - # current key and value - key_present, value_present = layer_present - self.assign(self.key_past, key_present) - key_update = self.key_past - self.assign(self.value_past, value_present) - value_update = self.value_past - # add dependency for desired execution order - key_update = self.depend(key_update, key_reset) - value_update = self.depend(value_update, value_reset) - - # add dependency for desired execution order - mlp_logit = self.depend(mlp_logit, value_update) - mlp_logit = self.depend(mlp_logit, key_update) - - # if shape is 3d, we reshape the inputs of the add - if len(x_shape) == 3: - output_x = self.reshape(output_x, x_shape) - mlp_logit = self.reshape(mlp_logit, x_shape) - x = self.reshape(x, x_shape) - - if self.post_layernorm_residual: - output = self.add_3d(output_x, mlp_logit) - output = self.reshape(output, (-1, x_shape[-1])) - output = self.layernorm1(output) - output = self.reshape(output, x_shape) - else: - output = self.add_3d(x, mlp_logit) - else: - if self.post_layernorm_residual: - output = self.add(output_x, mlp_logit) - output = self.layernorm1(output) - else: - output = self.add(x, mlp_logit) - output = self.reshape(output, x_shape) - - if self.use_moe: - return output, aux_loss - return output diff --git a/research/wizardcoder/wizardcoder_preprocess.py b/research/wizardcoder/wizardcoder_preprocess.py deleted file mode 100644 index a2c30103..00000000 --- a/research/wizardcoder/wizardcoder_preprocess.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -transform wizardcoder-format dataset to mindrecord. -""" -import os -import argparse -import json -import copy -import numpy as np - -from mindspore.mindrecord import FileWriter -from wizardcoder_tokenizer import WizardCoderTokenizer - - -IGNORE_INDEX = -100 - -PROMPT_DICT = { - "prompt_input": ( - "Below is an instruction that describes a task, paired with an input that provides further context. " - "Write a response that appropriately completes the request.\n\n" - "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:" - ), - "prompt_no_input": ( - "Below is an instruction that describes a task. " - "Write a response that appropriately completes the request.\n\n" - "### Instruction:\n{instruction}\n\n### Response:" - ), -} - - -def _tokenize_fn(strings, tokenizer): - """Tokenize a list of strings.""" - tokenized_list = [ - tokenizer( - text, - return_tensors='np', - padding="longest", - max_length=tokenizer.model_max_length, - truncation=True, - ) - for text in strings - ] - - input_ids = labels = [tokenized.input_ids for tokenized in tokenized_list] - - input_ids_lens = labels_lens = [ - np.not_equal(tokenized.input_ids, tokenizer.pad_token_id).sum() for tokenized in tokenized_list - ] - - return dict( - input_ids=input_ids, - labels=labels, - input_ids_lens=input_ids_lens, - labels_lens=labels_lens, - ) - - -def preprocess(sources, targets, tokenizer, max_length): - """Preprocess the data by tokenizing.""" - examples = [s + t for s, t in zip(sources, targets)] - examples_tokenized, sources_tokenized = [_tokenize_fn(strings, tokenizer) for strings in (examples, sources)] - input_ids = examples_tokenized["input_ids"] - labels = copy.deepcopy(input_ids) - for label, source_len in zip(labels, sources_tokenized["input_ids_lens"]): - label[:source_len] = IGNORE_INDEX - - final_input_ids, final_labels = [], [] - for input_id_tensor, label_tensor in zip(input_ids, labels): - input_id = input_id_tensor.tolist() - label = label_tensor.tolist() - if len(input_id) > max_length: - input_id = input_id[: max_length] - label = label[: max_length] - else: - input_id += [tokenizer.pad_token_id] * (max_length - len(input_id)) - label += [IGNORE_INDEX] * (max_length - len(label)) - final_input_ids.append(np.array(input_id).astype(np.int32)) - final_labels.append(np.array(label).astype(np.int32)) - - return dict(input_ids=final_input_ids, labels=final_labels) - - -def data_tokenize_function(raw_datas, tokenizer, max_length): - """Preprocess the data by formatting and preprocessing.""" - prompt_input, prompt_no_input = PROMPT_DICT["prompt_input"], PROMPT_DICT["prompt_no_input"] - sources, targets = [], [] - for example in raw_datas: - if 'input' in example: - instruction, input_query = example['instruction'], example['input'] - source = prompt_input.format_map(dict(instruction=instruction, input=input_query)) if input_query != "" \ - else prompt_no_input.format_map(dict(instruction=instruction)) - - else: - instruction = example['instruction'] - source = 
prompt_no_input.format_map(dict(instruction=instruction)) - target = f"{example['output']}{tokenizer.eos_token}" - sources.append(source) - targets.append(target) - - data_dict = preprocess(sources, targets, tokenizer, max_length) - return data_dict - - -class SupervisedDataset: - """Dataset for supervised fine-tuning.""" - - def __init__(self, raw_data, tokenizer, max_length): - super(SupervisedDataset, self).__init__() - - data_dict = data_tokenize_function(raw_data, tokenizer, max_length) - - self.input_ids = data_dict["input_ids"] - self.labels = data_dict["labels"] - - def __len__(self): - return len(self.input_ids) - - def __getitem__(self, i): - return dict( - input_ids=self.input_ids[i], - labels=self.labels[i] - ) - - -def tokenize_qa(tokenizer, file_path, max_length, if_jsonl=True): - """json or jsonl Dataset handling function""" - - if not if_jsonl: - raw_data = json.load(open(file_path, "r")) - else: - raw_data = [] - for line in open(file_path, 'r'): - raw_data.append(json.loads(line)) - dataset_cls = SupervisedDataset(raw_data, tokenizer, max_length) - for i in range(len(dataset_cls)): - yield dataset_cls[i] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--mindrecord_schema", type=str, default="wizardcoder") - parser.add_argument("--input_glob", type=str, default="EvolInstruct-Code-80k_1.json") - parser.add_argument("--output_file", type=str, default="EvolInstruct.mindrecord") - parser.add_argument("--vocab_file", type=str, default="vocab.json") - parser.add_argument("--merge_file", type=str, default="merges.txt") - parser.add_argument("--file_partition", type=int, default=1) - parser.add_argument("--seq_length", type=int, default=2048) - args = parser.parse_args() - - out_dir, out_file = os.path.split(os.path.abspath(args.output_file)) - if not os.path.exists(out_dir): - os.mkdir(out_dir) - - schema = {'input_ids': {"type": "int32", "shape": [-1]}, - 'labels': {"type": "int32", "shape": [-1]}} - - writer = FileWriter(file_name=args.output_file, shard_num=args.file_partition) - writer.add_schema(schema, args.mindrecord_schema) - - # Start to load tokenizer - if not os.path.exists(args.vocab_file): - raise FileNotFoundError(f"file {args.vocab_file} do not exists.") - if not os.path.exists(args.merge_file): - raise FileNotFoundError(f"file {args.merge_file} do not exists.") - - transforms_count = 0 - - word_tokenizer = WizardCoderTokenizer(vocab_file=args.vocab_file, merge_file=args.merge_file, - model_max_length=args.seq_length + 1) - for x in tokenize_qa(word_tokenizer, args.input_glob, args.seq_length + 1): - transforms_count += 1 - writer.write_raw_data([x]) - print("Transformed {} records.".format(transforms_count)) - - writer.commit() - out_file = args.output_file - if args.file_partition > 1: - out_file += '0' - print("Transform finished, output files refer: {}".format(out_file)) diff --git a/research/wizardcoder/wizardcoder_tokenizer.py b/research/wizardcoder/wizardcoder_tokenizer.py deleted file mode 100644 index 3163cfd5..00000000 --- a/research/wizardcoder/wizardcoder_tokenizer.py +++ /dev/null @@ -1,261 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""wizardcoder Tokenizer""" -import json -from functools import lru_cache -from typing import List, Optional -import os -import regex as re - -from mindformers.tools.register import MindFormerRegister, MindFormerModuleType -from mindformers.models.tokenization_utils import PreTrainedTokenizer - - -__all__ = ['WizardCoderTokenizer'] - - -@lru_cache() -def bytes_to_unicode(): - """ - bytes to unicode - """ - bs = list(range(ord("!"), ord("~") + 1)) + list(range(ord("¡"), ord("¬") + 1)) + list(range(ord("®"), ord("ÿ") + 1)) - cs = bs[:] - n = 0 - for b in range(2 ** 8): - if b not in bs: - bs.append(b) - cs.append(2 ** 8 + n) - n += 1 - cs = [chr(i) for i in cs] - return dict(zip(bs, cs)) - - -def get_pairs(word): - """ - Return set of symbol pairs in a word. - Word is represented as tuple of symbols (symbols being variable-length strings). - """ - pairs = set() - prev_char = word[0] - for char in word[1:]: - pairs.add((prev_char, char)) - prev_char = char - return pairs - - -@MindFormerRegister.register(MindFormerModuleType.TOKENIZER) -class WizardCoderTokenizer(PreTrainedTokenizer): - r""" - Tokenize the input string and convert them into the ids. The tokenizer use the sentence piece internally. - - Args: - vocab_file(str): The vocabulary file path. - merge_file(str): The merge file path. - unk_token(str): The token that represents the unknown. Default "<|endoftext|>". - bos_token(str): The token that represents the begin-of-sentence. Default "<|endoftext|>". - eos_token(str): The token that represents the end-of-sentence. Default "<|endoftext|>". - add_prefix_space(bool): whether to add a whitespace in the front of text. Default "False" - **kwargs: Other kwargs that will be passed into the base class of the `Tokenizer`. - - Examples: - >>> from research.wizardcoder.wizardcoder_tokenizer import WizardCoderTokenizer - >>> tokenizer = WizardCoderTokenizer("vocab.json", "merges.txt") - >>> res = tokenizer("Hello world") - >>> print(res) - {'input_ids': [8279, 5788], 'token_type_ids': [0, 0], 'attention_mask': [1, 1]} - - Outputs: - A dict contains the processed ids, attention_mask that specific by the member `MODEL_INPUT_NAME` - of the subclass. 
- """ - VOCAB_FILES = {'merge_file': 'merges.txt', 'vocab_file': 'vocab.json'} - FILE_LIST = ['tokenizer_config.json'] - - def __init__( - self, - vocab_file, - merge_file, - unk_token="<|endoftext|>", - bos_token="<|endoftext|>", - eos_token="<|endoftext|>", - pad_token="[PAD]", - add_prefix_space=False, - add_bos_token=False, - add_eos_token=False, - **kwargs - ): - self.add_bos_token = add_bos_token - self.add_eos_token = add_eos_token - - with open(vocab_file, 'r', encoding="utf-8") as vocab_handle: - self.encoder = json.load(vocab_handle) - self.decoder = {v: k for k, v in self.encoder.items()} - - with open(merge_file, 'r', encoding="utf-8") as merge_handle: - bpe_merges = merge_handle.read().split('\n')[1:-1] - - bpe_merges = [tuple(merge.split()) for merge in bpe_merges] - self.the_unk_token = unk_token - self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges)))) - self.byte_encoder = bytes_to_unicode() - self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} - - self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""") - self.add_prefix_space = add_prefix_space - self.cache = {} - - super(WizardCoderTokenizer, self).__init__( - unk_token=unk_token, bos_token=bos_token, eos_token=eos_token, pad_token=pad_token, **kwargs - ) - - self.add_tokens([self.pad_token, unk_token, bos_token, eos_token], special_tokens=True) - - def bpe(self, token): - """ bpe encode """ - if token in self.cache: - return self.cache[token] - - word = tuple(token) - pairs = get_pairs(token) - if not pairs: - return token - - while True: - bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf"))) - if bigram not in self.bpe_ranks: - break - first, second = bigram - new_word = [] - i = 0 - while i < len(word): - try: - j = word.index(first, i) - except ValueError: - new_word.extend(word[i:]) - break - else: - new_word.extend(word[i:j]) - i = j - - if word[i] == first and i + 1 < len(word) and word[i + 1] == second: - new_word.append(first + second) - i += 2 - else: - new_word.append(word[i]) - i += 1 - new_word = tuple(new_word) - word = new_word - if len(word) == 1: - break - else: - pairs = get_pairs(word) - word = " ".join(word) - self.cache[token] = word - return word - - def build_inputs_with_special_tokens(self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None): - """ - Build model inputs from a sequence or a pair of sequence by concatenating and adding special tokens. - - A WizardCoder sequence has the following format: - - single sequence: `` X `` - - pair of sequences: `` A B `` - - Args: - token_ids_0 (List[int]): List of IDs to which the special tokens will be added - token_ids_1 (List[int], `optional`, defaults to `None`): Optional second list of IDs for sequence pairs. - """ - bos = [self.bos_token_id] if self.add_bos_token else [] - eos = [self.eos_token_id] if self.add_eos_token else [] - if token_ids_1 is None: - return bos + token_ids_0 + eos - return bos + token_ids_0 + eos + token_ids_1 + eos - - def _tokenize(self, text): - """ Tokenize a string using bpe encode. 
""" - text, _ = self.prepare_for_tokenization(text, is_pretokenized=False) - bpe_tokens = [] - for token in re.findall(self.pat, text): - token = "".join( - self.byte_encoder[b] for b in token.encode("utf-8") - ) - bpe_tokens.extend(bpe_token for bpe_token in self.bpe(token).split(" ")) - return bpe_tokens - - def _convert_token_to_id(self, token): - """Converts a token (str) in an id using the vocab.""" - return self.encoder.get(token, self.encoder.get(self.the_unk_token)) - - def _convert_tokens_to_ids(self, tokens): - """ the index of the tokens in the vocabulary. """ - if isinstance(tokens, str): - return self.encoder.get(tokens, self.encoder.get(self.the_unk_token)) - output = [] - for token in tokens: - output.append(self.encoder.get(token, self.encoder.get(self.the_unk_token))) - return output - - def _convert_ids_to_tokens(self, ids): - """ return the origin bpe tokens according to ids """ - if isinstance(ids, int): - return self.decoder.get(ids) - - if isinstance(ids, list): - output = [] - for item in ids: - output.append(self.decoder.get(item)) - return output - raise TypeError(f"The type of ids should be int or list, but found {type(ids)}.") - - def _convert_id_to_token(self, index): - """Converts an index (integer) in a token (str) using the vocab.""" - return self.decoder.get(index) - - def _convert_tokens_to_string(self, tokens): - """ return a string according to the list of tokens""" - text = "".join(tokens) - text = bytearray([self.byte_decoder[c] for c in text]).decode("utf-8", errors='ignore') - return text - - def convert_tokens_to_string(self, tokens): - """Convert the tokens to the string""" - return self._convert_tokens_to_string(tokens) - - def prepare_for_tokenization(self, text, **kwargs): - add_prefix_space = kwargs.pop("add_prefix_space", self.add_prefix_space) - is_split_into_words = kwargs.pop("is_split_into_words", False) - if is_split_into_words or add_prefix_space: - text = " " + text - return (text, kwargs) - - def save_vocabulary(self, save_directory, filename_prefix): - """write the word to the files""" - output_file_path = os.path.join(save_directory, filename_prefix) - with open(output_file_path, 'w') as fp: - for k in self.vocab_dict.keys(): - fp.write(k + '\n') - return output_file_path - - @property - def vocab_size(self): - """Get the vocab size of the """ - return len(self.decoder) - - def get_vocab(self): - """Returns vocab as a dict""" - vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)} - vocab.update(self.added_tokens_encoder) - return vocab diff --git a/research/yi/yi.md b/research/yi/yi.md index 35b369c0..6fa894d7 100644 --- a/research/yi/yi.md +++ b/research/yi/yi.md @@ -275,7 +275,7 @@ DEVICE_NUM: 使用卡数, 仅开启多卡推理时生效 ### 单卡推理 -`Yi-6b-Base`支持单卡推理,`Yi-34b`模型规模较大,仅支持多卡卡推理。 +`Yi-6b-Base`支持单卡推理,`Yi-34b`模型规模较大,仅支持多卡推理。 ```shell bash scripts/examples/yi/run_yi_predict.sh single \ diff --git a/research/ziya/run_ziya_13b.yaml b/research/ziya/run_ziya_13b.yaml deleted file mode 100755 index a6f034b5..00000000 --- a/research/ziya/run_ziya_13b.yaml +++ /dev/null @@ -1,206 +0,0 @@ -seed: 0 -output_dir: './output' # path to save checkpoint/strategy -load_checkpoint: '' -src_strategy_path_or_dir: '' -auto_trans_ckpt: False # If true, auto transform load_checkpoint to load in distributed model -only_save_strategy: False -resume_training: False -use_parallel: False -run_mode: 'predict' - -# trainer config -trainer: - type: CausalLanguageModelingTrainer - model_name: 'llama_13b' -# if True, do evaluate during the training process. 
if false, do nothing. -# note that the task trainer should support _evaluate_in_training function. -do_eval: False -eval_step_interval: -1 # num of step intervals between each eval, -1 means no step end eval. -eval_epoch_interval: 50 # num of epoch intervals between each eval, 1 means eval on every epoch end. - -# runner config -runner_config: - epochs: 1 - batch_size: 4 - sink_mode: True - sink_size: 2 - -# optimizer -optimizer: - type: FP32StateAdamWeightDecay - beta1: 0.9 - beta2: 0.95 - eps: 1.e-8 # 1e-8 - learning_rate: 3.e-4 - -# lr schedule -lr_schedule: - type: CosineWithWarmUpLR - learning_rate: 3.e-4 - lr_end: 3.e-5 - warmup_ratio: 0.03 - total_steps: -1 # -1 means it will load the total steps of the dataset - -# dataset -train_dataset: &train_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: True - input_columns: ["input_ids"] # "input_ids", "labels" , labels are used in instruction finetune. - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: True - batch_size: 4 - repeat: 1 - numa_enable: False - prefetch_size: 1 -train_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *train_dataset - -# eval dataset -eval_dataset: &eval_dataset - data_loader: - type: MindDataset - dataset_dir: "" - shuffle: False - input_columns: ["input_ids"] - num_parallel_workers: 8 - python_multiprocessing: False - drop_remainder: False - repeat: 1 - numa_enable: False - prefetch_size: 1 -eval_dataset_task: - type: CausalLanguageModelDataset - dataset_config: *eval_dataset - -# default parallel of device num = 16 for Atlas 800 -parallel_config: - data_parallel: 2 - model_parallel: 4 - pipeline_stage: 2 - micro_batch_num: 16 - vocab_emb_dp: False - gradient_aggregation_group: 4 -# when model parallel is greater than 1, we can set micro_batch_interleave_num=2, that may accelerate the train process. 
-micro_batch_interleave_num: 1 - -# recompute config -recompute_config: - recompute: True - parallel_optimizer_comm_recompute: False - mp_comm_recompute: True - recompute_slice_activation: True - -# callbacks -callbacks: - - type: MFLossMonitor - - type: CheckpointMonitor - prefix: "ziya_13b" - save_checkpoint_steps: 100 - integrated_save: False - async_save: False - - type: ObsMonitor - -# mindspore context init config -context: - mode: 0 #0--Graph Mode; 1--Pynative Mode - device_target: "Ascend" - enable_graph_kernel: False - max_call_depth: 10000 - max_device_memory: "31GB" - save_graphs: False - save_graphs_path: "./graph" - device_id: 5 - -# parallel context config -parallel: - parallel_mode: 1 # 0-dataset, 1-semi, 2-auto, 3-hybrid - gradients_mean: False - enable_alltoall: False - full_batch: True - search_mode: "sharding_propagation" - enable_parallel_optimizer: True - strategy_ckpt_save_file: "./ckpt_strategy.ckpt" - parallel_optimizer_config: - gradient_accumulation_shard: False - parallel_optimizer_threshold: 64 - -# model config -model: - model_config: - type: LlamaConfig - batch_size: 1 # add for increase predict - seq_length: 2048 - hidden_size: 5120 - num_layers: 40 - num_heads: 40 - vocab_size: 39424 - multiple_of: 256 - rms_norm_eps: 1.0e-6 - bos_token_id: 1 - eos_token_id: 2 - pad_token_id: 0 - ignore_token_id: -100 - compute_dtype: "float16" - layernorm_compute_type: "float32" - softmax_compute_type: "float32" - rotary_dtype: "float16" - param_init_type: "float16" - use_past: True - offset: 0 - checkpoint_name_or_path: "ziya_13b" - repetition_penalty: 1 - max_decode_length: 512 - top_k: 3 - top_p: 1 - do_sample: False - arch: - type: LlamaForCausalLM - -processor: - return_tensors: ms - tokenizer: - unk_token: '' - bos_token: '' - eos_token: '' - pad_token: '' - type: LlamaTokenizer - vocab_file: "tokenizer.model" - type: LlamaProcessor - -# metric -metric: - type: PerplexityMetric - -# wrapper cell config -runner_wrapper: - type: MFTrainOneStepCell - scale_sense: - type: DynamicLossScaleUpdateCell - loss_scale_value: 65536 - scale_factor: 2 - scale_window: 1000 - use_clip_grad: True - -eval_callbacks: - - type: ObsMonitor - -auto_tune: False -filepath_prefix: './autotune' -autotune_per_step: 10 - -profile: False -profile_start_step: 1 -profile_stop_step: 10 -init_start_profile: False -profile_communication: False -profile_memory: True -layer_scale: False -layer_decay: 0.65 -lr_scale_factor: 256 - -# aicc -remote_save_url: "Please input obs url on AICC platform." 
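For the parallel settings in the YAML above, the product of data_parallel, model_parallel and pipeline_stage has to match the number of devices the job is launched on (here 2 x 4 x 2 = 16, consistent with the "device num = 16 for Atlas 800" comment), and micro_batch_num must be at least pipeline_stage when pipeline parallelism is used. A minimal stand-alone sketch of that sanity check follows; the helper name and hard-coded values are illustrative only, not a MindFormers API.

```python
# Sanity-check a parallel_config block before launching a distributed job.
# Values are copied from the run_ziya_13b.yaml shown above; the function
# name `check_parallel_config` is illustrative, not part of MindFormers.

def check_parallel_config(data_parallel: int,
                          model_parallel: int,
                          pipeline_stage: int,
                          micro_batch_num: int,
                          device_num: int) -> None:
    """Raise if the parallel layout cannot cover the available devices."""
    product = data_parallel * model_parallel * pipeline_stage
    if product != device_num:
        raise ValueError(
            f"data_parallel * model_parallel * pipeline_stage = {product}, "
            f"but {device_num} devices were requested.")
    if pipeline_stage > 1 and micro_batch_num < pipeline_stage:
        raise ValueError(
            "micro_batch_num must be >= pipeline_stage when pipeline "
            "parallelism is enabled.")


if __name__ == "__main__":
    # 2 * 4 * 2 = 16 devices, micro_batch_num = 16 >= 2 stages -> passes.
    check_parallel_config(data_parallel=2, model_parallel=4,
                          pipeline_stage=2, micro_batch_num=16,
                          device_num=16)
```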
diff --git a/research/ziya/ziya.md b/research/ziya/ziya.md deleted file mode 100644 index 1fae5234..00000000 --- a/research/ziya/ziya.md +++ /dev/null @@ -1,73 +0,0 @@ -# ZiYa - -“姜子牙”系列大模型是由IDEA研究院推出的开源通用大模型,具备翻译,编程,文本分类,信息抽取,摘要,文案生成,常识问答和数学计算等能力。目前姜子牙通用大模型v1(Ziya-LLaMA-13B-v1)已完成大规模预训练、多任务有监督微调和人类反馈学习三阶段的训练过程。 - -姜子牙通用大模型v1.1(Ziya-LLaMA-13B-v1.1)对Ziya-LLaMA-13B-v1模型进行继续优化,通过调整微调数据的比例和采用更优的强化学习策略,本版本在问答准确性、数学能力以及安全性等方面得到了提升。 - -## Ziya-LLaMA-13B - -Ziya-LLaMA-13B拥有130亿参数,模型结构采用LLaMA-13B,重新构建了中文词表,进行千亿token量级的已知的最大规模继续预训练,使模型具备原生中文能力。再经过500万条多任务样本的有监督微调(SFT)和综合人类反馈训练(RM+PPO+HFFT+COHFT+RBRS),进一步激发和加强各种AI任务能力。 - -我们可以复用llama的代码,通过转换脚本将huggingface格式的子牙权重文件转换为mindspore格式的ckpt,再基于mindformer提供的高阶接口进行训练推理。 - -### 快速使用 - -#### Ziya-LLaMA-13B 预训练权重转换 - -请参考[Ziya-LLaMA-13B-v1](https://huggingface.co/IDEA-CCNL/Ziya-LLaMA-13B-v1#-%E4%BD%BF%E7%94%A8-usage-)使用Usage,按照步骤得到子牙原始权重。 - -- 其中step1获取huggingface权重可以下载[llama-13b-hf](https://huggingface.co/decapoda-research/llama-13b-hf/tree/main),然后根据step2和子牙权重合并,得到完整的子牙13B权重。 - -执行权重转换脚本 - -```shell -python mindformers/models/llama/convert_weight.py --torch_ckpt_dir TORCH_CKPT_DIR --mindspore_ckpt_path MS_CKPT_NAME -``` - -```text -# 参数说明 -TORCH_CKPT_DIR: huggingface权重保存目录路径 -mindspore_ckpt_path: 权重保存文件名,保存为TORCH_CKPT_DIR/OUTPUT_NAME, 也可以指定为自定义保存路径 -``` - -#### 推理 - -- pipeline接口推理 - -```python -import mindspore as ms - -from mindformers.pipeline import pipeline -from mindformers.tools.register import MindFormerConfig -from mindformers.models import LlamaConfig, LlamaForCausalLM, LlamaTokenizer - -ms.set_context(device_target="Ascend", device_id=6, mode=0) -config = MindFormerConfig('research/ziya/run_ziya_13b.yaml') - -model_path = 'Your model path' -tokenizer_path = 'Your tokenizer path' - -config.model.model_config.checkpoint_name_or_path = model_path -model_config = LlamaConfig(**config.model.model_config) -ziya_model = LlamaForCausalLM(model_config) -tokenizer = LlamaTokenizer(tokenizer_path, add_bos_token=True, add_eos_token=False) -tokenizer.add_tokens(["", ""], special_tokens=True) - -pipeline_task = pipeline("text_generation", model=ziya_model, tokenizer=tokenizer) - -query = "帮我写一份去西安的旅游计划" -pipeline_result = pipeline_task(inputs, do_sample=False, max_length=512, add_special_tokens=True) -print(pipeline_result[0]['text_generation_text']) -``` - -**推理结果示例** - -```text -'帮我写一份去西安的旅游计划 1、行程安排 2、交通方式 3、住宿安排 4、景点推荐 5、美食推荐 6、注意事项 7、其他建议 1、行程安排 第一天:到达西安,入住酒店,游览大雁塔、明城墙、回民街 第二天:参观兵马俑、华清池、大唐芙蓉园 第三天:游览西安城墙、钟鼓楼、陕西历史博物馆 第四天:参观西安碑林、陕西国际博览中心、大唐芙蓉园 第五天:游览华山、参观华山景区内的景点 第六天:游览华山、参观华山景区内的景点 第七天:游览华山、参观华山景区内的景点 第八天:离开西安 2、交通方式 建议乘坐高铁或飞机前往,可以选择在西安市区内乘坐地铁或出租车。 3、住宿安排 可以选择在市中心或景区附近的酒店住宿,方便游览景点。 4、景点推荐 大雁塔、明城墙、回民街、兵马俑、华清池、大唐芙蓉园、西安城墙、钟鼓楼、陕西历史博物馆、碑林、陕西国际博览中心、华山、华山景区内的景点。 5、美食推荐 可以品尝到肉夹馍、凉皮、羊肉泡馍、羊肉串、糖葫芦等特色美食。 6、注意事项 注意防晒、防蚊虫叮咬,注意保暖,避免着凉。 7、其他建议 可以购买当地特色纪念品,可以参加当地的文化活动,可以品尝当地美食。 以上是我的建议,希望能够帮助到您。' -``` - -#### 训练与微调 - -基于ziya-13b,目前提供了模型的基础配置文件`research/ziya/run_ziya_13b.yaml`。可参考llama的[预训练](https://gitee.com/mindspore/mindformers/blob/dev/docs/model_cards/llama.md#%E9%A2%84%E8%AE%AD%E7%BB%83)与[微调](https://gitee.com/mindspore/mindformers/blob/dev/docs/model_cards/llama.md#%E5%BE%AE%E8%B0%83)章节。 - -`注:使用ziya-13b进行训练或者微调时,需要使用ziya-13b配套的tokenizer.model处理数据集,以及选用ziya-13b的yaml配置文件进行任务启动。` \ No newline at end of file diff --git a/tests/st/test_model/test_bert_model/__init__.py b/tests/st/test_model/test_bert_model/__init__.py deleted file mode 100644 index 67e40d94..00000000 --- a/tests/st/test_model/test_bert_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei 
Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""test bert.""" diff --git a/tests/st/test_model/test_bert_model/test_auto_class.py b/tests/st/test_model/test_bert_model/test_auto_class.py deleted file mode 100644 index 1b3d41c2..00000000 --- a/tests/st/test_model/test_bert_model/test_auto_class.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the bert interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_bert_model/test_auto_class.py -""" -import os -import shutil -import mindspore as ms - -from mindformers import MindFormerBook, AutoModel, AutoConfig, AutoTokenizer, AutoProcessor -from mindformers.models import PreTrainedModel, PretrainedConfig, PreTrainedTokenizerBase, ProcessorMixin - -ms.set_context(mode=0) - - -class TestBertAutoClassMethod: - """A test class for testing Model classes""" - def setup_method(self): - """setup method.""" - self.save_directory = MindFormerBook.get_default_checkpoint_save_folder() - self.test_list = ['bert_base_uncased'] - - def teardown_method(self): - for model_or_config_type in self.test_list: - shutil.rmtree(os.path.join(self.save_directory, model_or_config_type), ignore_errors=True) - - def test_auto_model(self): - """ - Feature: AutoModel. - Description: Test to get Model instance by input model type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model name, load model and weights - for model_type in self.test_list: - model = AutoModel.from_pretrained(model_type, download_checkpoint=False) - assert isinstance(model, PreTrainedModel) - model.save_pretrained( - save_directory=os.path.join(self.save_directory, model_type), - save_name=model_type + '_model') - - def test_auto_config(self): - """ - Feature: AutoConfig. - Description: Test to get config instance by input config type. 
- Expectation: TypeError, ValueError, RuntimeError - """ - # input model config name, load model and weights - for config_type in self.test_list: - model_config = AutoConfig.from_pretrained(config_type) - assert isinstance(model_config, PretrainedConfig) - model_config.save_pretrained( - save_directory=os.path.join(self.save_directory, config_type), - save_name=config_type + '_config') - - def test_auto_processor(self): - """ - Feature: AutoProcessor. - Description: Test to get processor instance by input processor type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input processor name - for processor_type in self.test_list: - processor = AutoProcessor.from_pretrained(processor_type) - assert isinstance(processor, ProcessorMixin) - processor.save_pretrained( - save_directory=os.path.join(self.save_directory, processor_type), - save_name=processor_type + '_processor') - - def test_auto_tokenizer(self): - """ - Feature: AutoTokenizer, input config. - Description: Test to get tokenizer instance by input tokenizer type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input processor name - for tokenizer_type in self.test_list: - tokenizer = AutoTokenizer.from_pretrained(tokenizer_type) - assert isinstance(tokenizer, PreTrainedTokenizerBase) - tokenizer.save_pretrained( - save_directory=os.path.join(self.save_directory, tokenizer_type), - save_name=tokenizer_type + '_tokenizer') diff --git a/tests/st/test_model/test_bert_model/test_pipeline.py b/tests/st/test_model/test_bert_model/test_pipeline.py deleted file mode 100644 index a66030f7..00000000 --- a/tests/st/test_model/test_bert_model/test_pipeline.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the bert interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_bert_model/test_pipeline.py -""" -import mindspore as ms - -from mindformers import pipeline - -ms.set_context(mode=0) - - -class TestBertPipelineMethod: - """A test class for testing pipeline.""" - def setup_method(self): - """setup method.""" - self.test_list = ['bert_base_uncased'] - - def test_pipeline(self): - """ - Feature: pipeline. - Description: Test pipeline by input model type. 
- Expectation: TypeError, ValueError, RuntimeError - """ - for model_type in self.test_list: - task_pipeline = pipeline(task='fill_mask', model=model_type, max_length=128, padding='max_length') - task_pipeline([" Hello I am a [MASK] model.",]) diff --git a/tests/st/test_model/test_bert_model/test_trainer.py b/tests/st/test_model/test_bert_model/test_trainer.py deleted file mode 100644 index 8413abc0..00000000 --- a/tests/st/test_model/test_bert_model/test_trainer.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the bert interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_bert_model/test_gpt_trainer.py -""" -import numpy as np -import pytest - -import mindspore as ms - -from mindspore.dataset import GeneratorDataset -from mindformers.models import BertConfig, BertForPreTraining -from mindformers import Trainer, TrainingArguments - -ms.set_context(mode=0) - - -def generator(): - """dataset generator""" - data = np.random.randint(low=0, high=15, size=(128,)).astype(np.int32) - input_mask = np.ones_like(data) - token_type_id = np.zeros_like(data) - next_sentence_lables = np.array([1]).astype(np.int32) - masked_lm_positions = np.array([1, 2]).astype(np.int32) - masked_lm_ids = np.array([1, 2]).astype(np.int32) - masked_lm_weights = np.ones_like(masked_lm_ids) - train_data = (data, input_mask, token_type_id, next_sentence_lables, - masked_lm_positions, masked_lm_ids, masked_lm_weights) - for _ in range(4): - yield train_data - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_onecard -class TestBertTrainerMethod: - """A test class for testing trainer.""" - - def setup_method(self): - """init task trainer.""" - args = TrainingArguments(batch_size=1, num_train_epochs=1) - train_dataset = GeneratorDataset(generator, column_names=["input_ids", "input_mask", "segment_ids", - "next_sentence_labels", "masked_lm_positions", - "masked_lm_ids", "masked_lm_weights"]) - train_dataset = train_dataset.batch(batch_size=1) - - model_config = BertConfig(batch_size=1, num_hidden_layers=2) - model = BertForPreTraining(model_config) - - self.task_trainer = Trainer(task='fill_mask', - model=model, - args=args, - train_dataset=train_dataset) - - @pytest.mark.run(order=1) - def test_train(self): - """ - Feature: Trainer.train() - Description: Test trainer for train. - Expectation: TypeError, ValueError, RuntimeError - """ - # self.task_trainer.train() - - @pytest.mark.run(order=2) - def test_eval(self): - """ - Feature: Trainer.evaluate() - Description: Test trainer for evaluate. - Expectation: TypeError, ValueError, RuntimeError - """ - - @pytest.mark.run(order=3) - def test_predict(self): - """ - Feature: Trainer.predict() - Description: Test trainer for predict. 
- Expectation: TypeError, ValueError, RuntimeError - """ - input_data = [" Hello I am a [MASK] model.",] - self.task_trainer.predict(input_data=input_data) - - @pytest.mark.run(order=4) - def test_finetune(self): - """ - Feature: Trainer.finetune() - Description: Test trainer for finetune. - Expectation: TypeError, ValueError, RuntimeError - """ diff --git a/tests/st/test_model/test_bloom_model/__init__.py b/tests/st/test_model/test_bloom_model/__init__.py deleted file mode 100644 index 3caf1904..00000000 --- a/tests/st/test_model/test_bloom_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""test bloom model.""" diff --git a/tests/st/test_model/test_bloom_model/base_model.py b/tests/st/test_model/test_bloom_model/base_model.py deleted file mode 100644 index 45bfcc50..00000000 --- a/tests/st/test_model/test_bloom_model/base_model.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""Bloom Base Model.""" -from mindformers.models.bloom import BloomLMHeadModel, BloomConfig - -# copy from run_bloom_7.1b.yaml -BASE_CONFIG = { - 'attention_dropout_rate': 0.1, - 'bos_token_id': 1, - 'compute_dtype': 'float16', - 'embedding_init_type': 'float32', - 'eos_token_id': 2, - 'expand_ratio': 4, - 'hidden_act': 'gelu', - 'hidden_dropout_rate': 0.1, - 'hidden_size': 4096, - 'initializer_range': 0.02, - 'layernorm_compute_type': 'float32', - 'max_decode_length': 1024, - 'num_heads': 32, - 'num_layers': 2, # 30 - 'param_init_type': 'float16', - 'repetition_penalty': 1, - 'seq_length': 2048, - 'softmax_compute_type': 'float16', - 'top_k': 5, - 'top_p': 1, - 'type': 'BloomConfig', - 'use_flash_attention': True, - 'use_select_recompute': False, - 'use_seq_parallel': True, - 'vocab_size': 250880 -} - - -def get_config(): - """get instanced model config.""" - return BloomConfig(**BASE_CONFIG) - - -def get_model(config): - """get instanced model.""" - return BloomLMHeadModel(config) diff --git a/tests/st/test_model/test_bloom_model/test_auto_class.py b/tests/st/test_model/test_bloom_model/test_auto_class.py deleted file mode 100644 index 6e285f65..00000000 --- a/tests/st/test_model/test_bloom_model/test_auto_class.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -Test module for testing the bloom interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_bloom_model/test_auto_class.py -""" -import os -import shutil -import mindspore as ms - -from mindformers import MindFormerBook, AutoModel, AutoConfig, AutoTokenizer, AutoProcessor -from mindformers.models import PreTrainedModel, PretrainedConfig, PreTrainedTokenizerBase, ProcessorMixin - -ms.set_context(mode=0) - - -class TestBloomAutoClassMethod: - """A test class for testing Model classes""" - - def setup_method(self): - """setup method.""" - self.save_directory = MindFormerBook.get_default_checkpoint_save_folder() - self.test_llm_list = ['bloom_560m'] - - def teardown_method(self): - for model_or_config_type in self.test_llm_list: - shutil.rmtree(os.path.join(self.save_directory, model_or_config_type), ignore_errors=True) - - def test_llm_model(self): - """ - Feature: AutoModel. - Description: Test to get LL-Model instance by input model type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model name, load model and weights - for model_type in self.test_llm_list: - model = AutoModel.from_pretrained(model_type) - assert isinstance(model, PreTrainedModel) - model.save_pretrained( - save_directory=os.path.join(self.save_directory, model_type), - save_name=model_type + '_model') - - def test_llm_config(self): - """ - Feature: AutoConfig. - Description: Test to get config instance by input config type. 
- Expectation: TypeError, ValueError, RuntimeError - """ - # input model config name, load model and weights - for config_type in self.test_llm_list: - model_config = AutoConfig.from_pretrained(config_type) - assert isinstance(model_config, PretrainedConfig) - model_config.save_pretrained( - save_directory=os.path.join(self.save_directory, config_type), - save_name=config_type + '_config') - - def test_llm_processor(self): - """ - Feature: AutoConfig. - Description: Test to get config instance by input config type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input processor name - for processor_type in self.test_llm_list: - processor = AutoProcessor.from_pretrained(processor_type) - assert isinstance(processor, ProcessorMixin) - processor.save_pretrained( - save_directory=os.path.join(self.save_directory, processor_type), - save_name=processor_type + '_processor') - - def test_llm_tokenizer(self): - """ - Feature: AutoTokenizer, input config. - Description: Test to get tokenizer instance by input tokenizer type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input processor name - for tokenizer_type in self.test_llm_list: - tokenizer = AutoTokenizer.from_pretrained(tokenizer_type) - assert isinstance(tokenizer, PreTrainedTokenizerBase) - tokenizer.save_pretrained( - save_directory=os.path.join(self.save_directory, tokenizer_type), - save_name=tokenizer_type + '_tokenizer') diff --git a/tests/st/test_model/test_bloom_model/test_eval.py b/tests/st/test_model/test_bloom_model/test_eval.py deleted file mode 100644 index 5ec7b0c7..00000000 --- a/tests/st/test_model/test_bloom_model/test_eval.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test bloom evaluate. -How to run this: - pytest tests/st/test_model/test_bloom_model/test_eval.py -""" -import pytest -import mindspore as ms -from tests.utils.model_tester import ModelTester - -from .base_model import get_config, get_model - -ms.set_context(mode=0) - - -class TestBloomEval: - """A test class for testing model evaluate.""" - - @pytest.mark.level1 - @pytest.mark.platform_arm_ascend910b_training - @pytest.mark.env_onecard - def test_base_model(self): - """ - Feature: Base model evaluate - Description: Test base model evaluate. 
- Expectation: AssertionError - """ - runner = ModelTester(run_mode='eval', batch_size=4, use_label=True) - - model_config = get_config() - model_config.batch_size = runner.batch_size # set batch size for prediction - # if set default, cause Memory pool not enough by large alibi tensor - model_config.seq_length = 1024 - model_config.vocab_size = 128 # if set too large, will cause OverflowError - - model = get_model(model_config) - - runner.set_eval(model, model_config, metric='PerplexityMetric') diff --git a/tests/st/test_model/test_bloom_model/test_pipeline.py b/tests/st/test_model/test_bloom_model/test_pipeline.py deleted file mode 100644 index 6c497543..00000000 --- a/tests/st/test_model/test_bloom_model/test_pipeline.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the bloom interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_bloom_model/test_pipeline.py -""" -import mindspore as ms - -from mindformers import pipeline - -ms.set_context(mode=0) - - -class TestBloomPipelineMethod: - """A test class for testing pipeline.""" - def setup_method(self): - """setup method.""" - self.test_llm_list = ['bloom_560m'] - - def test_pipeline(self): - """ - Feature: pipeline. - Description: Test pipeline by input model type. - Expectation: TypeError, ValueError, RuntimeError - """ - for model_type in self.test_llm_list: - task_pipeline = pipeline(task='text_generation', model=model_type, max_length=20) - task_pipeline("hello!", top_k=3) diff --git a/tests/st/test_model/test_bloom_model/test_predict.py b/tests/st/test_model/test_bloom_model/test_predict.py deleted file mode 100644 index 6586c363..00000000 --- a/tests/st/test_model/test_bloom_model/test_predict.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test bloom predict. 
-How to run this: - pytest tests/st/test_model/test_bloom_model/test_predict.py -""" -import pytest -import mindspore as ms -from tests.utils.model_tester import ModelTester - -from .base_model import get_config, get_model - -ms.set_context(mode=0) - - -class TestBloomPredict: - """A test class for testing model prediction.""" - - @pytest.mark.level1 - @pytest.mark.platform_arm_ascend910b_training - @pytest.mark.env_onecard - def test_base_model(self): - """ - Feature: Base model predict - Description: Test base model prediction. - Expectation: AssertionError - """ - runner = ModelTester(run_mode='predict', batch_size=2, experiment_mode=False) - - model_config = get_config() - model_config.batch_size = runner.batch_size # set batch size for prediction - model_config.vocab_size = 32000 # default to use llama2 tokenizer - - model = get_model(model_config) - - outputs = 'hello world.................' - runner.set_predict(model=model, expect_outputs=outputs) diff --git a/tests/st/test_model/test_bloom_model/test_train.py b/tests/st/test_model/test_bloom_model/test_train.py deleted file mode 100644 index 525827f8..00000000 --- a/tests/st/test_model/test_bloom_model/test_train.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test bloom train. -How to run this: - pytest tests/st/test_model/test_bloom_model/test_train.py -""" -import pytest -import mindspore as ms -from tests.utils.model_tester import ModelTester - -from .base_model import get_config, get_model - -ms.set_context(mode=0) - - -class TestBloomTrain: - """A test class for testing model training precision.""" - - @pytest.mark.level1 - @pytest.mark.platform_arm_ascend910b_training - @pytest.mark.env_onecard - def test_base_model(self): - """ - Feature: Base model train - Description: Test base model training precision. - Expectation: AssertionError - """ - runner = ModelTester(run_mode='train', batch_size=4, experiment_mode=False) - - model_config = get_config() - # if set default, cause Memory pool not enough by large alibi tensor - model_config.seq_length = 1024 - - loss_std = [31.927244, 31.562233, 30.998333, 30.172699, 29.056198, - 27.678558, 26.111250, 24.342562, 22.492617, 20.694494, - 19.059685, 17.722950, 16.582079, 15.667753, 14.978457, - 14.505189, 14.146716, 13.990182, 13.862482, 13.853906] - - model = get_model(model_config) - - runner.set_train(model, model_config, loss_std=loss_std) diff --git a/tests/st/test_model/test_clip_model/__init__.py b/tests/st/test_model/test_clip_model/__init__.py deleted file mode 100644 index 02d092ea..00000000 --- a/tests/st/test_model/test_clip_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""test clip.""" diff --git a/tests/st/test_model/test_clip_model/test_clip_model.py b/tests/st/test_model/test_clip_model/test_clip_model.py deleted file mode 100644 index 0c9c1923..00000000 --- a/tests/st/test_model/test_clip_model/test_clip_model.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -Test Module for testing functions of AutoModel and CLIPModel class - -How to run this: -windows: pytest .\\tests\\st\\test_model\\test_clip_model\\test_clip_model.py -linux: pytest ./tests/st/test_model/test_clip_model/test_clip_model.py -""" -import os -import time - -import mindspore as ms - -from mindformers import MindFormerBook, AutoConfig, AutoModel -from mindformers.models import CLIPModel, PreTrainedModel -from mindformers.tools import logger - -ms.set_context(mode=0) - - -class TestCLIPModelMethod: - """A test class for testing Model classes""" - def setup_method(self): - """get_input""" - self.model_type = "clip_vit_b_32" - - self.checkpoint_dir = os.path.join(MindFormerBook.get_default_checkpoint_download_folder(), - 'clip') - self.config_path = os.path.join(MindFormerBook.get_project_path(), - 'configs', 'clip', 'run_clip_vit_b_32_pretrain_flickr8k.yaml') - self.config = AutoConfig.from_pretrained(self.model_type) - - self.checkpoint_path = os.path.join(MindFormerBook.get_default_checkpoint_download_folder(), - 'clip', self.model_type + '.ckpt') - self.save_directory = os.path.join(MindFormerBook.get_default_checkpoint_save_folder(), - 'clip') - - def test_auto_model(self): - """ - Feature: AutoModel, from_pretrained, from_config - Description: Test to get model instance by AutoModel.from_pretrained - and AutoModel.from_config - Expectation: TypeError, ValueError, RuntimeError - """ - time.sleep(5) - - AutoModel.show_support_list() - support_list = AutoModel.get_support_list() - logger.info(support_list) - # input model name, load model and weights - model_a = AutoModel.from_pretrained(self.model_type) - # input model directory, load model and weights - model_b = AutoModel.from_pretrained(self.checkpoint_dir) - # input yaml path, load model without weights - model_c = AutoModel.from_config(self.config_path) - # input config, load model without weights - model_d = AutoModel.from_config(self.config) - - model_a.save_pretrained(self.save_directory, save_name=self.model_type) - - CLIPModel.show_support_list() - support_list = 
CLIPModel.get_support_list() - logger.info(support_list) - # input model name, load model and weights - model_i = CLIPModel.from_pretrained(self.model_type) - # input model directory, load model and weights - model_j = CLIPModel.from_pretrained(self.checkpoint_dir) - # input config, load model weights - model_k = CLIPModel(self.config) - # input config, load model without weights - self.config.checkpoint_name_or_path = None - model_l = CLIPModel(self.config) - - model_i.save_pretrained(self.save_directory, save_name=self.model_type) - - # all models are ClipModel class, and inherited from PreTrainedModel - assert isinstance(model_i, CLIPModel) - assert isinstance(model_j, CLIPModel) - assert isinstance(model_k, CLIPModel) - assert isinstance(model_l, CLIPModel) - - assert isinstance(model_i, PreTrainedModel) - assert isinstance(model_j, PreTrainedModel) - assert isinstance(model_k, PreTrainedModel) - assert isinstance(model_l, PreTrainedModel) - - # all models are CLIPModel class, and inherited from PreTrainedModel - assert isinstance(model_a, CLIPModel) - assert isinstance(model_b, CLIPModel) - assert isinstance(model_c, CLIPModel) - assert isinstance(model_d, CLIPModel) - - assert isinstance(model_a, PreTrainedModel) - assert isinstance(model_b, PreTrainedModel) - assert isinstance(model_c, PreTrainedModel) - assert isinstance(model_d, PreTrainedModel) diff --git a/tests/st/test_model/test_clip_model/test_clip_tokenizer.py b/tests/st/test_model/test_clip_model/test_clip_tokenizer.py deleted file mode 100644 index 6b0cf6a1..00000000 --- a/tests/st/test_model/test_clip_model/test_clip_tokenizer.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test Module for testing Tokenizer class - -How to run this: -linux: pytest ./tests/st/test_model/test_clip_model/test_clip_tokenizer.py -""" -import os -import shutil - -import pytest -from mindspore import Tensor - -from mindformers import CLIPTokenizer, AutoTokenizer - - -def generate_fake_vocab(output_path): - vocabs = ["[PAD]", "[unused1]", "[UNK]", "[CLS]", "[SEP]", "[MASK]", "hello", "world", "!"] - with open(os.path.join(output_path, 'vocab_file.txt'), 'w') as fp: - for item in vocabs: - fp.write(item + '\n') - - -class TestAutoTokenizerMethod: - """A test class for testing the AutoTokenizer""" - def setup_method(self): - self.output_path = os.path.join(os.path.dirname(__file__), 'test_tokenizer_output') - os.makedirs(self.output_path, exist_ok=True) - generate_fake_vocab(self.output_path) - - def teardown_method(self): - shutil.rmtree(self.output_path, ignore_errors=True) - - @pytest.mark.run(order=1) - def test_save_and_load_using_bert_tokenizer(self): - """ - Feature: The test load and save function for the clip tokenizer - Description: Load the tokenizer and then saved it - Expectation: The restored kwargs is not expected version. 
- """ - clip_tokenizer = CLIPTokenizer.from_pretrained("clip_vit_b_32") - res = clip_tokenizer.tokenize("hello world?") - assert isinstance(clip_tokenizer, CLIPTokenizer) - assert res == ['hello', 'world', '?'] - - @pytest.mark.run(order=2) - def test_load_from_yaml(self): - """ - Feature: The test load from yaml and save as the yaml for the tokenizer - Description: Load the tokenizer and then saved it - Expectation: The restored kwargs is not expected version. - """ - tokenizer = AutoTokenizer.from_pretrained("clip_vit_b_32") - res = tokenizer.tokenize("hello world?") - assert isinstance(tokenizer, CLIPTokenizer) - assert res == ['hello', 'world', '?'] - - -class TestClipTokenizerMethod: - """Test the basic usage of the CLIPTokenizer""" - def test_padding(self): - """ - Feature: The CLIPTokenizer test using padding - Description: Using call forward process of the tokenizer without error - Expectation: The returned ret is not equal to the target. - """ - clip_tokenizer = CLIPTokenizer.from_pretrained("clip_vit_b_32") - res = clip_tokenizer("hello world?", max_length=8, padding='max_length') - pad_id = clip_tokenizer.pad_token_id - assert res == {'attention_mask': [1, 1, 1, 1, 1, 0, 0, 0], - 'input_ids': [49406, 3306, 1002, 286, 49407, pad_id, pad_id, pad_id]}, f"The res is {res}." - - clip_tokenizer = CLIPTokenizer.from_pretrained("clip_vit_b_32") - res = clip_tokenizer("hello world?", max_length=8, padding='max_length', return_tensors='ms') - assert len(res) == 2 - for k in res.keys(): - assert isinstance(res[k], Tensor) - - clip_tokenizer = CLIPTokenizer.from_pretrained("clip_vit_b_32") - batch_inputs = ["hello world?", "Who are you?", "I am find, thank you."] - res = clip_tokenizer(batch_inputs, max_length=12, padding='max_length') - assert len(res) == 2 - assert res == {'input_ids': [[49406, 3306, 1002, 286, 49407, pad_id, pad_id, pad_id, pad_id, - pad_id, pad_id, pad_id], - [49406, 822, 631, 592, 286, 49407, pad_id, pad_id, pad_id, - pad_id, pad_id, pad_id], - [49406, 328, 687, 1416, 267, 1144, 592, 269, 49407, pad_id, pad_id, pad_id]], - 'attention_mask': [[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]]} diff --git a/tests/st/test_model/test_codegeex2_model/__init__.py b/tests/st/test_model/test_codegeex2_model/__init__.py deleted file mode 100644 index 690fb862..00000000 --- a/tests/st/test_model/test_codegeex2_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""test codegeex2 model.""" diff --git a/tests/st/test_model/test_codegeex2_model/base_model.py b/tests/st/test_model/test_codegeex2_model/base_model.py deleted file mode 100644 index f7cee62d..00000000 --- a/tests/st/test_model/test_codegeex2_model/base_model.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""CodeGeeX2 Base Model.""" -from mindformers.models.glm2 import ChatGLM2Config, ChatGLM2ForConditionalGeneration - -# copy from run_codegeex2_6b.yaml -BASE_CONFIG = { - 'add_bias_linear': False, - 'add_qkv_bias': True, - 'apply_query_key_layer_scaling': True, - 'apply_residual_connection_post_layernorm': False, - 'attention_dropout': 0.0, - 'attention_softmax_in_fp32': True, - 'bias_dropout_fusion': True, - 'compute_dtype': 'float16', - 'do_sample': True, - 'eos_token_id': 2, - 'ffn_hidden_size': 13696, - 'fp32_residual_connection': False, - 'hidden_dropout': 0.0, - 'hidden_size': 4096, - 'interleaved_qkv': False, - 'kv_channels': 128, - 'layernorm_compute_type': 'float32', - 'layernorm_epsilon': '1e-5', - 'max_decode_length': 256, - 'multi_query_attention': True, - 'multi_query_group_num': 2, - 'num_attention_heads': 32, - 'num_layers': 2, # 28 - 'padded_vocab_size': 65024, - 'param_init_type': 'float16', - 'post_layer_norm': True, - 'pre_seq_len': 'None', - 'prefix_projection': False, - 'quantization_bit': 0, - 'repetition_penalty': 1.0, - 'rmsnorm': True, - 'rotary_percent': 0.5, - 'seq_length': 2048, - 'softmax_compute_type': 'float32', - 'top_k': 1, - 'top_p': 1, - 'type': 'ChatGLM2Config', - 'use_flash_attention': False, - 'use_past': False} - - -def get_config(): - """get instanced model config.""" - return ChatGLM2Config(**BASE_CONFIG) - - -def get_model(config): - """get instanced model.""" - return ChatGLM2ForConditionalGeneration(config) diff --git a/tests/st/test_model/test_codegeex2_model/test_auto_class.py b/tests/st/test_model/test_codegeex2_model/test_auto_class.py deleted file mode 100644 index 1bcbcf93..00000000 --- a/tests/st/test_model/test_codegeex2_model/test_auto_class.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -""" -Test module for testing the codegeex2 interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_codegeex2_model/test_auto_class.py -""" -import os -import shutil -from mindspore import context - -from mindformers import MindFormerBook, AutoModel, PreTrainedModel, AutoConfig, PretrainedConfig, AutoTokenizer, \ - PreTrainedTokenizerBase - - -class TestGLMAutoClassMethod: - """A test class for testing Model classes""" - - def setup_method(self): - """setup method.""" - context.set_context(mode=0) - self.save_directory = MindFormerBook.get_default_checkpoint_save_folder() - self.test_llm_list = ["codegeex2_6b"] - - def teardown_method(self): - for model_or_config_type in self.test_llm_list: - shutil.rmtree(os.path.join(self.save_directory, model_or_config_type), ignore_errors=True) - - def test_codegeex2_model(self): - """ - Feature: AutoModel. - Description: Test to get LL-Model instance by input model type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model name, load model and weights - # Too time-cost, not used for now. - for model_type in self.test_llm_list: - model = AutoModel.from_pretrained(model_type, download_checkpoint=False) - assert isinstance(model, PreTrainedModel) - model.save_pretrained( - save_directory=os.path.join(self.save_directory, model_type), - save_name=model_type + "_model", - ) - - def test_codegeex2_config(self): - """ - Feature: AutoConfig. - Description: Test to get config instance by input config type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model config name, load model and weights - for config_type in self.test_llm_list: - model_config = AutoConfig.from_pretrained(config_type) - assert isinstance(model_config, PretrainedConfig) - model_config.save_pretrained( - save_directory=os.path.join(self.save_directory, config_type), - save_name=config_type + "_config", - ) - - - def test_codegeex2_tokenizer(self): - """ - Feature: AutoTokenizer, input config. - Description: Test to get tokenizer instance by input tokenizer type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input processor name - tokenizer_type = "codegeex2_6b" - tokenizer = AutoTokenizer.from_pretrained(tokenizer_type) - assert isinstance(tokenizer, PreTrainedTokenizerBase) - tokenizer.save_pretrained( - save_directory=os.path.join(self.save_directory, tokenizer_type), - save_name=tokenizer_type + "_tokenizer", - ) diff --git a/tests/st/test_model/test_codegeex2_model/test_eval.py b/tests/st/test_model/test_codegeex2_model/test_eval.py deleted file mode 100644 index 0f0391b4..00000000 --- a/tests/st/test_model/test_codegeex2_model/test_eval.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test codegeex2 evaluate. 
-How to run this: - pytest tests/st/test_model/test_codegeex2_model/test_eval.py -""" -import pytest -import mindspore as ms -from tests.utils.model_tester import ModelTester - -from .base_model import get_config, get_model - -ms.set_context(mode=0) - - -class TestCodeGeeX2Eval: - """A test class for testing model evaluate.""" - - @pytest.mark.level1 - @pytest.mark.platform_arm_ascend910b_training - @pytest.mark.env_onecard - def test_base_model(self): - """ - Feature: Base model evaluate - Description: Test base model evaluate. - Expectation: AssertionError - """ - runner = ModelTester(run_mode='eval', batch_size=4, use_label=True) - - model_config = get_config() - model_config.batch_size = runner.batch_size # set batch size for prediction - model_config.vocab_size = 32000 # default to use llama2 tokenizer - model_config.padded_vocab_size = model_config.vocab_size - - model = get_model(model_config) - - runner.set_eval(model, model_config, metric='PerplexityMetric') diff --git a/tests/st/test_model/test_codegeex2_model/test_predict.py b/tests/st/test_model/test_codegeex2_model/test_predict.py deleted file mode 100644 index 4b6b12f2..00000000 --- a/tests/st/test_model/test_codegeex2_model/test_predict.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test codegeex2 predict. -How to run this: - pytest tests/st/test_model/test_codegeex2_model/test_predict.py -""" -import pytest -import mindspore as ms -from tests.utils.model_tester import ModelTester - -from .base_model import get_config, get_model - -ms.set_context(mode=0) - - -class TestCodeGeeX2Predict: - """A test class for testing model prediction.""" - - @pytest.mark.level1 - @pytest.mark.platform_arm_ascend910b_training - @pytest.mark.env_onecard - def test_base_model(self): - """ - Feature: Base model predict - Description: Test base model prediction. - Expectation: AssertionError - """ - runner = ModelTester(run_mode='predict', batch_size=1, experiment_mode=False) - - model_config = get_config() - model_config.batch_size = runner.batch_size # set batch size for prediction - model_config.vocab_size = 32000 # default to use llama2 tokenizer - model_config.padded_vocab_size = model_config.vocab_size - - model = get_model(model_config) - - outputs = 'hello world.?}iclopedia?}?}?}?}?}iclopediaiclopedia?}iclopedia?}?}iclopediaFCFC' - runner.set_predict(model=model, expect_outputs=outputs) diff --git a/tests/st/test_model/test_codegeex2_model/test_train.py b/tests/st/test_model/test_codegeex2_model/test_train.py deleted file mode 100644 index 7ae54ec2..00000000 --- a/tests/st/test_model/test_codegeex2_model/test_train.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test codegeex2 train. -How to run this: - pytest tests/st/test_model/test_codegeex2_model/test_train.py -""" -import pytest -import mindspore as ms -from tests.utils.model_tester import ModelTester - -from .base_model import get_config, get_model - -ms.set_context(mode=0) - - -class TestCodeGeeX2Train: - """A test class for testing model training precision.""" - - @pytest.mark.level1 - @pytest.mark.platform_arm_ascend910b_training - @pytest.mark.env_onecard - def test_base_model(self): - """ - Feature: Base model train - Description: Test base model training precision. - Expectation: AssertionError - """ - runner = ModelTester(run_mode='train', batch_size=4, use_label=True, experiment_mode=False) - - model_config = get_config() - - loss_std = [11.298032, 11.289123, 11.289726, 11.276349, 11.286991, - 11.266462, 11.274662, 11.274948, 11.262068, 11.258616, - 11.253914, 11.257857, 11.259014, 11.240475, 11.241916, - 11.242459, 11.257033, 11.243998, 11.252337, 11.258551] - - model = get_model(model_config) - - model_config.seq_length -= 1 # set for generate data - runner.set_train(model, model_config, loss_std=loss_std) diff --git a/tests/st/test_model/test_glm_lora_model/__init__.py b/tests/st/test_model/test_glm_lora_model/__init__.py deleted file mode 100644 index cab403ce..00000000 --- a/tests/st/test_model/test_glm_lora_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""test glm with lora.""" diff --git a/tests/st/test_model/test_glm_lora_model/test_glm_lora_trainer.py b/tests/st/test_model/test_glm_lora_model/test_glm_lora_trainer.py deleted file mode 100644 index edc26fdd..00000000 --- a/tests/st/test_model/test_glm_lora_model/test_glm_lora_trainer.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -""" -Test module for testing the glm_lora interface used for mindformers. -How to run this: -pytest -v tests/st/test_model/test_glm_lora_model/test_glm_lora_trainer.py -""" -import numpy as np -import pytest - -import mindspore as ms -from mindspore.dataset import GeneratorDataset -from mindformers.models.glm import GLMForPreTraining, GLMConfig -from mindformers.pet.pet_config import LoraConfig -from mindformers.pet import get_pet_model -from mindformers import Trainer, TrainingArguments - -ms.set_context(mode=0) - - -def generator_train(): - """train dataset generator""" - - # name is input_ids, shape is (bs, seq_len), dtype is Int32 - # name is labels, shape is (bs, seq_len), dtype is Int32 - # name is position_ids, shape is (bs, 2, seq_len), dtype is Int32 - seq_len = 128 - input_ids = np.random.randint(low=0, high=15, size=(seq_len)).astype(np.int32) - labels = np.ones(seq_len).astype(np.int32) - position_ids = np.ones((2, seq_len)).astype(np.int32) - attention_mask = np.ones((1, seq_len, seq_len)).astype(np.int32) - train_data = (input_ids, labels, position_ids, attention_mask) - for _ in range(16): - yield train_data - - -def generator_eval(): - """eval dataset generator""" - - # name is input_ids, shape is (8, 256), dtype is Int32 - # name is labels, shape is (8, 256), dtype is Int32 - seq_len = 128 - input_ids = np.random.randint(low=0, high=15, size=(seq_len)).astype(np.int32) - labels = np.ones_like(seq_len).astype(np.int32) - train_data = (input_ids, labels) - for _ in range(16): - yield train_data - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_onecard -class TestGLMWithLoRATrainerMethod: - """A test class for testing pipeline.""" - - def setup_method(self): - """init task trainer.""" - args = TrainingArguments(batch_size=4) - train_dataset = GeneratorDataset(generator_train, - column_names=["input_ids", "labels", "position_ids", "attention_mask"]) - eval_dataset = GeneratorDataset(generator_eval, column_names=["input_ids", "labels"]) - train_dataset = train_dataset.batch(batch_size=4) - eval_dataset = eval_dataset.batch(batch_size=4) - - # set `vocab_size` to prevent generate token_id that out of vocab file - model_config = GLMConfig(num_layers=2, seq_length=128, vocab_size=120528) - model_config.pet_config = LoraConfig(lora_rank=8, lora_alpha=32, lora_dropout=0.1, - target_modules='.*query_key_value*') - model = GLMForPreTraining(model_config) - model = get_pet_model(model, model_config.pet_config) - - self.task_trainer = Trainer(task='text_generation', - model=model, - model_name='glm_6b_lora', - args=args, - train_dataset=train_dataset, - eval_dataset=eval_dataset) - - @pytest.mark.run(order=1) - def test_finetune(self): - """ - Feature: Trainer.finetune() - Description: Test trainer for finetune. - Expectation: TypeError, ValueError, RuntimeError - """ - self.task_trainer.config.runner_config.epochs = 1 - self.task_trainer.finetune() - - @pytest.mark.run(order=2) - def test_predict(self): - """ - Feature: Trainer.predict() - Description: Test trainer for predict. 
- Expectation: TypeError, ValueError, RuntimeError - """ - self.task_trainer.predict(input_data="hello world!", max_length=20, repetition_penalty=1, top_k=3, top_p=1) diff --git a/tests/st/test_model/test_glm_model/__init__.py b/tests/st/test_model/test_glm_model/__init__.py deleted file mode 100644 index 76f031a5..00000000 --- a/tests/st/test_model/test_glm_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""test glm.""" diff --git a/tests/st/test_model/test_glm_model/test_auto_class.py b/tests/st/test_model/test_glm_model/test_auto_class.py deleted file mode 100644 index 9f7f4c75..00000000 --- a/tests/st/test_model/test_glm_model/test_auto_class.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the glm interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_glm_model/test_auto_class.py -""" -import os -import shutil -from mindspore import context - -from mindformers import MindFormerBook -from mindformers import AutoModel -from mindformers import AutoConfig, AutoTokenizer, AutoProcessor -from mindformers.models import PreTrainedModel -from mindformers.models import PretrainedConfig, PreTrainedTokenizerBase, ProcessorMixin - - -class TestGLMAutoClassMethod: - """A test class for testing Model classes""" - - def setup_method(self): - """setup method.""" - context.set_context(mode=0) - self.save_directory = MindFormerBook.get_default_checkpoint_save_folder() - self.test_llm_list = ["glm_6b", "glm_6b_chat"] - - def teardown_method(self): - for model_or_config_type in self.test_llm_list: - shutil.rmtree(os.path.join(self.save_directory, model_or_config_type), ignore_errors=True) - - def test_glm_model(self): - """ - Feature: AutoModel. - Description: Test to get LL-Model instance by input model type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model name, load model and weights - # Too time-cost, not used for now. 
- for model_type in self.test_llm_list: - model = AutoModel.from_pretrained(model_type, download_checkpoint=False) - assert isinstance(model, PreTrainedModel) - model.save_pretrained( - save_directory=os.path.join(self.save_directory, model_type), - save_name=model_type + "_model", - ) - - def test_glm_config(self): - """ - Feature: AutoConfig. - Description: Test to get config instance by input config type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model config name, load model and weights - for config_type in self.test_llm_list: - model_config = AutoConfig.from_pretrained(config_type) - assert isinstance(model_config, PretrainedConfig) - model_config.save_pretrained( - save_directory=os.path.join(self.save_directory, config_type), - save_name=config_type + "_config", - ) - - def test_glm_processor(self): - """ - Feature: AutoProcessor. - Description: Test to get processor instance by input processor type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input processor name - processor_type = "glm_6b" - processor = AutoProcessor.from_pretrained(processor_type) - assert isinstance(processor, ProcessorMixin) - processor.save_pretrained( - save_directory=os.path.join(self.save_directory, processor_type), - save_name=processor_type + "_processor", - ) - - def test_glm_tokenizer(self): - """ - Feature: AutoTokenizer, input config. - Description: Test to get tokenizer instance by input tokenizer type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input processor name - tokenizer_type = "glm_6b" - tokenizer = AutoTokenizer.from_pretrained(tokenizer_type) - assert isinstance(tokenizer, PreTrainedTokenizerBase) - tokenizer.save_pretrained( - save_directory=os.path.join(self.save_directory, tokenizer_type), - save_name=tokenizer_type + "_tokenizer", - ) diff --git a/tests/st/test_model/test_glm_model/test_pipeline.py b/tests/st/test_model/test_glm_model/test_pipeline.py deleted file mode 100644 index 48acdc7e..00000000 --- a/tests/st/test_model/test_glm_model/test_pipeline.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the glm interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_glm_model/test_pipeline.py -""" -from mindspore import context - -from mindformers import pipeline - - -class TestGLMPipelineMethod: - """A test class for testing pipeline.""" - def setup_method(self): - """setup method.""" - context.set_context(mode=0) - self.test_llm_list = ['glm_6b', 'glm_6b_chat'] - - def test_pipeline(self): - """ - Feature: pipeline. - Description: Test pipeline by input model type. - Expectation: TypeError, ValueError, RuntimeError - """ - # Too time-cost, not used for now. 
- for model_type in self.test_llm_list: - task_pipeline = pipeline(task='text_generation', model=model_type, max_length=20) - task_pipeline("你好", top_k=3) diff --git a/tests/st/test_model/test_glm_model/test_trainer.py b/tests/st/test_model/test_glm_model/test_trainer.py deleted file mode 100644 index 074d1dc2..00000000 --- a/tests/st/test_model/test_glm_model/test_trainer.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the glm interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_glm_model/test_trainer.py -""" -import numpy as np -import pytest - -from mindspore import context -from mindspore.dataset import GeneratorDataset - -from mindformers import AutoTokenizer -from mindformers import GLMForPreTraining, GLMChatModel, GLMConfig -from mindformers import Trainer, TrainingArguments - - -def generator_train(): - """train dataset generator""" - seq_len = 128 - input_ids = np.random.randint(low=0, high=15, size=(seq_len,)).astype(np.int32) - label = np.random.randint(low=0, high=15, size=(seq_len,)).astype(np.int32) - position_ids = np.ones((2, seq_len)).astype(np.int64) - attention_mask = np.ones(shape=(seq_len, seq_len)).astype(np.int32) - train_data = (input_ids, label, position_ids, attention_mask) - for _ in range(512): - yield train_data - - -def generator_eval(): - """eval dataset generator""" - seq_len = 512 - input_ids = np.random.randint(low=0, high=15, size=(seq_len,)).astype(np.int32) - label = np.random.randint(low=0, high=15, size=(seq_len,)).astype(np.int32) - eval_data = (input_ids, label) - for _ in range(8): - yield eval_data - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_onecard -class TestGLMTrainerMethod: - """A test class for testing pipeline.""" - - def setup_method(self): - """init task trainer.""" - context.set_context(mode=0) - - args = TrainingArguments(num_train_epochs=1, batch_size=2) - train_dataset = GeneratorDataset(generator_train, - column_names=["input_ids", "label", "position_ids", "attention_mask"]) - eval_dataset = GeneratorDataset(generator_eval, column_names=["input_ids", "label"]) - train_dataset = train_dataset.batch(batch_size=2) - eval_dataset = eval_dataset.batch(batch_size=2) - - model_config = GLMConfig(num_layers=2, hidden_size=32, inner_hidden_size=None, - num_heads=2, position_encoding_2d=True, vocab_size=120528) - model = GLMForPreTraining(model_config) - self.tokenizer = AutoTokenizer.from_pretrained("glm_6b") - self.task_trainer = Trainer(task='text_generation', - model=model, - model_name='glm_6b', - tokenizer=self.tokenizer, - args=args, - train_dataset=train_dataset, - eval_dataset=eval_dataset) - - @pytest.mark.run(order=1) - def test_train(self): - """ - Feature: Trainer.train() - Description: Test trainer for train. 
- Expectation: TypeError, ValueError, RuntimeError - """ - self.task_trainer.train() - - # @pytest.mark.run(order=2) - # def test_eval(self): - # """ - # Feature: Trainer.evaluate() - # Description: Test trainer for evaluate. - # Expectation: TypeError, ValueError, RuntimeError - # """ - # self.task_trainer.evaluate() - - @pytest.mark.run(order=3) - def test_predict(self): - """ - Feature: Trainer.predict() - Description: Test trainer for predict. - Expectation: TypeError, ValueError, RuntimeError - """ - model_config = GLMConfig(num_layers=1, hidden_size=32, inner_hidden_size=None, - num_heads=2, position_encoding_2d=True, vocab_size=120528) - model = GLMChatModel(model_config) - task_trainer = Trainer(task='text_generation', - model=model, - tokenizer=self.tokenizer) - task_trainer.predict(input_data="你好", max_length=20) - - @pytest.mark.run(order=4) - def test_finetune(self): - """ - Feature: Trainer.finetune() - Description: Test trainer for finetune. - Expectation: TypeError, ValueError, RuntimeError - """ - self.task_trainer.finetune() diff --git a/tests/st/test_model/test_mae_model/__init__.py b/tests/st/test_model/test_mae_model/__init__.py deleted file mode 100644 index e66ad9ac..00000000 --- a/tests/st/test_model/test_mae_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""test mae.""" diff --git a/tests/st/test_model/test_mae_model/test_auto_class.py b/tests/st/test_model/test_mae_model/test_auto_class.py deleted file mode 100644 index dd21a27c..00000000 --- a/tests/st/test_model/test_mae_model/test_auto_class.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the swin interface used for mindformers. 
-How to run this: -pytest tests/st/test_model/test_mae_model/test_auto_class.py -""" -import os -import shutil -from mindformers import MindFormerBook, AutoModel, AutoConfig, AutoProcessor -from mindformers.models import PreTrainedModel, PretrainedConfig, ProcessorMixin - - -class TestMaeAutoClassMethod: - """A test class for testing Model classes""" - def setup_method(self): - """setup method.""" - self.save_directory = MindFormerBook.get_default_checkpoint_save_folder() - self.test_list = ['mae_vit_base_p16'] - - def teardown_method(self): - for model_or_config_type in self.test_list: - shutil.rmtree(os.path.join(self.save_directory, model_or_config_type), ignore_errors=True) - - def test_auto_model(self): - """ - Feature: AutoModel. - Description: Test to get Model instance by input model type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model name, load model and weights - for model_type in self.test_list: - model = AutoModel.from_pretrained(model_type, download_checkpoint=False) - assert isinstance(model, PreTrainedModel) - model.save_pretrained( - save_directory=os.path.join(self.save_directory, model_type), - save_name=model_type + '_model') - - def test_auto_config(self): - """ - Feature: AutoConfig. - Description: Test to get config instance by input config type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model config name, load model and weights - for config_type in self.test_list: - model_config = AutoConfig.from_pretrained(config_type) - assert isinstance(model_config, PretrainedConfig) - model_config.save_pretrained( - save_directory=os.path.join(self.save_directory, config_type), - save_name=config_type + '_config') - - def test_auto_processor(self): - """ - Feature: AutoProcessor. - Description: Test to get processor instance by input processor type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input processor name - for processor_type in self.test_list: - processor = AutoProcessor.from_pretrained(processor_type) - assert isinstance(processor, ProcessorMixin) - processor.save_pretrained( - save_directory=os.path.join(self.save_directory, processor_type), - save_name=processor_type + '_processor') diff --git a/tests/st/test_model/test_pangualpha_model/__init__.py b/tests/st/test_model/test_pangualpha_model/__init__.py deleted file mode 100644 index 3abb1dc6..00000000 --- a/tests/st/test_model/test_pangualpha_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""test pangualpha.""" diff --git a/tests/st/test_model/test_pangualpha_model/test_pangualpha_autoclass.py b/tests/st/test_model/test_pangualpha_model/test_pangualpha_autoclass.py deleted file mode 100644 index da663b82..00000000 --- a/tests/st/test_model/test_pangualpha_model/test_pangualpha_autoclass.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the gpt interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_llm_model/test_auto_class.py -""" -import os -import shutil -# pylint: disable=W0611 -from mindformers import MindFormerBook, AutoModel, AutoConfig, AutoTokenizer, AutoProcessor -# pylint: disable=W0611 -from mindformers.models import PreTrainedModel, PretrainedConfig, PreTrainedTokenizerBase, ProcessorMixin - - -class TestAutoClassMethod: - '''A test class for testing Model classes''' - def setup_method(self): - """setup method.""" - self.save_directory = MindFormerBook.get_default_checkpoint_save_folder() - self.test_llm_list = ['pangualpha_2_6b'] - - def teardown_method(self): - for model_or_config_type in self.test_llm_list: - shutil.rmtree(os.path.join(self.save_directory, model_or_config_type), ignore_errors=True) - - def test_llm_model(self): - """ - Feature: AutoModel. - Description: Test to get LL-Model instance by input model type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model name, load model and weights - for model_type in self.test_llm_list: - model = AutoModel.from_pretrained(model_type, download_checkpoint=False) - assert isinstance(model, PreTrainedModel) - model.save_pretrained( - save_directory=os.path.join(self.save_directory, model_type), - save_name=model_type + '_model') - - def test_llm_config(self): - """ - Feature: AutoConfig. - Description: Test to get config instance by input config type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model config name, load model and weights - for config_type in self.test_llm_list: - model_config = AutoConfig.from_pretrained(config_type) - assert isinstance(model_config, PretrainedConfig) - model_config.save_pretrained( - save_directory=os.path.join(self.save_directory, config_type), - save_name=config_type + '_config') - - def test_llm_processor(self): - """ - Feature: AutoConfig. - Description: Test to get config instance by input config type. 
- Expectation: TypeError, ValueError, RuntimeError - """ - # input processor name - for processor_type in self.test_llm_list: - processor = AutoProcessor.from_pretrained(processor_type) - assert isinstance(processor, ProcessorMixin) - processor.save_pretrained( - save_directory=os.path.join(self.save_directory, processor_type), - save_name=processor_type + '_processor') - - def test_llm_tokenizer(self): - """ - Feature: AutoTokenizer, input config. - Description: Test to get tokenizer instance by input tokenizer type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input processor name - for tokenizer_type in self.test_llm_list: - tokenizer = AutoTokenizer.from_pretrained(tokenizer_type) - assert isinstance(tokenizer, PreTrainedTokenizerBase) - tokenizer.save_pretrained( - save_directory=os.path.join(self.save_directory, tokenizer_type), - save_name=tokenizer_type + '_tokenizer') diff --git a/tests/st/test_model/test_pangualpha_model/test_pangualpha_pipeline.py b/tests/st/test_model/test_pangualpha_model/test_pangualpha_pipeline.py deleted file mode 100644 index 03902889..00000000 --- a/tests/st/test_model/test_pangualpha_model/test_pangualpha_pipeline.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the gpt interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_llm_model/test_pipeline.py -""" -# pylint: disable=W0611 -from mindformers import pipeline - - -class TestPipelineMethod: - """A test class for testing pipeline.""" - def setup_method(self): - """setup method.""" - self.test_llm_list = ['pangualpha_2_6b'] - - def test_pipeline(self): - """ - Feature: pipeline. - Description: Test pipeline by input model type. - Expectation: TypeError, ValueError, RuntimeError - """ - for model_type in self.test_llm_list: - task_pipeline = pipeline(task='text_generation', model=model_type, max_length=20) - task_pipeline("今天天气不错,适合", top_k=3) diff --git a/tests/st/test_model/test_pangualpha_model/test_pangualpha_trainer.py b/tests/st/test_model/test_pangualpha_model/test_pangualpha_trainer.py deleted file mode 100644 index 5e125ab0..00000000 --- a/tests/st/test_model/test_pangualpha_model/test_pangualpha_trainer.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -""" -Test module for testing the pangualpha interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_llm_model/test_pangualpha_trainer.py -""" -import numpy as np -import pytest - -import mindspore as ms - -from mindspore.dataset import GeneratorDataset -from mindformers.models.pangualpha.pangualpha import PanguAlphaHeadModel -from mindformers.models.pangualpha.pangualpha_config import PanguAlphaConfig -from mindformers import Trainer, TrainingArguments - -ms.set_context(mode=0) - - -def generator_train(): - """train dataset generator""" - seq_len = 129 - input_ids = np.random.randint(low=0, high=15, size=(seq_len,)).astype(np.int32) - position_ids = np.ones((seq_len)).astype(np.int32) - attention_mask = np.ones((seq_len, seq_len)).astype(np.int32) - train_data = (input_ids, position_ids, attention_mask) - for _ in range(16): - yield train_data - - -def generator_eval(): - """eval dataset generator""" - seq_len = 128 - input_ids = np.random.randint(low=0, high=15, size=(seq_len,)).astype(np.int32) - # input_mask = np.ones_like(input_ids) - train_data = (input_ids) - for _ in range(16): - yield train_data - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_onecard -class TestPanguAlphaTrainerMethod: - """A test class for testing pipeline.""" - - def setup_method(self): - """init task trainer.""" - args = TrainingArguments(batch_size=4, num_train_epochs=1) - train_dataset = GeneratorDataset(generator_train, column_names=["input_ids", "position_id", "attention_mask"]) - eval_dataset = GeneratorDataset(generator_eval, column_names=["input_ids"]) - train_dataset = train_dataset.batch(batch_size=4) - eval_dataset = eval_dataset.batch(batch_size=4) - - model_config = PanguAlphaConfig(num_layers=2, - hidden_size=128, - ffn_hidden_size=128*4, - num_heads=2, - seq_length=128) - model = PanguAlphaHeadModel(model_config) - - self.task_trainer = Trainer(task='text_generation', - model=model, - args=args, - train_dataset=train_dataset, - eval_dataset=eval_dataset) - - def test_train(self): - """ - Feature: Trainer.train() - Description: Test trainer for train. - Expectation: TypeError, ValueError, RuntimeError - """ - self.task_trainer.config.runner_config.epochs = 1 - self.task_trainer.train() - - def test_eval(self): - """ - Feature: Trainer.evaluate() - Description: Test trainer for evaluate. - Expectation: TypeError, ValueError, RuntimeError - """ - self.task_trainer.model.set_train(False) - self.task_trainer.evaluate() - - def test_predict(self): - """ - Feature: Trainer.predict() - Description: Test trainer for predict. - Expectation: TypeError, ValueError, RuntimeError - """ - self.task_trainer.predict(input_data="今天天气如何?", max_length=20, repetition_penalty=1, top_k=3, top_p=1) - - def test_finetune(self): - """ - Feature: Trainer.finetune() - Description: Test trainer for finetune. 
- Expectation: TypeError, ValueError, RuntimeError - """ - self.task_trainer.config.runner_config.epochs = 1 - self.task_trainer.finetune(finetune_checkpoint=True) diff --git a/tests/st/test_model/test_qa_model/__init__.py b/tests/st/test_model/test_qa_model/__init__.py deleted file mode 100644 index 619b10c1..00000000 --- a/tests/st/test_model/test_qa_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""test_qa_model.""" diff --git a/tests/st/test_model/test_qa_model/test_qa_model.py b/tests/st/test_model/test_qa_model/test_qa_model.py deleted file mode 100644 index 978c73f1..00000000 --- a/tests/st/test_model/test_qa_model/test_qa_model.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -""" -Test Module for testing functions of AutoModel and BertForQuestionAnswering class - -How to run this: -windows: pytest .\\tests\\st\\test_model\\test_qa_model\\test_qa_model.py -linux: pytest ./tests/st/test_model/test_qa_model/test_qa_model.py -""" -import os - -from mindformers import MindFormerBook, AutoModel, AutoConfig -from mindformers.models import BertForQuestionAnswering, PreTrainedModel -from mindformers.tools import logger - - -class TestModelMethod: - """A test class for testing Model classes""" - def setup_method(self): - """get_input""" - # fine-tuning - self.qa_checkpoint_dir = os.path.join(MindFormerBook.get_default_checkpoint_download_folder(), - 'qa') - self.qa_config_path = os.path.join(MindFormerBook.get_project_path(), - 'configs', 'qa', 'run_qa_bert_base_uncased.yaml') - self.qa_config = AutoConfig.from_pretrained('qa_bert_base_uncased') - - # evaluation and prediction - self.qa_squad_checkpoint_dir = os.path.join(MindFormerBook.get_default_checkpoint_download_folder(), - 'qa') - self.qa_squad_config_path = os.path.join(MindFormerBook.get_project_path(), - 'configs', 'qa', 'run_qa_bert_base_uncased.yaml') - self.qa_squad_config = AutoConfig.from_pretrained('qa_bert_base_uncased_squad') - - # save path - self.save_directory = os.path.join(MindFormerBook.get_default_checkpoint_save_folder(), 'qa') - - def test_auto_model(self): - """ - Feature: AutoModel, from_pretrained, from_config - Description: Test to get model instance by AutoModel.from_pretrained - and AutoModel.from_config - Expectation: TypeError, ValueError, RuntimeError - """ - AutoModel.show_support_list() - support_list = AutoModel.get_support_list() - logger.info(support_list) - - # fine-tuning part - # input model name - qa_model_a = AutoModel.from_pretrained('qa_bert_base_uncased', download_checkpoint=False) - # input yaml path - qa_model_c = AutoModel.from_config(self.qa_config_path, download_checkpoint=False) - - qa_model_a.save_pretrained(self.save_directory, save_name='qa_bert_base_uncased') - - assert isinstance(qa_model_a, BertForQuestionAnswering) - assert isinstance(qa_model_c, BertForQuestionAnswering) - assert isinstance(qa_model_a, PreTrainedModel) - assert isinstance(qa_model_c, PreTrainedModel) - - # evaluation and prediction test part - # input model name - qa_squad_model_a = AutoModel.from_pretrained('qa_bert_base_uncased_squad', download_checkpoint=False) - # # input yaml path - qa_squad_model_c = AutoModel.from_config(self.qa_squad_config_path, download_checkpoint=False) - - qa_squad_model_a.save_pretrained(self.save_directory, save_name='qa_bert_base_uncased_squad') - - assert isinstance(qa_squad_model_a, BertForQuestionAnswering) - assert isinstance(qa_squad_model_c, BertForQuestionAnswering) - assert isinstance(qa_squad_model_a, PreTrainedModel) - assert isinstance(qa_squad_model_c, PreTrainedModel) diff --git a/tests/st/test_model/test_sam_model/__init__.py b/tests/st/test_model/test_sam_model/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/st/test_model/test_sam_model/test_auto_class.py b/tests/st/test_model/test_sam_model/test_auto_class.py deleted file mode 100644 index a607af8b..00000000 --- a/tests/st/test_model/test_sam_model/test_auto_class.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the swin interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_swin_model/test_auto_class.py -""" -import os -import shutil -from mindformers import MindFormerBook, AutoModel, AutoConfig, AutoProcessor -from mindformers.models import PreTrainedModel, PretrainedConfig, ProcessorMixin - - -class TestSamAutoClassMethod: - """A test class for testing Model classes""" - def setup_method(self): - """setup method.""" - self.save_directory = MindFormerBook.get_default_checkpoint_save_folder() - self.test_list = ['sam_vit_b'] - - def teardown_method(self): - for model_or_config_type in self.test_list: - shutil.rmtree(os.path.join(self.save_directory, model_or_config_type), ignore_errors=True) - - def test_auto_model(self): - """ - Feature: AutoModel. - Description: Test to get Model instance by input model type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model name, load model and weights - for model_type in self.test_list: - model = AutoModel.from_pretrained(model_type, download_checkpoint=False) - assert isinstance(model, PreTrainedModel) - model.save_pretrained( - save_directory=os.path.join(self.save_directory, model_type), - save_name=model_type + '_model') - - def test_auto_config(self): - """ - Feature: AutoConfig. - Description: Test to get config instance by input config type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model config name, load model and weights - for config_type in self.test_list: - model_config = AutoConfig.from_pretrained(config_type) - assert isinstance(model_config, PretrainedConfig) - model_config.save_pretrained( - save_directory=os.path.join(self.save_directory, config_type), - save_name=config_type + '_config') - - def test_auto_processor(self): - """ - Feature: AutoProcessor. - Description: Test to get processor instance by input processor type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input processor name - for processor_type in self.test_list: - processor = AutoProcessor.from_pretrained(processor_type) - assert isinstance(processor, ProcessorMixin) - processor.save_pretrained( - save_directory=os.path.join(self.save_directory, processor_type), - save_name=processor_type + '_processor') diff --git a/tests/st/test_model/test_sam_model/test_pipeline.py b/tests/st/test_model/test_sam_model/test_pipeline.py deleted file mode 100644 index fb5154e7..00000000 --- a/tests/st/test_model/test_sam_model/test_pipeline.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the llama interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_llama_model/test_pipeline.py -""" -import cv2 -import numpy as np - -import mindspore as ms - -from mindformers import pipeline - -ms.set_context(mode=0) - - -class TestSamPipelineMethod: - """A test class for testing pipeline.""" - def setup_method(self): - """setup method.""" - self.test_llm_list = ['sam_vit_b'] - - def test_pipeline(self): - """ - Feature: pipeline. - Description: Test pipeline by input model type. - Expectation: TypeError, ValueError, RuntimeError - """ - for model_type in self.test_llm_list: - task_pipeline = pipeline(task='segment_anything', model=model_type) - - image = cv2.imread("scripts/examples/segment_anything/images/truck.jpg") - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - - # 0.使用task_pipeline.set_image提前抽取图像特征 - task_pipeline.set_image(image) - - # 1. 单点确定一个物体 - input_point = np.array([[500, 375]]) - input_label = np.array([1]) - task_pipeline({"points": input_point, - "labels": input_label}, - multimask_output=True) - - # 2.两点确定相同物体 - input_point = np.array([[500, 375], [1125, 625]]) - input_label = np.array([1, 1]) - task_pipeline({"points": input_point, - "labels": input_label}, - multimask_output=False) - - # 3.两点确定不同物体 - input_point = np.array([[[500, 375]], - [[1125, 625]]]) - input_label = np.array([[1], [1]]) - task_pipeline({"points": input_point, - "labels": input_label}, - multimask_output=False) - - # 4.一个前景点和背景点 - input_point = np.array([[500, 375], [1125, 625]]) - input_label = np.array([1, 0]) - task_pipeline({"points": input_point, - "labels": input_label}, - multimask_output=False) - - # 5.单框确定一个物体 - input_box = np.array([425, 600, 700, 875]) - task_pipeline({"boxes": input_box}, - multimask_output=False) - - # 6.框和背景点确定物体 - input_box = np.array([425, 600, 700, 875]) - input_point = np.array([[575, 750]]) - input_label = np.array([0]) - task_pipeline({"points": input_point, - "labels": input_label, - "boxes": input_box}, - multimask_output=False) - - # 7.多组框和点确定不同物体 - input_boxes = np.array([[425, 600, 700, 875], - [1360, 525, 1680, 780]]) - input_points = np.array([[[575, 750]], - [[1525, 670]]]) - input_labels = np.array([[1], [1]]) - task_pipeline({"points": input_points, - "labels": input_labels, - "boxes": input_boxes}, - multimask_output=False) - - # 8.多个框确定不同物体 - input_boxes = np.array([[75, 275, 1725, 850], - [425, 600, 700, 875], - [1375, 550, 1650, 800], - [1240, 675, 1400, 750]]) - task_pipeline({"boxes": input_boxes}, - multimask_output=False) - - # 单点确定一个物体: 传入cv2图像和prompt - input_point = np.array([[500, 375]]) - input_label = np.array([1]) - - image = cv2.imread("scripts/examples/segment_anything/images/truck.jpg") - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - task_pipeline({"image": image, - "points": input_point, - "labels": input_label}, - multimask_output=True) - - # 单点确定一个物体: 传入图像路径和prompt - task_pipeline({"image": "scripts/examples/segment_anything/images/truck.jpg", - "points": input_point, - "labels": input_label}, - multimask_output=True) diff --git a/tests/st/test_model/test_swin_model/__init__.py b/tests/st/test_model/test_swin_model/__init__.py deleted file mode 100644 index ecf2ba75..00000000 --- a/tests/st/test_model/test_swin_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei 
Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""test swin.""" diff --git a/tests/st/test_model/test_swin_model/test_auto_class.py b/tests/st/test_model/test_swin_model/test_auto_class.py deleted file mode 100644 index 141e4de8..00000000 --- a/tests/st/test_model/test_swin_model/test_auto_class.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the swin interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_swin_model/test_auto_class.py -""" -import os -import shutil -from mindformers import MindFormerBook, AutoModel, AutoConfig, AutoProcessor -from mindformers.models import PreTrainedModel, PretrainedConfig, ProcessorMixin - - -class TestSwinAutoClassMethod: - """A test class for testing Model classes""" - def setup_method(self): - """setup method.""" - self.save_directory = MindFormerBook.get_default_checkpoint_save_folder() - self.test_list = ['swin_base_p4w7'] - - def teardown_method(self): - for model_or_config_type in self.test_list: - shutil.rmtree(os.path.join(self.save_directory, model_or_config_type), ignore_errors=True) - - def test_auto_model(self): - """ - Feature: AutoModel. - Description: Test to get Model instance by input model type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model name, load model and weights - for model_type in self.test_list: - model = AutoModel.from_pretrained(model_type, download_checkpoint=False) - assert isinstance(model, PreTrainedModel) - model.save_pretrained( - save_directory=os.path.join(self.save_directory, model_type), - save_name=model_type + '_model') - - def test_auto_config(self): - """ - Feature: AutoConfig. - Description: Test to get config instance by input config type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model config name, load model and weights - for config_type in self.test_list: - model_config = AutoConfig.from_pretrained(config_type) - assert isinstance(model_config, PretrainedConfig) - model_config.save_pretrained( - save_directory=os.path.join(self.save_directory, config_type), - save_name=config_type + '_config') - - def test_auto_processor(self): - """ - Feature: AutoProcessor. - Description: Test to get processor instance by input processor type. 
- Expectation: TypeError, ValueError, RuntimeError - """ - # input processor name - for processor_type in self.test_list: - processor = AutoProcessor.from_pretrained(processor_type) - assert isinstance(processor, ProcessorMixin) - processor.save_pretrained( - save_directory=os.path.join(self.save_directory, processor_type), - save_name=processor_type + '_processor') diff --git a/tests/st/test_model/test_t5_model/__init__.py b/tests/st/test_model/test_t5_model/__init__.py deleted file mode 100644 index 2ba1f7fa..00000000 --- a/tests/st/test_model/test_t5_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""test t5.""" diff --git a/tests/st/test_model/test_t5_model/test_t5_generation.py b/tests/st/test_model/test_t5_model/test_t5_generation.py deleted file mode 100644 index 2885e21c..00000000 --- a/tests/st/test_model/test_t5_model/test_t5_generation.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -""" -Test Module for testing functions of Generator model class - -How to run this: -linux: pytest ./tests/st/test_model/test_t5_model/test_t5_generation.py - -""" -import pytest -import mindspore as ms - -from mindformers.models import T5ForConditionalGeneration, T5Tokenizer - -ms.set_context(mode=0) - - -def modify_batch_size(net, batch_size): - if hasattr(net, 'batch_size'): - net.batch_size = batch_size - for cell in net.cells(): - modify_batch_size(cell, batch_size) - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_onecard -class TestGeneratorUseT5: - """A test class for testing Model classes""" - def setup_class(self): - self.t5 = T5ForConditionalGeneration.from_pretrained("t5_small") - self.tokenizer = T5Tokenizer.from_pretrained("t5_small") - - @pytest.mark.parametrize('do_sample', [False, True]) - def test_single_inference(self, do_sample): - """ - Feature: Test input as single example for generator - Description: single example inference - Expectation: ValueError, AttributeError - """ - words = "translate the English to the Romanian: UN Chief Says There Is No Military Solution in Syria" - words = self.tokenizer(words, max_length=21, padding='max_length')['input_ids'] - modify_batch_size(self.t5, batch_size=1) - output = self.t5.generate(words, do_sample=do_sample) - output = self.tokenizer.decode(output[0], skip_special_tokens=True) - assert output == "eful ONU declară că nu există o soluţie militară în Siria" diff --git a/tests/st/test_model/test_t5_model/test_t5_tokenizer.py b/tests/st/test_model/test_t5_model/test_t5_tokenizer.py deleted file mode 100644 index f1db2c18..00000000 --- a/tests/st/test_model/test_t5_model/test_t5_tokenizer.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test Module for testing Tokenizer class - -How to run this: -linux: pytest ./tests/st/test_model/test_t5_model/test_t5_tokenizer.py -""" -from mindformers import T5Tokenizer, AutoTokenizer - - -class TestT5TokenizerMethod: - """A test class for testing the BertTokenizer""" - def setup_method(self): - self.tokenizer = T5Tokenizer.from_pretrained('t5_small') - self.auto_tokenizer = AutoTokenizer.from_pretrained('t5_small') - - def test_from_pretrained_tokenizer(self): - """ - Feature: The T5Tokenizer test using from python class - Description: Using call forward process of the tokenizer without error - Expectation: The returned ret is not equal to [[6, 7]]. - """ - self.tokenizer.show_support_list() - self.tokenizer("hello world") - - def test_auto_tokenizer(self): - """ - Feature: The T5Tokenizer test using auto_class - Description: Using call forward process of the tokenizer without error - Expectation: The returned ret is not equal to [[6, 7]]. 
- """ - self.auto_tokenizer("hello world") diff --git a/tests/st/test_model/test_t5_model/test_t5_trainer.py b/tests/st/test_model/test_t5_model/test_t5_trainer.py deleted file mode 100644 index 4a3a1962..00000000 --- a/tests/st/test_model/test_t5_model/test_t5_trainer.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the t5 trainer used for mindformers. -How to run this: -pytest tests/st/test_model/test_t5_model/test_t5_trainer.py -""" -import os -import shutil - -import numpy as np -import pytest -import mindspore as ms -from mindspore.dataset import MindDataset, GeneratorDataset -from mindspore.mindrecord import FileWriter - -from mindformers.trainer import Trainer, \ - TranslationTrainer, TrainingArguments -from mindformers import T5Config, T5ForConditionalGeneration - -ms.set_context(mode=0) - - -def generator(src_length=16, target_length=8): - """dataset generator""" - input_ids = np.random.randint(low=0, high=15, size=(src_length,)).astype(np.int32) - attention_mask = np.ones((src_length,)).astype(np.int32) - labels = np.random.randint(low=0, high=15, size=(target_length,)).astype(np.int32) - - for _ in range(2): - yield input_ids, attention_mask, labels - - -def write_mindrecord(ds_generator, data_record_path): - """Using the generator to get mindrecords""" - ds = GeneratorDataset(ds_generator, column_names=["input_ids", "attention_mask", "labels"]) - - writer = FileWriter(file_name=data_record_path, shard_num=1, overwrite=True) - data_schema = {"input_ids": {"type": "int32", "shape": [-1]}, - "attention_mask": {"type": "int32", "shape": [-1]}, - "labels": {"type": "int32", "shape": [-1]}} - writer.add_schema(data_schema, "test_schema") - for item in ds.create_dict_iterator(): - for k in item.keys(): - item[k] = item[k].asnumpy() - writer.write_raw_data([item]) - writer.commit() - - -def modify_attrs(net, key, value): - if hasattr(net, key): - setattr(net, key, value) - print(f"Set the net {net.__class__.__name__} with {key}:{value}") - for cell in net.cells(): - modify_attrs(cell, key, value) - - -def write_raw_text_data(stage, data_record_path): - """writes the fake translation data""" - source = ["We went through the whole range of emotions during this period.", - "The positive reaction of pilots and Federation officials makes me hope that this year we will " - "be organizing champions again" - " said rally manager, Dan Codreanu."] - target = ['Am trecut prin toată gama de trăiri în această perioadă.', - "Reacția pozitivă a piloților și oficialilor Federației mă face să sper că vom fi și în acest an " - "campion la organizare a spus managerul raliului, Dan Codreanu."] - - src_path = os.path.join(data_record_path, f'{stage}.source') - tgt_path = os.path.join(data_record_path, f'{stage}.target') - with open(src_path, 'w') as sfp: - with open(tgt_path, 'w') as tfp: - for x, y in zip(source, target): 
- sfp.write(x + '\n') - tfp.write(y + '\n') - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_onecard -class TestTranslationTrainer: - """Test Translation Trainer""" - def setup_class(self): - self.dir_path = os.path.join(os.path.dirname(__file__), 'fake_dataset') - os.makedirs(self.dir_path, exist_ok=True) - self.abs_path = os.path.join(self.dir_path, 't5_dataset') - write_mindrecord(generator(src_length=16, target_length=8), self.abs_path) - - self.raw_text_path = os.path.join(os.path.dirname(__file__), 'raw_text_dataset') - os.makedirs(self.raw_text_path, exist_ok=True) - write_raw_text_data(stage='train', data_record_path=self.raw_text_path) - - def teardown_class(self): - shutil.rmtree(self.dir_path, ignore_errors=True) - shutil.rmtree(self.raw_text_path, ignore_errors=True) - - def get_mindfiles_from_path(self, dir_path): - dataset_files = [] - for r, _, f in os.walk(dir_path): - for file in f: - if not file.endswith("db"): - dataset_files.append(os.path.join(r, file)) - return dataset_files - - @pytest.mark.run(order=1) - def test_trainer_with_translation_args_train(self): - """ - Feature: Create Trainer From Config - Description: Test Trainer API to train from config - Expectation: TypeError - """ - batch_size = 1 - config = TrainingArguments(num_train_epochs=1, batch_size=batch_size, seed=2022, - optim="adamw", adam_beta1=0.9, learning_rate=0.001) - - dataset = MindDataset(dataset_files=self.get_mindfiles_from_path(self.dir_path), - columns_list=["input_ids", "attention_mask", "labels"]) - dataset = dataset.batch(batch_size=batch_size) - dataset = dataset.repeat(1) - - model_config = T5Config(batch_size=batch_size, num_heads=8, num_layers=1, hidden_size=32, - seq_length=16, max_decode_length=8) - # Model - model = T5ForConditionalGeneration(model_config) - mim_trainer = Trainer(task='translation', - model=model, - args=config, - train_dataset=dataset) - mim_trainer.train() - - @pytest.mark.run(order=2) - def test_trainer_predict(self): - """ - Feature: Test Predict of the Trainer - Description: Test Predict - Expectation: TypeError - """ - # change the length for quick training - model_config = T5Config(seq_length=32, max_decode_length=32) - model = T5ForConditionalGeneration(model_config) - mim_trainer = TranslationTrainer(model_name="t5_small") - mim_trainer.predict(input_data="hello world", network=model) diff --git a/tests/st/test_model/test_tokcls_model/__init__.py b/tests/st/test_model/test_tokcls_model/__init__.py deleted file mode 100644 index 17e19c02..00000000 --- a/tests/st/test_model/test_tokcls_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ -"""test_ner_model.""" diff --git a/tests/st/test_model/test_tokcls_model/test_tokcls_model.py b/tests/st/test_model/test_tokcls_model/test_tokcls_model.py deleted file mode 100644 index da1d01a5..00000000 --- a/tests/st/test_model/test_tokcls_model/test_tokcls_model.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -Test Module for testing functions of AutoModel and BertForTokenClassification class - -How to run this: -windows: pytest .\\tests\\st\\test_model\\test_tokcls_model\\test_tokcls_model.py -linux: pytest ./tests/st/test_model/test_tokcls_model/test_tokcls_model.py -""" -import os -import shutil -from mindformers import MindFormerBook, AutoModel, AutoConfig -from mindformers.models import BertForTokenClassification -from mindformers.tools import logger - - -class TestModelMethod: - """A test class for testing Model classes""" - def setup_class(self): - """get_input""" - # fine-tuning - self.tokcls_checkpoint_dir = os.path.join(MindFormerBook.get_default_checkpoint_download_folder(), - 'tokcls') - self.tokcls_config_path = os.path.join(MindFormerBook.get_project_path(), - 'configs', 'tokcls', 'run_tokcls_bert_base_chinese.yaml') - self.tokcls_config = AutoConfig.from_pretrained('tokcls_bert_base_chinese') - - # evaluation and prediction - self.tokcls_cluener_checkpoint_dir = os.path.join(MindFormerBook.get_default_checkpoint_download_folder(), - 'tokcls') - self.tokcls_cluener_config_path = os.path.join(MindFormerBook.get_project_path(), - 'configs', 'tokcls', 'run_tokcls_bert_base_chinese_cluener.yaml') - self.tokcls_cluener_config = AutoConfig.from_pretrained('tokcls_bert_base_chinese_cluener') - - # save path - self.save_directory = os.path.join(MindFormerBook.get_default_checkpoint_save_folder(), 'tokcls') - - def teardown_class(self): - shutil.rmtree(self.save_directory, ignore_errors=True) - - # the first method to load model, AutoModel - def test_auto_model(self): - """ - Feature: AutoModel, from_pretrained, from_config - Description: Test to get model instance by AutoModel.from_pretrained - and AutoModel.from_config - Expectation: TypeError, ValueError, RuntimeError - """ - AutoModel.show_support_list() - support_list = AutoModel.get_support_list() - logger.info(support_list) - - # fine-tuning part - # input model name - tokcls_model_a = AutoModel.from_pretrained('tokcls_bert_base_chinese', download_checkpoint=False) - - tokcls_model_a.save_pretrained(self.save_directory, save_name='tokcls_bert_base_chinese') - - assert isinstance(tokcls_model_a, BertForTokenClassification) - - # evaluation and prediction test part - # input model name - tokcls_cluener_model_a = \ - AutoModel.from_pretrained('tokcls_bert_base_chinese_cluener', download_checkpoint=False) - - tokcls_cluener_model_a.save_pretrained(self.save_directory, - 
save_name='tokcls_bert_base_chinese_cluener') - - assert isinstance(tokcls_cluener_model_a, BertForTokenClassification) diff --git a/tests/st/test_model/test_tokcls_model/test_tokcls_tokenizer.py b/tests/st/test_model/test_tokcls_model/test_tokcls_tokenizer.py deleted file mode 100644 index 198c461a..00000000 --- a/tests/st/test_model/test_tokcls_model/test_tokcls_tokenizer.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test Module for testing Tokenizer class - -How to run this: -linux: pytest ./tests/st/test_model/test_tokcls_model/test_tokcls_tokenizer.py -""" - -from mindformers import BertTokenizer, AutoTokenizer - - -class TestBertTokenizerForChineseMethod: - """A test class for testing the AutoTokenizer""" - - def test_from_pretrained(self): - """ - Feature: The BertTokenizer test using from python class - Description: Using call forward process of the tokenizer without error - Expectation: The returned ret is not equal to [[6, 7]]. - """ - tokenizer = BertTokenizer.from_pretrained('tokcls_bert_base_chinese') - tokenizer.show_support_list() - res = tokenizer.tokenize("你好,世界!") - - assert isinstance(tokenizer, BertTokenizer) - assert res == ['你', '好', ',', '世', '界', '!'] - - tokenizer = BertTokenizer.from_pretrained('tokcls_bert_base_chinese_cluener') - tokenizer.show_support_list() - res = tokenizer.tokenize("你好,世界!") - - assert isinstance(tokenizer, BertTokenizer) - assert res == ['你', '好', ',', '世', '界', '!'] - - def test_auto_tokenizer(self): - """ - Feature: The BertTokenizer test using auto_class - Description: Using call forward process of the tokenizer without error - Expectation: The returned ret is not equal to [[6, 7]]. - """ - tokenizer = AutoTokenizer.from_pretrained('tokcls_bert_base_chinese') - tokenizer.show_support_list() - res = tokenizer.tokenize("你好,世界!") - - assert isinstance(tokenizer, BertTokenizer) - assert res == ['你', '好', ',', '世', '界', '!'] - - tokenizer = AutoTokenizer.from_pretrained('tokcls_bert_base_chinese_cluener') - tokenizer.show_support_list() - res = tokenizer.tokenize("你好,世界!") - - assert isinstance(tokenizer, BertTokenizer) - assert res == ['你', '好', ',', '世', '界', '!'] diff --git a/tests/st/test_model/test_txtcls_model/__init__.py b/tests/st/test_model/test_txtcls_model/__init__.py deleted file mode 100644 index a113cefa..00000000 --- a/tests/st/test_model/test_txtcls_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""test_txtcls_model.""" diff --git a/tests/st/test_model/test_txtcls_model/test_txtcls_model.py b/tests/st/test_model/test_txtcls_model/test_txtcls_model.py deleted file mode 100644 index 3a8e12b1..00000000 --- a/tests/st/test_model/test_txtcls_model/test_txtcls_model.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2022 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -""" -Test Module for testing functions of AutoModel and BertForMultipleChoice class - -How to run this: -windows: pytest .\\tests\\st\\test_model\\test_txtcls_model\\test_txtcls_model.py -linux: pytest ./tests/st/test_model/test_txtcls_model/test_txtcls_model.py -""" -import os -import shutil -from mindformers import MindFormerBook, AutoModel, AutoConfig -from mindformers.models import BertForMultipleChoice -from mindformers.tools import logger - - -class TestModelMethod: - """A test class for testing Model classes""" - def setup_class(self): - """get_input""" - # fine-tuning - self.txtcls_checkpoint_dir = os.path.join(MindFormerBook.get_default_checkpoint_download_folder(), - 'txtcls') - self.txtcls_config_path = os.path.join(MindFormerBook.get_project_path(), - 'configs', 'txtcls', 'run_txtcls_bert_base_uncased.yaml') - self.txtcls_config = AutoConfig.from_pretrained('txtcls_bert_base_uncased') - - # evaluation and prediction - self.txtcls_mnli_checkpoint_dir = os.path.join(MindFormerBook.get_default_checkpoint_download_folder(), - 'txtcls') - self.txtcls_mnli_config_path = os.path.join(MindFormerBook.get_project_path(), - 'configs', 'txtcls', 'run_txtcls_bert_base_uncased_mnli.yaml') - self.txtcls_mnli_config = AutoConfig.from_pretrained('txtcls_bert_base_uncased_mnli') - - # save path - self.save_directory = os.path.join(MindFormerBook.get_default_checkpoint_save_folder(), 'txtcls') - - def teardown_class(self): - shutil.rmtree(self.save_directory, ignore_errors=True) - - # the first method to load model, AutoModel - def test_auto_model(self): - """ - Feature: AutoModel, from_pretrained, from_config - Description: Test to get model instance by AutoModel.from_pretrained - and AutoModel.from_config - Expectation: TypeError, ValueError, RuntimeError - """ - AutoModel.show_support_list() - support_list = AutoModel.get_support_list() - logger.info(support_list) - - # fine-tuning part - # input model name - txtcls_model_a = AutoModel.from_pretrained('txtcls_bert_base_uncased', download_checkpoint=False) - - 
txtcls_model_a.save_pretrained(self.save_directory, save_name='txtcls_bert_base_uncased') - - # evaluation and prediction test part - # input model name - txtcls_mnli_model_a = AutoModel.from_pretrained('txtcls_bert_base_uncased_mnli', download_checkpoint=False) - - txtcls_mnli_model_a.save_pretrained(self.save_directory, save_name='txtcls_bert_base_uncased_mnli') - - assert isinstance(txtcls_mnli_model_a, BertForMultipleChoice) diff --git a/tests/st/test_model/test_vit_model/__init__.py b/tests/st/test_model/test_vit_model/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/st/test_model/test_vit_model/test_auto_class.py b/tests/st/test_model/test_vit_model/test_auto_class.py deleted file mode 100644 index 64eba0db..00000000 --- a/tests/st/test_model/test_vit_model/test_auto_class.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the swin interface used for mindformers. -How to run this: -pytest tests/st/test_model/test_swin_model/test_auto_class.py -""" -import os -import shutil -from mindformers import MindFormerBook, AutoModel, AutoConfig, AutoProcessor -from mindformers.models import PreTrainedModel, PretrainedConfig, ProcessorMixin - - -class TestVitAutoClassMethod: - """A test class for testing Model classes""" - def setup_method(self): - """setup method.""" - self.save_directory = MindFormerBook.get_default_checkpoint_save_folder() - self.test_list = ['vit_base_p16'] - - def teardown_method(self): - for model_or_config_type in self.test_list: - shutil.rmtree(os.path.join(self.save_directory, model_or_config_type), ignore_errors=True) - - def test_auto_model(self): - """ - Feature: AutoModel. - Description: Test to get Model instance by input model type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model name, load model and weights - for model_type in self.test_list: - model = AutoModel.from_pretrained(model_type, download_checkpoint=False) - assert isinstance(model, PreTrainedModel) - model.save_pretrained( - save_directory=os.path.join(self.save_directory, model_type), - save_name=model_type + '_model') - - def test_auto_config(self): - """ - Feature: AutoConfig. - Description: Test to get config instance by input config type. - Expectation: TypeError, ValueError, RuntimeError - """ - # input model config name, load model and weights - for config_type in self.test_list: - model_config = AutoConfig.from_pretrained(config_type) - assert isinstance(model_config, PretrainedConfig) - model_config.save_pretrained( - save_directory=os.path.join(self.save_directory, config_type), - save_name=config_type + '_config') - - def test_auto_processor(self): - """ - Feature: AutoProcessor. - Description: Test to get processor instance by input processor type. 
- Expectation: TypeError, ValueError, RuntimeError - """ - # input processor name - for processor_type in self.test_list: - processor = AutoProcessor.from_pretrained(processor_type) - assert isinstance(processor, ProcessorMixin) - processor.save_pretrained( - save_directory=os.path.join(self.save_directory, processor_type), - save_name=processor_type + '_processor') diff --git a/tests/st/test_model/test_wizardcoder_model/__init__.py b/tests/st/test_model/test_wizardcoder_model/__init__.py deleted file mode 100644 index a9c247fa..00000000 --- a/tests/st/test_model/test_wizardcoder_model/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2023 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""test wizardcoder.""" diff --git a/tests/st/test_model/test_wizardcoder_model/test_trainer.py b/tests/st/test_model/test_wizardcoder_model/test_trainer.py deleted file mode 100644 index 49711dc3..00000000 --- a/tests/st/test_model/test_wizardcoder_model/test_trainer.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the wizardcoder interface used for mindformers. 
-How to run this: -pytest --disable-warnings -vs tests/st/test_model/test_wizardcoder_model/test_trainer.py -""" - -import os -import sys -import pytest - -import mindspore as ms - -from mindformers import Trainer, TrainingArguments - - -def dir_path(path, times: int): - if times > 0: - return dir_path(os.path.dirname(path), times - 1) - return path - - -wizardcoder_path = os.path.join(dir_path(__file__, 5), "research/wizardcoder") -sys.path.append(wizardcoder_path) -ms.set_context(mode=0) - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_onecard -class TestWizardcoderTrainerMethod: - """A test class for testing trainer.""" - - def setup_method(self): - """init task trainer.""" - from research.wizardcoder.wizardcoder import WizardCoderLMHeadModel - from research.wizardcoder.wizardcoder_config import WizardCoderConfig - args = TrainingArguments(batch_size=1, num_train_epochs=1) - - model_config = WizardCoderConfig(num_layers=2, batch_size=1) - model = WizardCoderLMHeadModel(model_config) - - self.task_trainer = Trainer(task='text_generation', - model=model, - args=args) - - @pytest.mark.run(order=1) - def test_predict(self): - """ - Feature: Trainer.predict() - Description: Test trainer for predict. - Expectation: TypeError, ValueError, RuntimeError - """ - self.task_trainer.predict(input_data="hello world!", max_length=20, repetition_penalty=1, top_k=3, top_p=1) diff --git a/tests/st/test_model/test_wizardcoder_model/test_training_precision.py b/tests/st/test_model/test_wizardcoder_model/test_training_precision.py deleted file mode 100644 index 9b447985..00000000 --- a/tests/st/test_model/test_wizardcoder_model/test_training_precision.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright 2024 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -""" -Test module for testing the wizardcoder interface used for mindformers. 
-How to run this: -pytest --disable-warnings -vs tests/st/test_model/test_wizardcoder_model/test_training_precision.py -""" -import os -import sys -import numpy as np -import pytest - -import mindspore as ms -from mindspore import set_seed -from mindspore.dataset import GeneratorDataset - -from mindformers import Trainer, TrainingArguments, CosineWithWarmUpLR, FP32StateAdamWeightDecay -from mindformers.trainer.optimizer_grouped_parameters import get_optimizer_grouped_parameters - -from tests.st.training_checker import TrainingChecker - -ms.set_context(mode=0) - - -def dir_path(path, times: int): - if times > 0: - return dir_path(os.path.dirname(path), times - 1) - return path - - -wizardcoder_path = os.path.join(dir_path(__file__, 5), "research/wizardcoder") -sys.path.append(wizardcoder_path) -ms.set_context(mode=0) - -def generator_train(): - """train dataset generator""" - seq_len = 1025 - step_num = 20 - batch_size = 1 - vocab_size = 49153 - input_ids = np.random.randint(low=0, high=vocab_size, size=(step_num * batch_size, seq_len,)).astype(np.int32) - for idx in range(len(input_ids)): - yield input_ids[idx] - - -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend910b_training -@pytest.mark.env_onecard -class TestWizardcoderPrecision: - """A test class for testing training precision.""" - - def setup_method(self): - """init task trainer.""" - set_seed(0) - np.random.seed(0) - from research.wizardcoder.wizardcoder import WizardCoderLMHeadModel - from research.wizardcoder.wizardcoder_config import WizardCoderConfig - - args = TrainingArguments(batch_size=1, num_train_epochs=1) - train_dataset = GeneratorDataset(generator_train, column_names=["input_ids"]) - train_dataset = train_dataset.batch(batch_size=1) - - model_config = WizardCoderConfig(num_layers=2, batch_size=1) - model = WizardCoderLMHeadModel(model_config) - - lr_schedule = CosineWithWarmUpLR(learning_rate=2.e-5, lr_end=1.e-6, warmup_steps=0, total_steps=20) - group_params = get_optimizer_grouped_parameters(model=model) - optimizer = FP32StateAdamWeightDecay(params=group_params, - beta1=0.9, - beta2=0.95, - eps=1.e-8, - learning_rate=lr_schedule) - - loss_list_std = [10.871237, 10.868160, 10.860825, 10.848734, 10.861235, - 10.875328, 10.860098, 10.858349, 10.872917, 10.871431, - 10.861275, 10.871601, 10.874082, 10.858971, 10.849184, - 10.873372, 10.857426, 10.873322, 10.867041, 10.870938] - callback = TrainingChecker(loss_list_std=loss_list_std) - - self.task_trainer = Trainer(task='text_generation', - model=model, - args=args, - train_dataset=train_dataset, - callbacks=callback, - optimizers=optimizer) - - @pytest.mark.run(order=1) - def test_train(self): - """ - Feature: Trainer.train() - Description: Test trainer for train. 
- Expectation: AssertionError - """ - self.task_trainer.config.runner_config.epochs = 1 - self.task_trainer.config.runner_config.sink_mode = False - self.task_trainer.config.runner_wrapper.scale_sense.loss_scale_value = 1024 - self.task_trainer.config.callbacks = self.task_trainer.config.callbacks[:1] - self.task_trainer.train() -- Gitee From e3e4a1bd78a4a55c5ed45f694c88c1a1d98ea435 Mon Sep 17 00:00:00 2001 From: Yule100 <2538776509@qq.com> Date: Thu, 18 Jul 2024 11:53:26 +0800 Subject: [PATCH 03/33] =?UTF-8?q?fixed=20ee25c20=20from=20https://gitee.co?= =?UTF-8?q?m/yule100/mindformers1/pulls/3564=20=E5=90=8C=E6=AD=A5=E6=9B=B4?= =?UTF-8?q?=E6=96=B0baichuan2=5F7b=E6=8E=A8=E7=90=86=E7=A4=BA=E4=BE=8B?= =?UTF-8?q?=E7=BB=93=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- research/baichuan2/baichuan2.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/research/baichuan2/baichuan2.md b/research/baichuan2/baichuan2.md index bd8c7f13..61f17e3d 100644 --- a/research/baichuan2/baichuan2.md +++ b/research/baichuan2/baichuan2.md @@ -263,8 +263,8 @@ bash scripts/examples/baichuan2/run_baichuan2_predict.sh single \ research/baichuan2/predict_baichuan2_7b.yaml \ path/to/baichuan2_7b_chat.ckpt \ path/to/tokenizer.model \ - "你好。" -# 输出推理结果:你好,很高兴和您交流。请问有什么问题我可以帮助您解答? + "你是谁?" +# 输出推理结果:我是百川大模型,是由百川智能的工程师们创造的大语言模型,我可以和人类进行自然交流、解答问题、协助创作,帮助大众轻松、普惠的获得世界知识和专业服务。如果你有任何问题,可以随时向我提问 # baichuan2 13b bash scripts/examples/baichuan2/run_baichuan2_predict.sh single \ -- Gitee From cceb6bb8d954082cb55e6d528f745b363ae6f9f9 Mon Sep 17 00:00:00 2001 From: Hsshuai Date: Tue, 16 Jul 2024 19:13:44 +0800 Subject: [PATCH 04/33] add ReleaseNote for version r1.2.0 --- docs/RELEASE_NOTE.md | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 docs/RELEASE_NOTE.md diff --git a/docs/RELEASE_NOTE.md b/docs/RELEASE_NOTE.md new file mode 100644 index 00000000..69986c6c --- /dev/null +++ b/docs/RELEASE_NOTE.md @@ -0,0 +1,43 @@ +# MindSpore Transformers 1.2.0 RELEASE NOTE + +以下为MindSpore Transformers (以下称为MindFormers) 套件 1.2.0 版本的变更日志,相较于1.1.0版本有以下关键新特性和bug +fix。 + +## 新特性 + +- **新增模型支持带框架推理**:新增支持模型包含Qwen1.5_7b、Qwen1.5_14b、Qwen1.5_72b、Llama3_70b、Yi_34b等。 +- **新增模型支持bfloat16训练**:新增支持模型包含Qwen1.5_7b、Qwen1.5_14b、Qwen1.5_72b、Llama3_70b、Yi_34b等。 +- [AdamW优化器](https://gitee.com/mindspore/mindformers/pulls/3310):新增AdamW优化器,对齐Megatron AdamW。 +- **支持MindIE进行服务化部署**:[MindIE](https://www.hiascend.com/software/mindie),全称Mind Inference + Engine,是华为昇腾针对AI全场景业务的推理加速套件。MindFormers新增对MindIE的对接,承载在模型应用层MindIE-LLM,通过MindIE-Service对MindFormers中LLM模型进行部署。 +- [长序列训练](https://gitee.com/mindspore/mindformers/tree/r1.2.0/docs/feature_cards/Long_Sequence_Training.md):新增支持长序列训练特性,通过在配置yaml文件中设置`parallel_config.context_parallel`开启序列并行,当前支持32k至256k。 +- [断点续训权重加载2.0](https://gitee.com/mindspore/mindformers/tree/r1.2.0/docs/feature_cards/Resume_Training.md):断点续训场景下,新增指定续训权重功能,新增故障恢复下进行权重完整性校验并自动加载最新完整权重。 +- [权重自动转换2.0](https://gitee.com/mindspore/mindformers/tree/r1.2.0/docs/feature_cards/Transform_Ckpt.md):自动权重转换新增多进程转换。 + +## 新模型 + +以下为新支持模型: + +| 模型 | 规格 | +|----------------------------------------------------------------------------------------------|--------------------------------------------------------------| +| [Mixtral](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/mixtral/mixtral.md) | Mixtral_8x7b(32k预训练) | +| [Qwen1.5](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/qwen1_5/qwen1_5.md) | 
Qwen1.5_7b(预训练、微调、推理)、Qwen1.5_14b(预训练、微调、推理)、Qwen1.5_72b(预训练) | +| [Llama3](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/llama3/llama3.md) | Llama3_70b(预训练、微调) | +| [Deepseek](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/deepseek/deepseek.md) | Deepseek_33b(微调) | +| [Yi](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/yi/yi.md) | Yi_6b(微调)、Yi_34b(微调) | +| [QwenVL](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/qwenvl/qwenvl.md) | QwenVL_9.6b(微调、推理) | + +## Bugfix + +在当前版本发布周期内,我们进行了模型/功能/易用性/文档等诸多方面的Bugfix,在此仅列举部分修复内容: + +- [!3345](https://gitee.com/mindspore/mindformers/pulls/3345):修复`Linear`在`transpose_b=False`时传入正确`weight`仍报错问题。 +- [!3277](https://gitee.com/mindspore/mindformers/pulls/3277):修复使用梯度累积时,`warpper`传入了错误的`micro_batch_num`问题。 + +## 贡献者 + +感谢以下人员做出的贡献: + +Chenhua Geng,heqinglin,koukairui,renyujin,shuchi,陈心锐,陈子恒,冯浩,胡思超,黄磊,黄生帅,黄勇,黄子灵,倪钰鑫,苏海波,李子垠,杨星宇,牛君豪,张森镇,张又文,谭纬城,吴致远,杨星宇,刘群,曹宇麟,方泽华,金仁操,刘群,李永文,钱驾宏,吴昊天,杨璇,汪家傲,范益,陈昱坤,李洋 + +欢迎以任何形式对项目提供贡献! -- Gitee From 94d1eb25440f90c01b4905cadbef25e79a77da66 Mon Sep 17 00:00:00 2001 From: lzy0920232 Date: Mon, 22 Jul 2024 11:12:09 +0800 Subject: [PATCH 05/33] fix_text_1.2 --- docs/feature_cards/Long_Sequence_Training.md | 2 +- mindformers/models/llama/llama_interleave.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/feature_cards/Long_Sequence_Training.md b/docs/feature_cards/Long_Sequence_Training.md index 9679d6a1..678e7a1b 100644 --- a/docs/feature_cards/Long_Sequence_Training.md +++ b/docs/feature_cards/Long_Sequence_Training.md @@ -20,7 +20,7 @@ - **context_parallel**: 序列并行 - **pipeline_stage**: 流水线并行 -此外,当前序列并行方案暂未兼容词表模型并行切分,开启序列并行时需要将参数vocab_emb_dp设置为True。并且本序列并行方案与use_seq_parallel使能的序列并行方案。 +此外,当前序列并行方案暂未兼容词表模型并行切分,开启序列并行时需要将参数vocab_emb_dp设置为True。并且本序列并行方案与use_seq_parallel使能的序列并行方案暂不兼容。 序列并行配置参考样例: diff --git a/mindformers/models/llama/llama_interleave.py b/mindformers/models/llama/llama_interleave.py index a0382848..87a045e9 100644 --- a/mindformers/models/llama/llama_interleave.py +++ b/mindformers/models/llama/llama_interleave.py @@ -245,7 +245,6 @@ class LLamaAttentionInterleave(nn.Cell): self.merger_head_transpose.shard(in_strategy=layout_merger_head_transpose) else: self.merger_head_transpose.shard(((dp, mp, 1, 1),)) - self.merger_head_transpose.shard(((dp, mp, 1, 1),)) self.batch_matmul_q_k.shard(((dp, mp, 1, 1), (dp, mp, 1, 1))) self.batch_matmul.shard(((dp, mp, 1, 1), (dp, mp, 1, 1))) self.mul.shard(((dp, mp, 1, 1), ())) -- Gitee From 6cfbef2c18ad18f8aa910077a8cdd4f9ab394229 Mon Sep 17 00:00:00 2001 From: suhaibo Date: Mon, 22 Jul 2024 06:29:23 +0000 Subject: [PATCH 06/33] update .gitee/PULL_REQUEST_TEMPLATE.zh-CN.md. 
Signed-off-by: suhaibo --- .gitee/PULL_REQUEST_TEMPLATE.zh-CN.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md b/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md index 66d43320..b89c2015 100644 --- a/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md +++ b/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md @@ -7,8 +7,12 @@ ### 描述(做了什么,变更了什么) -### 测试用例(新增、改动、可能影响的功能) +### check list +#### 是否完成方案评审或问题根因分析(Y/N) +#### 是否完成了功能模块的UT/ST,并执行通过,附上结果(Y/N) +#### 是否涉及公共组件或对外接口修改,涉及时需给出修改范围和影响评估(Y/N) +#### 是否涉及资料修改,涉及时需同步修改(Y/N) \ No newline at end of file -- Gitee From b743515774734211017858339d25cd13202d4357 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=89=9B=E5=90=9B=E8=B1=AA?= Date: Tue, 23 Jul 2024 21:53:40 +0800 Subject: [PATCH 07/33] =?UTF-8?q?=E3=80=90r1.2.0=E3=80=91=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E9=85=8D=E5=A5=97=E4=BF=A1=E6=81=AF=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 14 +++++++------- configs/README.md | 26 +++++++++++++------------- docs/RELEASE_NOTE.md | 16 ++++++++-------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 1faa832d..dc1f5163 100644 --- a/README.md +++ b/README.md @@ -22,8 +22,8 @@ MindSpore Transformers套件基于MindSpore内置的并行技术和组件化设 如果您对MindSpore Transformers有任何建议,请通过issue与我们联系,我们将及时处理。 -- 📝 **[MindFormers教程文档](https://mindformers.readthedocs.io/zh_CN/latest)** -- 📝 [大模型能力表一览](https://mindformers.readthedocs.io/zh-cn/latest/docs/model_support_list.html#llm) +- 📝 **[MindFormers教程文档](https://mindformers.readthedocs.io/zh_CN/r1.2.0)** +- 📝 [大模型能力表一览](https://mindformers.readthedocs.io/zh-cn/r1.2.0/docs/model_support_list.html#llm) - 📝 [MindPet指导教程](docs/feature_cards/Pet_Tuners.md) - 📝 [AICC指导教程](docs/readthedocs/source_zh_cn/docs/practice/AICC.md) @@ -363,11 +363,11 @@ MindFormers已支持大部分模型的[LoRA微调](docs/feature_cards/Pet_Tuners 当前支持的硬件为[Atlas 800T A2](https://www.hiascend.com/hardware/ai-server?tag=900A2)训练服务器。 -当前套件建议使用的Python版本为3.9。 +当前套件建议使用的Python版本为3.10。 -| MindFormers | MindPet | MindSpore | CANN | 驱动固件 | 镜像链接 | 备注 | -|:-----------:|:-------:|:-----------:|:----:|:----:|:----:|-------------| -| dev | 1.0.4 | 2.3版本(尚未发布) | 尚未发布 | 尚未发布 | / | 开发分支(非稳定版本) | +| MindFormers | MindPet | MindSpore | CANN | 驱动固件 | 镜像链接 | 备注 | +|:-----------:|:-------:|:------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------:|:---------------------------------------------------------------------:|------| +| r1.2.0 | 1.0.4 | [2.3.0](https://www.mindspore.cn/install/) | 8.0.RC2.beta1
[aarch64](https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.0.RC2/Ascend-cann-toolkit_8.0.RC2_linux-aarch64.run)
[x86_64](https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.0.RC2/Ascend-cann-toolkit_8.0.RC2_linux-x86_64.run) | [driver](https://www.hiascend.com/hardware/firmware-drivers/community) | [image](http://mirrors.cn-central-221.ovaijisuan.com/detail/138.html) | 版本分支 | **当前MindFormers仅支持如上的软件配套关系**。其中CANN和固件驱动的安装需与使用的机器匹配,请注意识别机器型号,选择对应架构的版本。 @@ -385,7 +385,7 @@ bash build.sh MindFormers支持模型启动预训练、微调、推理、评测等功能,可点击[支持模型](#支持模型)中模型名称查看文档完成上述任务,以下为模型分布式启动方式的说明与示例。 -MindFormers推荐使用分布式方式拉起模型训练、推理等功能,目前提供`scripts/msrun_launcher.sh`分布式启动脚本作为模型的主要启动方式,`msrun`特性说明可以参考[msrun启动](https://www.mindspore.cn/tutorials/experts/zh-CN/r2.3.0rc2/parallel/msrun_launcher.html)。 +MindFormers推荐使用分布式方式拉起模型训练、推理等功能,目前提供`scripts/msrun_launcher.sh`分布式启动脚本作为模型的主要启动方式,`msrun`特性说明可以参考[msrun启动](https://www.mindspore.cn/tutorials/experts/zh-CN/r2.3.0/parallel/msrun_launcher.html)。 该脚本主要输入参数说明如下: | **参数** | **单机是否必选** | **多机是否必选** | **默认值** | **说明** | diff --git a/configs/README.md b/configs/README.md index 21e4edcf..e16af710 100644 --- a/configs/README.md +++ b/configs/README.md @@ -24,13 +24,13 @@ configs统一在run_xxx.yaml中,排序按照修改频率的顺序和一般的 ## 详细配置说明 -- seed: 随机种子,可以参考[mindspore.set_seed](https://www.mindspore.cn/docs/zh-CN/r2.3/api_python/mindspore/mindspore.set_seed.html) +- seed: 随机种子,可以参考[mindspore.set_seed](https://www.mindspore.cn/docs/zh-CN/r2.3.0/api_python/mindspore/mindspore.set_seed.html) - run_mode: 运行模式,可选"train"、"finetune"、"eval"或"predict" - output_dir: './output' 保存checkpoint、strategy的路径 - load_checkpoint: 加载权重的模型名或权重路径,若进行全参微调/推理,支持传入完整权重路径或离线切分完成的权重文件夹;对于Lora微调/推理,在支持上述传入方式以外,还支持同时传入Base、Lora权重,传入格式为`load_checkpoint=path/to/dir/`,其中dir路径下包含`{BASE_MODEL}.ckpt`、`{LORA_MODEL}.ckpt`。 - auto_trans_ckpt: 是否开启自动在线权重切分或转换 - resume_training: 加载方式,为True时会加载训练过程信息,如优化器、epochs数等 -- context: 环境配置,可以参考: [mindspore.set_context](https://www.mindspore.cn/docs/zh-CN/r2.3/api_python/mindspore/mindspore.set_context.html) +- context: 环境配置,可以参考: [mindspore.set_context](https://www.mindspore.cn/docs/zh-CN/r2.3.0/api_python/mindspore/mindspore.set_context.html) - mode: 0代表Graph Mode, 1代表Pynative Mode - device_target: 设备类型,Ascend、CPU或GPU,默认为Ascend - enable_graph_kernel: 是否开启图算融合 @@ -53,7 +53,7 @@ configs统一在run_xxx.yaml中,排序按照修改频率的顺序和一般的 - use_clip_grad: 是否开启梯度裁剪 - loss_scale_value: 缩放系数 - use_parallel: 是否开启并行 -- parallel: 自动并行配置,可以参考:[mindspore.set_auto_parallel_context](https://www.mindspore.cn/docs/zh-CN/r2.3/api_python/mindspore/mindspore.set_auto_parallel_context.html) +- parallel: 自动并行配置,可以参考:[mindspore.set_auto_parallel_context](https://www.mindspore.cn/docs/zh-CN/r2.3.0/api_python/mindspore/mindspore.set_auto_parallel_context.html) - parallel_mode: 并行模式,0-dataset数据并行, 1-semi半自动并行, 2-auto自动并行, 3-hybrid手工实现并行。auto自动并行相关说明参考[自动并行](../docs/feature_cards/Auto_Parallel.md) - gradients_mean: 是否在梯度AllReduce后执行平均算子。通常半自动并行模式下为False,数据并行模式下为True - enable_alltoall: 允许在通信期间生成AllToAll通信算子的开关。通常仅在MOE场景下打开,默认False @@ -67,7 +67,7 @@ configs统一在run_xxx.yaml中,排序按照修改频率的顺序和一般的 - gradient_accumulation_shard: 设置累加梯度变量是否在数据并行维度上进行切分。 - parallel_optimizer_threshold: 设置参数切分的阈值。 - optimizer_weight_shard_size: 设置指定优化器权重切分通信域的大小。多机训练dp数较大时可以适当设置为一个较小的值(需要能整除dp值)。 -- parallel_config: 并行策略配置,可以参考`mindformers.modules.transformer.TransformerOpParallelConfig`,并行配置涉及**算子级并行**,可参考[文档](https://www.mindspore.cn/tutorials/experts/zh-CN/r2.3.0rc2/parallel/operator_parallel.html)。 +- parallel_config: 
并行策略配置,可以参考`mindformers.modules.transformer.TransformerOpParallelConfig`,并行配置涉及**算子级并行**,可参考[文档](https://www.mindspore.cn/tutorials/experts/zh-CN/r2.3.0/parallel/operator_parallel.html)。 - data_parallel: 数据并行,自动并行双递归策略搜索算法下无需配置 - model_parallel: 模型并行,自动并行双递归策略搜索算法下无需配置 - context_parallel: 序列并行,在序列维度进行切分,每台设备只负责1/context_parallel的Q和KV进行自注意力值计算,不再需要单个设备来保存整个序列,使注意力矩阵与序列长度由平方关系变成线性关系,有效降低每台计算设备显存压力,context_parallel代表序列并行数,此处为1表示不开启,此处为2表示2卡并行。 @@ -77,7 +77,7 @@ configs统一在run_xxx.yaml中,排序按照修改频率的顺序和一般的 - use_seq_parallel: 是否开启序列并行,开启后将Transformer层中的LayerNorm以及Dropout的输入按序列维度进行切分,使各设备只需处理部分的LayerNorm和Dropout,减少模型显存占用。注意当context_parallel开启后,该参数不生效。 - micro_batch_num: 流水线并行的微批次大小。pipeline_satge大于1时,开启流水并行时使用,此处需满足micro_batch_num >= pipeline_satge - gradient_aggregation_group: 梯度通信算子融合组的大小 -- micro_batch_interleave_num: batch_size的拆分份数,多副本并行开关,通常在模型并行时使用,用于优化model_parallel时产生的通信损耗,纯流水并行时不建议使用。可以参考[mindspore.nn.MicroBatchInterleaved](https://www.mindspore.cn/docs/zh-CN/r2.3/api_python/nn/mindspore.nn.MicroBatchInterleaved.html) +- micro_batch_interleave_num: batch_size的拆分份数,多副本并行开关,通常在模型并行时使用,用于优化model_parallel时产生的通信损耗,纯流水并行时不建议使用。可以参考[mindspore.nn.MicroBatchInterleaved](https://www.mindspore.cn/docs/zh-CN/r2.3.0/api_python/nn/mindspore.nn.MicroBatchInterleaved.html) - moe_config: 混合专家配置,当前大部分仓上模型不支持,实验性接口,谨慎使用。可以参考mindformers.modules.transformer.moe.MoEConfig - expert_num: 专家数量 - capacity_factor: 专家能力因子 @@ -89,10 +89,10 @@ configs统一在run_xxx.yaml中,排序按照修改频率的顺序和一般的 - parallel_optimizer_comm_recompute: 由优化器并行引入的AllGather通信是否重计算 - mp_comm_recompute: 由模型并行引入的通信操作是否重计算 - recompute_slice_activation: 是否把保留在内存中的Cell输出切片 -- auto_tune: 是否开启自动数据加速,可以参考[mindspore.dataset.config.set_enable_autotune](https://www.mindspore.cn/docs/zh-CN/r2.3/api_python/dataset/mindspore.dataset.config.set_enable_autotune.html) +- auto_tune: 是否开启自动数据加速,可以参考[mindspore.dataset.config.set_enable_autotune](https://www.mindspore.cn/docs/zh-CN/r2.3.0/api_python/dataset/mindspore.dataset.config.set_enable_autotune.html) - filepath_prefix: 优化后的全局配置的保存路径+文件前缀 -- autotune_per_step: 设置自动数据加速的配置调整step间隔,可以参考[mindspore.dataset.config.set_autotune_interval](https://www.mindspore.cn/docs/zh-CN/r2.3/api_python/dataset/mindspore.dataset.config.set_autotune_interval.html) -- profile: 是否开启性能分析工具,可以参考[mindspore.Profiler](https://www.mindspore.cn/docs/zh-CN/r2.3/api_python/mindspore/mindspore.Profiler.html) +- autotune_per_step: 设置自动数据加速的配置调整step间隔,可以参考[mindspore.dataset.config.set_autotune_interval](https://www.mindspore.cn/docs/zh-CN/r2.3.0/api_python/dataset/mindspore.dataset.config.set_autotune_interval.html) +- profile: 是否开启性能分析工具,可以参考[mindspore.Profiler](https://www.mindspore.cn/docs/zh-CN/r2.3.0/api_python/mindspore/mindspore.Profiler.html) - profile_start_step: 性能分析开始的step - profile_stop_step: 性能分析结束的step - profile_communication: 是否在多设备训练中收集通信性能数据 @@ -104,10 +104,10 @@ configs统一在run_xxx.yaml中,排序按照修改频率的顺序和一般的 - do_eval: 是否开启边训练边评估 - eval_step_interval: 评估step间隔, 默认为100,表示每100个step间隔执行一次评估;配置为大于0的数表示每隔所配置的step数后执行一次评估,配置为小于0的数则表示禁用step评估 - eval_epoch_interval: 评估epoch间隔, 默认为-1,表示禁用epoch结束时的评估;配置为大于0的数表示每隔所配置的epoch数后执行一次评估,配置为小于0的数则表示禁用epoch评估;注意:数据下沉模式下,epoch所包含的step数将从数据集大小变为sink size的大小,不建议在数据下沉模式下使用本项配置 -- train_dataset: 训练数据集配置,可以参考[mindspore.dataset.GeneratorDataset](https://www.mindspore.cn/docs/zh-CN/r2.3/api_python/dataset/mindspore.dataset.GeneratorDataset.html) +- train_dataset: 训练数据集配置,可以参考[mindspore.dataset.GeneratorDataset](https://www.mindspore.cn/docs/zh-CN/r2.3.0/api_python/dataset/mindspore.dataset.GeneratorDataset.html) - seed: 
随机种子 - batch_size: 批次大小,当前在使用yaml初始化训练时,该参数会被runner_config中的batch_size覆盖 - - data_loader: 数据加载配置,可以参考[mindspore.dataset.ImageFolderDataset](https://www.mindspore.cn/docs/zh-CN/r2.3/api_python/dataset/mindspore.dataset.ImageFolderDataset.html) + - data_loader: 数据加载配置,可以参考[mindspore.dataset.ImageFolderDataset](https://www.mindspore.cn/docs/zh-CN/r2.3.0/api_python/dataset/mindspore.dataset.ImageFolderDataset.html) - type: 数据加载类 - dataset_dir: 数据集的根目录或数据集文件的路径 - num_parallel_workers: 读取数据的工作线程数 @@ -117,7 +117,7 @@ configs统一在run_xxx.yaml中,排序按照修改频率的顺序和一般的 - type: 分词器类 - vocab_file: 词表文件路径 - max_length: 分词器输出的最大长度 - - mixup_op: 图像随机混合,可以参考[mindspore.dataset.vision.MixUp](https://www.mindspore.cn/docs/zh-CN/r2.3/api_python/dataset_vision/mindspore.dataset.vision.MixUp.html) + - mixup_op: 图像随机混合,可以参考[mindspore.dataset.vision.MixUp](https://www.mindspore.cn/docs/zh-CN/r2.3.0/api_python/dataset_vision/mindspore.dataset.vision.MixUp.html) - input_columns: 输入数据列 - output_columns: 输出数据列 - column_order: 输出数据顺序 @@ -163,8 +163,8 @@ configs统一在run_xxx.yaml中,排序按照修改频率的顺序和一般的 - callbacks: 回调函数配置 - type: 回调函数类 - type: MFLossMonitor: loss打印 - - type: SummaryMonitor: 收集summary数据,可以参考[mindspore.SummaryCollector](https://www.mindspore.cn/docs/zh-CN/r2.3/api_python/mindspore/mindspore.SummaryCollector.html) - - type: CheckpointMonitor: checkpoint保存,可以参考[mindspore.save_checkpoint](https://www.mindspore.cn/docs/zh-CN/r2.3/api_python/mindspore/mindspore.save_checkpoint.html) + - type: SummaryMonitor: 收集summary数据,可以参考[mindspore.SummaryCollector](https://www.mindspore.cn/docs/zh-CN/r2.3.0/api_python/mindspore/mindspore.SummaryCollector.html) + - type: CheckpointMonitor: checkpoint保存,可以参考[mindspore.save_checkpoint](https://www.mindspore.cn/docs/zh-CN/r2.3.0/api_python/mindspore/mindspore.save_checkpoint.html) - prefix: 权重文件前缀 - directory: 保存权重的目录 - save_checkpoint_seconds: 设定多少s保存一次ckpt diff --git a/docs/RELEASE_NOTE.md b/docs/RELEASE_NOTE.md index 69986c6c..25e9ddac 100644 --- a/docs/RELEASE_NOTE.md +++ b/docs/RELEASE_NOTE.md @@ -18,14 +18,14 @@ fix。 以下为新支持模型: -| 模型 | 规格 | -|----------------------------------------------------------------------------------------------|--------------------------------------------------------------| -| [Mixtral](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/mixtral/mixtral.md) | Mixtral_8x7b(32k预训练) | -| [Qwen1.5](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/qwen1_5/qwen1_5.md) | Qwen1.5_7b(预训练、微调、推理)、Qwen1.5_14b(预训练、微调、推理)、Qwen1.5_72b(预训练) | -| [Llama3](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/llama3/llama3.md) | Llama3_70b(预训练、微调) | -| [Deepseek](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/deepseek/deepseek.md) | Deepseek_33b(微调) | -| [Yi](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/yi/yi.md) | Yi_6b(微调)、Yi_34b(微调) | -| [QwenVL](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/qwenvl/qwenvl.md) | QwenVL_9.6b(微调、推理) | +| 模型 | 规格 | +|-----------------------------------------------------------------------------------------------|---------------------------------------------------------------| +| [Mixtral](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/mixtral/mixtral.md) | Mixtral_8x7b(32k预训练、推理) | +| [Qwen1.5](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/qwen1_5/qwen1_5.md) | Qwen1.5_7b(预训练、微调、推理)、Qwen1.5_14b(预训练、微调、推理)、Qwen1.5_72b(预训练) | +| [Llama3](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/llama3/llama3.md) | Llama3_70b(预训练、微调) | +| 
[Deepseek](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/deepseek/deepseek.md) | Deepseek_33b(微调) | +| [Yi](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/yi/yi.md) | Yi_6b(微调)、Yi_34b(微调) | +| [QwenVL](https://gitee.com/mindspore/mindformers/tree/r1.2.0/research/qwenvl/qwenvl.md) | QwenVL_9.6b(微调、推理) | ## Bugfix -- Gitee From 4d69329801612f8d452897fded3cd1879d99b48e Mon Sep 17 00:00:00 2001 From: Yule100 <2538776509@qq.com> Date: Thu, 18 Jul 2024 09:50:45 +0800 Subject: [PATCH 08/33] =?UTF-8?q?fixed=206927b9b=20from=20https://gitee.co?= =?UTF-8?q?m/yule100/mindformers1/pulls/3561=20baichuan2=5F7b=20=E6=8E=A8?= =?UTF-8?q?=E7=90=86=E7=B2=BE=E5=BA=A6=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- research/baichuan2/baichuan2_7b.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/research/baichuan2/baichuan2_7b.py b/research/baichuan2/baichuan2_7b.py index d9787770..a351cfa7 100644 --- a/research/baichuan2/baichuan2_7b.py +++ b/research/baichuan2/baichuan2_7b.py @@ -42,7 +42,7 @@ from mindformers.models.llama.llama_config import LlamaConfig from mindformers.models.llama.llama_layer import LlamaEmbedding, LlamaRMSNorm from mindformers.models.llama.llama_transformer import LLamaDecodeLayer from mindformers.tools.logger import logger -from mindformers.tools.utils import get_use_rope_self_define +from mindformers.tools.utils import get_use_rope_self_define, get_predict_run_mode __all__ = ['Baichuan7BV2ForCausalLM', 'Baichuan7BV2Model'] @@ -93,7 +93,7 @@ class Baichuan7BV2Model(Baichuan2PreTrainedModel): self.use_rope_self_define = get_use_rope_self_define() self.shape = P.Shape() - self.reshape = P.Reshape().add_prim_attr("skip_redistribution", True) + self.reshape = P.Reshape() self.cast = P.Cast() self.tile = P.Tile() self.expand_dims = P.ExpandDims() @@ -229,7 +229,6 @@ class NormHead(nn.Cell): hidden_size, vocab_size, use_past, - is_dynamic=False, compute_dtype=mstype.float16, eps=1e-5): super().__init__() @@ -244,8 +243,6 @@ class NormHead(nn.Cell): self.add = P.Add() self.real_div = P.RealDiv() self.reshape = P.Reshape() - if is_dynamic: - self.reshape.add_prim_attr("skip_redistribution", True) self.sum = P.ReduceSum() self.eps = Tensor([eps], mstype.float16) self.is_first_iteration = True @@ -336,8 +333,6 @@ class Baichuan7BV2ForCausalLM(Baichuan2PreTrainedModel): self.shape = P.Shape() self.reshape = P.Reshape() - if config.is_dynamic: - self.reshape.add_prim_attr("skip_redistribution", True) self.cast = P.Cast() self.slice = P.StridedSlice() self.not_equal = P.NotEqual() @@ -350,7 +345,6 @@ class Baichuan7BV2ForCausalLM(Baichuan2PreTrainedModel): self.lm_head = NormHead(hidden_size=config.hidden_size, vocab_size=config.vocab_size, use_past=config.use_past, - is_dynamic=config.is_dynamic, compute_dtype=config.compute_dtype) vocab_size = config.vocab_size @@ -382,6 +376,7 @@ class Baichuan7BV2ForCausalLM(Baichuan2PreTrainedModel): self.load_checkpoint(config) self.set_model_predict_config() + self.predict_run_mode = get_predict_run_mode() # pylint: disable=W0613 def prepare_inputs_for_generation(self, input_ids, **kwargs): @@ -434,8 +429,6 @@ class Baichuan7BV2ForCausalLM(Baichuan2PreTrainedModel): tokens = input_ids if batch_valid_length is not None: batch_valid_length = self.reshape(batch_valid_length, (-1,)) - if not self.is_first_iteration: - batch_valid_length = self.sub_batch_valid_len(batch_valid_length, 1) output = self.model(tokens, 
batch_valid_length, batch_index, zactivate_len, block_tables, slot_mapping) pre_gather = (not self.use_past or self.is_first_iteration) and batch_valid_length is not None if pre_gather: @@ -453,10 +446,10 @@ class Baichuan7BV2ForCausalLM(Baichuan2PreTrainedModel): input_mask = self.mul(input_mask, label_mask) if not self.training: - if not pre_gather: - logits = self.reshape(logits, (bsz, seqlen, -1)) logits = self.cast(logits, mstype.float32) - # makes cast effective to avoid allgather issue in Mindspore1.10 + if self.predict_run_mode: + logits = self.reshape(logits, (-1, logits.shape[-1])) + return logits input_mask = self.add(input_mask, 1) return logits, tokens, input_mask -- Gitee From ba474e93f5f9e0564abdef066999f9113c6fb88a Mon Sep 17 00:00:00 2001 From: zxq <342239412@qq.com> Date: Wed, 24 Jul 2024 18:29:54 +0800 Subject: [PATCH 09/33] =?UTF-8?q?=E4=BF=AE=E6=94=B9r1.2.0=E5=88=86?= =?UTF-8?q?=E6=94=AF=E4=B8=ADllama.md=E5=8D=95=E5=8D=A1=E6=8E=A8=E7=90=86?= =?UTF-8?q?=E5=AE=9E=E4=BE=8B=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/model_cards/llama2.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/model_cards/llama2.md b/docs/model_cards/llama2.md index c1d4954f..4fbb05c3 100644 --- a/docs/model_cards/llama2.md +++ b/docs/model_cards/llama2.md @@ -259,7 +259,7 @@ bash scripts/msrun_launcher.sh "run_mindformer.py \ > 如果报错提示显存不足,可以通过`export HCCL_BUFFSIZE=100`将对应环境变量下调至100。 -`ymal`配置文件中各参数含义详见[Config配置说明](../../configs/README.md),`parallel_speed_up`各参数含义详见[parallel_speed_up说明](https://www.mindspore.cn/docs/zh-CN/r2.3/api_python/mindspore/mindspore.set_context.html#mindspore.set_context)。 +`ymal`配置文件中各参数含义详见[Config配置说明](../../configs/README.md),`parallel_speed_up`各参数含义详见[parallel_speed_up说明](https://www.mindspore.cn/docs/zh-CN/r2.3.0/api_python/mindspore/mindspore.set_context.html#mindspore.set_context)。 ### 多机训练 @@ -420,7 +420,7 @@ DEVICE_NUM: 使用卡数, 仅开启多卡推理时生效 ### 单卡推理 -以`Llama2-7b`2卡推理为例。 +以`Llama2-7b`单卡推理为例。 ```shell bash scripts/examples/llama2/run_llama2_predict.sh single \ -- Gitee From 8154c6c26db16d69a16ac609e15cdb6dcfea3aa0 Mon Sep 17 00:00:00 2001 From: xwkgch Date: Sat, 27 Jul 2024 09:57:11 +0000 Subject: [PATCH 10/33] =?UTF-8?q?=E3=80=90=E9=97=AE=E9=A2=98=E5=8D=95?= =?UTF-8?q?=E3=80=91=E4=BF=AE=E5=A4=8D=E5=AD=A6=E4=B9=A0=E7=8E=87=E8=AE=BE?= =?UTF-8?q?=E7=BD=AE=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xwkgch --- research/mixtral/pretrain_mixtral-8x7b.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/research/mixtral/pretrain_mixtral-8x7b.yaml b/research/mixtral/pretrain_mixtral-8x7b.yaml index 3d0961e6..3ea0e8df 100644 --- a/research/mixtral/pretrain_mixtral-8x7b.yaml +++ b/research/mixtral/pretrain_mixtral-8x7b.yaml @@ -47,8 +47,8 @@ moe_config: # lr schedule lr_schedule: type: CosineWithWarmUpLR - learning_rate: 1.e-4 - lr_end: 1.e-5 + learning_rate: 1.e-5 + lr_end: 1.e-6 warmup_ratio: 0.03 total_steps: -1 # -1 means it will load the total steps of the dataset -- Gitee From 43d18203ca6d13370dad3f56cad6e5b8f22cf64f Mon Sep 17 00:00:00 2001 From: yiyison Date: Sat, 27 Jul 2024 20:49:56 +0800 Subject: [PATCH 11/33] =?UTF-8?q?=E5=88=A0=E6=8E=89=E5=BA=9F=E5=BC=83?= =?UTF-8?q?=E5=88=86=E6=94=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mindformers/core/callback/callback.py | 4 ---- 1 file changed, 4 deletions(-) diff 
--git a/mindformers/core/callback/callback.py b/mindformers/core/callback/callback.py index 88d3593f..b0642e9a 100644 --- a/mindformers/core/callback/callback.py +++ b/mindformers/core/callback/callback.py @@ -28,7 +28,6 @@ import mindspore as ms from mindspore import Callback, Profiler, ModelCheckpoint, CheckpointConfig, context, save_checkpoint, Tensor from mindspore.train.callback import SummaryCollector from mindspore.nn.learning_rate_schedule import LearningRateSchedule -from mindspore.train.callback._callback import set_cur_net from mindspore.train.serialization import _get_merged_param_data from mindspore.nn.cell import Cell from mindspore.ops.operations.comm_ops import Broadcast @@ -606,9 +605,6 @@ class CheckpointMonitor(ModelCheckpoint): self._last_time_for_keep = time.time() self._last_triggered_step = cb_params.cur_step_num - if context.get_context("enable_ge") and os.getenv("MS_ENABLE_REF_MODE", "0") == "0": - set_cur_net(cb_params.train_network) - cb_params.train_network.exec_checkpoint_graph() if "epoch_num" in self._append_dict: self._append_dict["epoch_num"] = cb_params.cur_epoch_num if "step_num" in self._append_dict: -- Gitee From 485bab7db3bcd62b103ff332d9105150859acd2b Mon Sep 17 00:00:00 2001 From: huangyong Date: Mon, 29 Jul 2024 09:27:03 +0800 Subject: [PATCH 12/33] bugfix --- mindformers/modules/transformer/moe.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mindformers/modules/transformer/moe.py b/mindformers/modules/transformer/moe.py index fc9284c0..ec20802e 100644 --- a/mindformers/modules/transformer/moe.py +++ b/mindformers/modules/transformer/moe.py @@ -1085,6 +1085,7 @@ class TopkRouterV2(Cell): self.combine_gather = P.Gather(batch_dims=1).shard(((dp, 1, 1), (dp, 1, 1),)) self.mul_router_coeff = P.Mul().shard(((dp, 1, 1, 1), (dp, 1, 1, 1))) self.sum_router_coeff = P.ReduceSum(keep_dims=False).shard(((dp, 1, 1, 1),)) + self.not_equal = P.NotEqual().shard(((dp, 1, 1), ())) # sort indexing self.range2 = Tensor(np.tile(np.arange(131072), (self.expert_dim, 1)), mstype.float32) @@ -1266,6 +1267,9 @@ class TopkRouterV2(Cell): else: expert_capacity = self._calculate_expert_capacity_dynamic(expert_index) dispatch_index, combine_index = self.topkrouter(expert_index, expert_capacity, self.expert_dim) + within_capacity = self.mod(combine_index, expert_capacity + 1) + within_capacity = self.not_equal(self.cast(within_capacity, mstype.int32), 0) + expert_gate = self.mul_3d(within_capacity, expert_gate) router_coeff = self._normalize(expert_gate) return dispatch_index, combine_index, router_coeff -- Gitee From d29f3818e22a74fbb6d6cc45d59e8d74c049e7e8 Mon Sep 17 00:00:00 2001 From: wanghaoran Date: Fri, 26 Jul 2024 18:46:17 +0800 Subject: [PATCH 13/33] Correct .shard() config of _logSoftmax for sharding propagation --- mindformers/core/loss/loss.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/mindformers/core/loss/loss.py b/mindformers/core/loss/loss.py index c35ef7d9..44573e01 100644 --- a/mindformers/core/loss/loss.py +++ b/mindformers/core/loss/loss.py @@ -210,13 +210,22 @@ class _LogSoftmax(nn.Cell): self.on_value = Tensor(1.0, mstype.float32) self.off_value = Tensor(0.0, mstype.float32) - self.sum = P.ReduceSum(keep_dims=True).shard(((dp, mp),)) - self.max = P.ReduceMax(keep_dims=True).shard( - ((dp, mp),)) - self.sub = P.Sub().shard(((dp, mp), (dp, 1))) - self.exp = P.Exp().shard(((dp, mp),)) - self.log = P.Log().shard(((dp, 1),)) - self.onehot = P.OneHot().shard(((dp, mp), (), ())) + if _get_parallel_mode() in 
(ParallelMode.AUTO_PARALLEL,) and _is_sharding_propagation(): + self.sum = P.ReduceSum(keep_dims=True).shard(((dp, mp),)) + self.max = P.ReduceMax(keep_dims=True).shard( + ((dp, mp),)) + self.sub = P.Sub() + self.exp = P.Exp() + self.log = P.Log() + self.onehot = P.OneHot() + else: + self.sum = P.ReduceSum(keep_dims=True).shard(((dp, mp),)) + self.max = P.ReduceMax(keep_dims=True).shard( + ((dp, mp),)) + self.sub = P.Sub().shard(((dp, mp), (dp, 1))) + self.exp = P.Exp().shard(((dp, mp),)) + self.log = P.Log().shard(((dp, 1),)) + self.onehot = P.OneHot().shard(((dp, mp), (), ())) def construct(self, logits, label): """Forward process""" -- Gitee From cc94c09133bd3652b0866d34cebcd45809d8fe91 Mon Sep 17 00:00:00 2001 From: renyujin Date: Mon, 29 Jul 2024 15:07:10 +0800 Subject: [PATCH 14/33] fix_slot_mapping_pa_check --- mindformers/models/glm2/glm2.py | 4 ++-- mindformers/models/llama/llama.py | 4 ++-- mindformers/pet/models/lora.py | 4 ++-- mindformers/pet/models/prefix_tuning.py | 4 ++-- mindformers/pet/models/ptuning2.py | 4 ++-- research/baichuan2/baichuan2_13b.py | 4 ++-- research/baichuan2/baichuan2_7b.py | 4 ++-- research/qwen/qwen_model.py | 4 ++-- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/mindformers/models/glm2/glm2.py b/mindformers/models/glm2/glm2.py index d1bd6134..28bd0791 100644 --- a/mindformers/models/glm2/glm2.py +++ b/mindformers/models/glm2/glm2.py @@ -240,8 +240,8 @@ class ChatGLM2ForConditionalGeneration(GLM2PreTrainedModel): """Get ChatGLM2 model input tuple for transform ckpt.""" input_ids = Tensor(input_ids, mstype.int32) labels = Tensor(kwargs["labels"]) if "labels" in kwargs else None - bs = input_ids.shape[0] - slot_mapping = Tensor(np.ones(shape=tuple([bs])), mstype.int32) + bs, seq = input_ids.shape[0], input_ids.shape[1] + slot_mapping = Tensor(np.ones(shape=tuple([bs*seq])), mstype.int32) return input_ids, labels, None, None, None, None, None, None, None, None, slot_mapping, None, None def set_dynamic_inputs(self, **kwargs): diff --git a/mindformers/models/llama/llama.py b/mindformers/models/llama/llama.py index 13564bb5..4d278521 100644 --- a/mindformers/models/llama/llama.py +++ b/mindformers/models/llama/llama.py @@ -361,8 +361,8 @@ class LlamaForCausalLM(LlamaPreTrainedModel): """Get Llama model input tuple for transform ckpt.""" input_ids = Tensor(input_ids, mstype.int32) labels = Tensor(kwargs["labels"]) if "labels" in kwargs else None - bs = input_ids.shape[0] - slot_mapping = Tensor(np.ones(shape=tuple([bs])), mstype.int32) + bs, seq = input_ids.shape[0], input_ids.shape[1] + slot_mapping = Tensor(np.ones(shape=tuple([bs*seq])), mstype.int32) prefix_keys_values = Tensor(kwargs["prefix_keys_values"]) if "prefix_keys_values" in kwargs else None return input_ids, labels, None, None, None, None, None, None, None, None, None, slot_mapping, prefix_keys_values diff --git a/mindformers/pet/models/lora.py b/mindformers/pet/models/lora.py index a348dff2..651dbfac 100644 --- a/mindformers/pet/models/lora.py +++ b/mindformers/pet/models/lora.py @@ -66,8 +66,8 @@ class LoraModel(PreTrainedModel): def prepare_inputs_for_predict_layout(self, input_ids, **kwargs): input_ids = Tensor(input_ids, mstype.int32) labels = Tensor(kwargs["labels"]) if "labels" in kwargs else None - bs = input_ids.shape[0] - slot_mapping = Tensor(np.ones(shape=tuple([bs])), mstype.int32) + bs, seq = input_ids.shape[0], input_ids.shape[1] + slot_mapping = Tensor(np.ones(shape=tuple([bs*seq])), mstype.int32) return input_ids, labels, None, None, None, None, None, None, 
None, None, None, slot_mapping def slice_incremental_inputs(self, model_inputs: dict, current_index): diff --git a/mindformers/pet/models/prefix_tuning.py b/mindformers/pet/models/prefix_tuning.py index efcba76a..1223b8f6 100644 --- a/mindformers/pet/models/prefix_tuning.py +++ b/mindformers/pet/models/prefix_tuning.py @@ -56,8 +56,8 @@ class PrefixTuningModel(PreTrainedModel): def prepare_inputs_for_predict_layout(self, input_ids, **kwargs): input_ids = Tensor(input_ids, mstype.int32) labels = Tensor(kwargs["labels"]) if "labels" in kwargs else None - bs = input_ids.shape[0] - slot_mapping = Tensor(np.ones(shape=tuple([bs])), mstype.int32) + bs, seq = input_ids.shape[0], input_ids.shape[1] + slot_mapping = Tensor(np.ones(shape=tuple([bs*seq])), mstype.int32) return input_ids, labels, None, None, None, None, None, None, None, None, None, slot_mapping def slice_incremental_inputs(self, model_inputs: dict, current_index): diff --git a/mindformers/pet/models/ptuning2.py b/mindformers/pet/models/ptuning2.py index 29f10c49..b8ec84fb 100644 --- a/mindformers/pet/models/ptuning2.py +++ b/mindformers/pet/models/ptuning2.py @@ -56,8 +56,8 @@ class Ptuning2Model(PreTrainedModel): def prepare_inputs_for_predict_layout(self, input_ids, **kwargs): input_ids = Tensor(input_ids, mstype.int32) labels = Tensor(kwargs["labels"]) if "labels" in kwargs else None - bs = input_ids.shape[0] - slot_mapping = Tensor(np.ones(shape=tuple([bs])), mstype.int32) + bs, seq = input_ids.shape[0], input_ids.shape[1] + slot_mapping = Tensor(np.ones(shape=tuple([bs*seq])), mstype.int32) return input_ids, labels, None, None, None, None, None, None, None, None, None, slot_mapping def slice_incremental_inputs(self, model_inputs: dict, current_index): diff --git a/research/baichuan2/baichuan2_13b.py b/research/baichuan2/baichuan2_13b.py index 02d8e637..3d4d12c1 100644 --- a/research/baichuan2/baichuan2_13b.py +++ b/research/baichuan2/baichuan2_13b.py @@ -171,8 +171,8 @@ class Baichuan13BV2ForCausalLM(Baichuan2PreTrainedModel): def prepare_inputs_for_predict_layout(self, input_ids, **kwargs): """Get Baichuan13BV2 model input tuple for transform ckpt.""" input_ids = Tensor(input_ids, mstype.int32) - bs = input_ids.shape[0] - slot_mapping = Tensor(np.ones(shape=tuple([bs])), mstype.int32) + bs, seq = input_ids.shape[0], input_ids.shape[1] + slot_mapping = Tensor(np.ones(shape=tuple([bs*seq])), mstype.int32) return input_ids, None, None, None, None, None, None, None, None, None, None, slot_mapping def add_flags_custom(self, is_first_iteration): diff --git a/research/baichuan2/baichuan2_7b.py b/research/baichuan2/baichuan2_7b.py index a351cfa7..2582ad16 100644 --- a/research/baichuan2/baichuan2_7b.py +++ b/research/baichuan2/baichuan2_7b.py @@ -391,8 +391,8 @@ class Baichuan7BV2ForCausalLM(Baichuan2PreTrainedModel): """Get Llama model input tuple for transform ckpt.""" input_ids = Tensor(input_ids, mstype.int32) labels = Tensor(kwargs["labels"]) if "labels" in kwargs else None - bs = input_ids.shape[0] - slot_mapping = Tensor(np.ones(shape=tuple([bs])), mstype.int32) + bs, seq = input_ids.shape[0], input_ids.shape[1] + slot_mapping = Tensor(np.ones(shape=tuple([bs*seq])), mstype.int32) return input_ids, labels, None, None, None, None, None, None, None, None, None, slot_mapping def set_dynamic_inputs(self, **kwargs): diff --git a/research/qwen/qwen_model.py b/research/qwen/qwen_model.py index 459e8322..5e1e4776 100644 --- a/research/qwen/qwen_model.py +++ b/research/qwen/qwen_model.py @@ -120,8 +120,8 @@ class 
QwenForCausalLM(QwenPreTrainedModel): """Get Qwen model input tuple for transform ckpt.""" input_ids = Tensor(input_ids, mstype.int32) labels = Tensor(kwargs["labels"]) if "labels" in kwargs else None - bs = input_ids.shape[0] - slot_mapping = Tensor(np.ones(shape=tuple([bs])), mstype.int32) + bs, seq = input_ids.shape[0], input_ids.shape[1] + slot_mapping = Tensor(np.ones(shape=tuple([bs*seq])), mstype.int32) return input_ids, labels, None, None, None, None, None, None, None, None, None, slot_mapping # pylint: disable=W0613 -- Gitee From eeabc59459324c6a6b007f65b9ce9b5ceef85ff5 Mon Sep 17 00:00:00 2001 From: yiyison Date: Mon, 29 Jul 2024 21:55:21 +0800 Subject: [PATCH 15/33] =?UTF-8?q?=E4=BF=AE=E6=94=B9=5F=5Fversion=5F=5F?= =?UTF-8?q?=E5=8F=98=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mindformers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindformers/__init__.py b/mindformers/__init__.py index 621e35e9..43a9290c 100644 --- a/mindformers/__init__.py +++ b/mindformers/__init__.py @@ -15,7 +15,7 @@ """mindformers init""" -__version__ = "1.1" +__version__ = "1.2.0" from mindformers import core, dataset, experimental, \ models, modules, wrapper, tools -- Gitee From d9129aafdb19b59ebc0f3c3cd1208a40d81b5930 Mon Sep 17 00:00:00 2001 From: renyujin Date: Sat, 3 Aug 2024 17:13:45 +0800 Subject: [PATCH 16/33] =?UTF-8?q?tokenizer=5Fsupport=5Fempty=5Fstr=20?= =?UTF-8?q?=EF=BC=88cherry=20picked=20commit=20from=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mindformers/models/tokenization_utils_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindformers/models/tokenization_utils_base.py b/mindformers/models/tokenization_utils_base.py index e6fed72e..3336f485 100644 --- a/mindformers/models/tokenization_utils_base.py +++ b/mindformers/models/tokenization_utils_base.py @@ -4024,7 +4024,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): # Convert inputs to python lists token_ids = to_py_obj(token_ids) - if isinstance(token_ids[0], list): + if token_ids and isinstance(token_ids[0], list): output = [] for item in token_ids: new_strs = self._decode( -- Gitee From 301f9b889e2ae5a8293d0883287cf3346c94ccdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cntuxianyu=E2=80=9D?= Date: Tue, 6 Aug 2024 09:33:28 +0800 Subject: [PATCH 17/33] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E9=87=8D=E8=AE=A1?= =?UTF-8?q?=E7=AE=97=E4=B8=ADlist=E9=95=BF=E5=BA=A6=E9=95=BF=E4=BA=8Epipel?= =?UTF-8?q?ine=20stage=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- research/llama3/finetune_llama3_70b.yaml | 4 ++-- research/llama3/pretrain_llama3_70b.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/research/llama3/finetune_llama3_70b.yaml b/research/llama3/finetune_llama3_70b.yaml index 42c326b0..3bcad79b 100644 --- a/research/llama3/finetune_llama3_70b.yaml +++ b/research/llama3/finetune_llama3_70b.yaml @@ -100,8 +100,8 @@ micro_batch_interleave_num: 1 # recompute config recompute_config: recompute: False - select_recompute: [10,8,6,4,2,0,0,0,0,0] - select_comm_recompute: [10,8,6,4,2,0,0,0,0,0] + select_recompute: [10,8,6,4,2,0,0,0] + select_comm_recompute: [10,8,6,4,2,0,0,0] parallel_optimizer_comm_recompute: False mp_comm_recompute: True recompute_slice_activation: True diff --git a/research/llama3/pretrain_llama3_70b.yaml b/research/llama3/pretrain_llama3_70b.yaml 
index 886ab432..e5f409cb 100644 --- a/research/llama3/pretrain_llama3_70b.yaml +++ b/research/llama3/pretrain_llama3_70b.yaml @@ -100,8 +100,8 @@ micro_batch_interleave_num: 1 # recompute config recompute_config: recompute: False - select_recompute: [10,8,6,4,2,0,0,0,0,0] - select_comm_recompute: [10,8,6,4,2,0,0,0,0,0] + select_recompute: [10,8,6,4,2,0,0,0] + select_comm_recompute: [10,8,6,4,2,0,0,0] parallel_optimizer_comm_recompute: False mp_comm_recompute: True recompute_slice_activation: True -- Gitee From 35f74ba470aa49ae7e3ac1d8b4c3c7cf17f69af5 Mon Sep 17 00:00:00 2001 From: yiyison Date: Tue, 6 Aug 2024 20:44:46 +0800 Subject: [PATCH 18/33] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dinternlm2=20readme?= =?UTF-8?q?=E4=BB=A5=E5=8F=8A=E6=95=B0=E6=8D=AE=E9=9B=86=E8=BD=AC=E6=8D=A2?= =?UTF-8?q?=E8=84=9A=E6=9C=AC=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- research/internlm2/alpaca_data_preprocess.py | 4 +--- research/internlm2/internlm2.md | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/research/internlm2/alpaca_data_preprocess.py b/research/internlm2/alpaca_data_preprocess.py index 659b64fd..395b0acb 100644 --- a/research/internlm2/alpaca_data_preprocess.py +++ b/research/internlm2/alpaca_data_preprocess.py @@ -31,11 +31,10 @@ def preprocess(sources, tokenizer, seq_length): """From alpaca to mindrecord.""" input_ids = [] labels = [] - special_tokenize = {"": tokenizer.encode9('')[0], + special_tokenize = {"": tokenizer.encode('')[0], "": tokenizer.encode('', add_special_tokens=False)[0], "newline": tokenizer.encode('\n', add_special_tokens=False)[0]} - # 对话格式为 # <|im_start|>system\n # source['instruction']<|im_end|>\n @@ -44,7 +43,6 @@ def preprocess(sources, tokenizer, seq_length): # <|im_start|>assistant\n # source['output']<|im_end|>\n - start_usr_token = tokenizer.encode('<|im_start|>user', add_special_tokens=False) + [special_tokenize["newline"]] start_ass_token = tokenizer.encode('<|im_start|>assistant', add_special_tokens=False) \ + [special_tokenize["newline"]] diff --git a/research/internlm2/internlm2.md b/research/internlm2/internlm2.md index 29ab1cd3..57c4279f 100644 --- a/research/internlm2/internlm2.md +++ b/research/internlm2/internlm2.md @@ -58,7 +58,7 @@ MindFormers提供**alpaca**作为[微调](#微调)数据集。 |:----------|:------------------------------------------------------:|:-----------------------:|:---------------------------------------------------------------------------------------------------------------------:| | alpaca | InternLM2-7b | Finetune | [Link](https://github.com/tatsu-lab/stanford_alpaca/blob/main/alpaca_data.json) | -下载数据集后,使用预处理脚本`research/internlm/alpaca_data_preprocess.py`生成mindrecord训练数据: +下载数据集后,使用预处理脚本`research/internlm2/alpaca_data_preprocess.py`生成mindrecord训练数据: ```shell python alpaca_data_preprocess.py \ @@ -111,14 +111,14 @@ MindFormers提供`InternLM2-7b`的微调示例, 过程中使用alpaca数据集 ```shell bash scripts/msrun_launcher.sh "research/internlm2/run_internlm2.py \ --config research/internlm2/finetune_internlm2_7b.yaml \ - --trian_dataset path/to/tain_dataset \ + --train_dataset path/to/tain_dataset \ --load_checkpoint path/to/checkpoint \ --run_mode finetune \ --use_parallel True" 8 # 参数说明 config: 模型配置文件路径 - trian_dataset: 微调数据集路径 + train_dataset: 微调数据集路径 load_checkpoint: 模型权重文件路径 run_mode: 运行模式 use_parallel: 是否开启并行 -- Gitee From 9787c165ddffa4621fff2b5e58280480c5005067 Mon Sep 17 00:00:00 2001 From: Xinrui Chen Date: Tue, 6 Aug 2024 09:33:10 +0800 Subject: [PATCH 19/33] fix extra spaces 
when internlm2 tokenizer decoding --- research/internlm2/internlm2_tokenizer.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/research/internlm2/internlm2_tokenizer.py b/research/internlm2/internlm2_tokenizer.py index 181f942b..e9fd3053 100644 --- a/research/internlm2/internlm2_tokenizer.py +++ b/research/internlm2/internlm2_tokenizer.py @@ -69,6 +69,16 @@ class InternLM2Tokenizer(PreTrainedTokenizer): self.sp_model.Load(vocab_file) self._no_prefix_space_tokens = None + if kwargs.get("chat_template") is None: + kwargs["chat_template"] = "{{ bos_token }}" \ + "{% for message in messages %}" \ + "{{'<|im_start|>' + message['role'] + '\n' + " \ + "message['content'] + '<|im_end|>' + '\n'}}" \ + "{% endfor %}" \ + "{% if add_generation_prompt %}" \ + "{{ '<|im_start|>assistant\n' }}" \ + "{% endif %}" + if kwargs.get("added_tokens_decoder") is None: kwargs["added_tokens_decoder"] = {} @@ -103,7 +113,7 @@ class InternLM2Tokenizer(PreTrainedTokenizer): def no_prefix_space_tokens(self): if self._no_prefix_space_tokens is None: vocab = self.convert_ids_to_tokens(list(range(self.vocab_size))) - self._no_prefix_space_tokens = {i for i, tok in enumerate(vocab) if not tok.startswith("▁")} + self._no_prefix_space_tokens = {tok for tok in vocab if not tok.startswith("▁")} return self._no_prefix_space_tokens @property @@ -147,22 +157,17 @@ class InternLM2Tokenizer(PreTrainedTokenizer): """Converts a sequence of tokens (string) in a single string.""" current_sub_tokens = [] out_string = "" - prev_is_special = False for token in tokens: # make sure that special tokens are not decoded using sentencepiece model if token in self.all_special_tokens: - if not prev_is_special: - out_string += " " out_string += self.sp_model.decode(current_sub_tokens) + token - prev_is_special = True current_sub_tokens = [] else: current_sub_tokens.append(token) - prev_is_special = False out_string += self.sp_model.decode(current_sub_tokens) out_string = self.clean_up_tokenization(out_string) out_string = self._maybe_add_prefix_space(tokens=tokens, decoded=out_string) - return out_string[1:] + return out_string def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]: """ -- Gitee From 5a40d9d34bb6f88dbde43ff3bb45dd6a6b35faa1 Mon Sep 17 00:00:00 2001 From: lzy0920232 Date: Fri, 2 Aug 2024 15:34:19 +0800 Subject: [PATCH 20/33] bugfix_cp_1.2 --- docs/feature_cards/Long_Sequence_Training.md | 3 --- mindformers/models/llama/llama_layer.py | 11 ++++++----- mindformers/models/llama/llama_transformer.py | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/docs/feature_cards/Long_Sequence_Training.md b/docs/feature_cards/Long_Sequence_Training.md index 678e7a1b..b1bfe8eb 100644 --- a/docs/feature_cards/Long_Sequence_Training.md +++ b/docs/feature_cards/Long_Sequence_Training.md @@ -20,15 +20,12 @@ - **context_parallel**: 序列并行 - **pipeline_stage**: 流水线并行 -此外,当前序列并行方案暂未兼容词表模型并行切分,开启序列并行时需要将参数vocab_emb_dp设置为True。并且本序列并行方案与use_seq_parallel使能的序列并行方案暂不兼容。 - 序列并行配置参考样例: ```yaml parallel_config: context_parallel: 2 use_seq_parallel: False - vocab_emb_dp: True ``` ## 配置attention_mask压缩 diff --git a/mindformers/models/llama/llama_layer.py b/mindformers/models/llama/llama_layer.py index dbdaa98c..da81787c 100644 --- a/mindformers/models/llama/llama_layer.py +++ b/mindformers/models/llama/llama_layer.py @@ -134,14 +134,15 @@ class LlamaEmbedding(Cell): self.gather.shard(((1, 1), (dp, cp))) logger.info(f"Using {dp*cp} data parallel for the embedding 
lookup.") else: - if self.vocab_table_size % mp != 0: - logger.warning("The vocab size of Loss is: %s, it is not divide by model_parallel: %s", - self.vocab_table_size, mp) + if self.vocab_table_size % (mp * cp) != 0: + logger.warning("The vocab size of Loss is: %s, it is not divide by model_parallel: %s" + "model_parallel: %s * context_parallel: %s.", + self.vocab_table_size, mp, cp) logger.warning("Now, the model_parallel num of Loss will be changed: mp = 1") self.gather.shard(((1, 1), (dp, cp))) else: - self.gather.shard(((mp, 1), (dp, cp))) - logger.info(f"Using {dp*cp} data parallel and {mp} " + self.gather.shard(((mp * cp, 1), (dp, 1))) + logger.info(f"Using {dp} data parallel, {cp} context parallel and {mp} " f"model parallel for the embedding lookup.") diff --git a/mindformers/models/llama/llama_transformer.py b/mindformers/models/llama/llama_transformer.py index 97ddd782..2888900e 100644 --- a/mindformers/models/llama/llama_transformer.py +++ b/mindformers/models/llama/llama_transformer.py @@ -561,7 +561,7 @@ class LLamaDecodeLayer(nn.Cell): self.attention_norm.shard((dp, 1, 1)) self.ffn_norm.shard((dp, 1, 1)) if moe_config is None or not moe_config.expert_num > 1: - self.feed_forward.mul.shard(((dp, 1, mp), (dp, 1, mp))) + self.feed_forward.mul.shard(((dp, cp, mp), (dp, cp, mp))) if parallel_config.use_seq_parallel and self.is_first_iteration: self.add.shard(((dp, mp, 1), (dp, mp, 1))) -- Gitee From 8eb4ff2e7faff4f9434307e964a18c6830612fb9 Mon Sep 17 00:00:00 2001 From: yiyison Date: Wed, 7 Aug 2024 19:33:36 +0800 Subject: [PATCH 21/33] =?UTF-8?q?=E5=88=A0=E9=99=A4graph=5Fkernel=5Fflags?= =?UTF-8?q?=E7=9B=B8=E5=85=B3=E4=BF=A1=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- configs/README.md | 1 - configs/bert/run_bert_base_uncased.yaml | 1 - configs/bert/run_bert_tiny_uncased.yaml | 1 - configs/bloom/run_bloom_560m.yaml | 1 - configs/bloom/run_bloom_7.1b.yaml | 1 - configs/bloom/run_bloom_7.1b_910b.yaml | 1 - configs/bloom/run_bloom_7.1b_910b_fa.yaml | 1 - .../run_clip_vit_b_16_pretrain_flickr8k.yaml | 1 - ...ro_shot_image_classification_cifar100.yaml | 1 - .../run_clip_vit_b_32_pretrain_flickr8k.yaml | 1 - ...ro_shot_image_classification_cifar100.yaml | 1 - ...n_clip_vit_l_14@336_pretrain_flickr8k.yaml | 1 - ...ro_shot_image_classification_cifar100.yaml | 1 - .../run_clip_vit_l_14_pretrain_flickr8k.yaml | 1 - ...ro_shot_image_classification_cifar100.yaml | 1 - configs/codegeex2/run_codegeex2_6b.yaml | 1 - configs/codegeex2/run_codegeex2_6b_eval.yaml | 1 - .../codegeex2/run_codegeex2_6b_finetune.yaml | 1 - .../run_codegeex2_6b_finetune_2048.yaml | 1 - configs/codellama/predict_codellama_34b.yaml | 1 - configs/codellama/pretrain_codellama_34b.yaml | 1 - configs/general/run_general_task.yaml | 1 - configs/glm/run_glm_6b_finetune.yaml | 1 - configs/glm/run_glm_6b_infer.yaml | 1 - configs/glm/run_glm_6b_lora.yaml | 1 - configs/glm/run_glm_6b_lora_infer.yaml | 1 - configs/glm2/finetune_glm2_6b_fp16.yaml | 1 - configs/glm2/lora_glm2_6b_fp16.yaml | 1 - configs/glm2/predict_glm2_6b.yaml | 1 - configs/glm2/run_glm2_6b.yaml | 1 - .../run_glm2_6b_finetune_2k_800T_A2_64G.yaml | 1 - .../glm2/run_glm2_6b_finetune_2k_800_32G.yaml | 1 - .../run_glm2_6b_finetune_800T_A2_64G.yaml | 1 - .../glm2/run_glm2_6b_finetune_800_32G.yaml | 1 - configs/glm2/run_glm2_6b_finetune_eval.yaml | 1 - .../glm2/run_glm2_6b_lora_2k_800T_A2_64G.yaml | 1 - configs/glm2/run_glm2_6b_lora_2k_800_32G.yaml | 1 - .../glm2/run_glm2_6b_lora_800T_A2_64G.yaml | 1 - 
configs/glm2/run_glm2_6b_lora_800_32G.yaml | 1 - configs/glm2/run_glm2_6b_lora_eval.yaml | 1 - configs/glm3/finetune_glm3_6b_bf16.yaml | 1 - configs/glm3/predict_glm3_6b.yaml | 1 - configs/glm3/run_glm3_6b.yaml | 1 - .../run_glm3_6b_finetune_2k_800T_A2_64G.yaml | 1 - .../run_glm3_6b_finetune_800T_A2_64G.yaml | 1 - ...lm3_6b_multiturn_finetune_800T_A2_64G.yaml | 1 - configs/gpt2/finetune_gpt2_small_fp16.yaml | 1 - configs/gpt2/pretrain_gpt2_13b_fp16.yaml | 1 - configs/gpt2/run_gpt2_13b.yaml | 1 - configs/gpt2/run_gpt2_13b_910b.yaml | 1 - configs/gpt2/run_gpt2_52b.yaml | 1 - configs/gpt2/run_gpt2_xl.yaml | 1 - configs/gpt2/run_gpt2_xl_lora.yaml | 1 - configs/llama/run_llama_13b.yaml | 1 - configs/llama/run_llama_13b_910b.yaml | 1 - configs/llama/run_llama_7b.yaml | 1 - configs/llama/run_llama_7b_910b.yaml | 1 - configs/llama/run_llama_7b_lora.yaml | 1 - configs/llama2/predict_llama2_13b_quant.yaml | 1 - configs/qa/run_qa_bert_base_uncased.yaml | 1 - configs/sam/run_sam_vit-b.yaml | 1 - configs/sam/run_sam_vit-h.yaml | 1 - configs/sam/run_sam_vit-l.yaml | 1 - .../swin/run_swin_base_p4w7_224_100ep.yaml | 1 - configs/t5/run_t5_small_on_wmt16.yaml | 1 - configs/t5/run_t5_tiny_on_wmt16.yaml | 1 - .../tokcls/run_tokcls_bert_base_chinese.yaml | 1 - .../run_tokcls_bert_base_chinese_cluener.yaml | 1 - .../txtcls/run_txtcls_bert_base_uncased.yaml | 1 - .../run_txtcls_bert_base_uncased_mnli.yaml | 1 - configs/vit/run_vit_base_p16_224_100ep.yaml | 1 - docs/feature_cards/Chat_Web.md | 3 --- .../docs/practice/gpt2_example.md | 1 - .../docs/practice/llama_example.md | 1 - mindformers/trainer/config_args.py | 24 +------------------ mindformers/trainer/training_args.py | 6 ----- .../finetune_baichuan2_13b_auto_parallel.yaml | 1 - .../finetune_baichuan2_7b_auto_parallel.yaml | 1 - research/deepseek/predict_deepseek_33b.yaml | 1 - research/glm32k/predict_glm32k.yaml | 1 - research/internlm/finetune_internlm_7b.yaml | 1 - .../internlm/finetune_internlm_7b_lora.yaml | 1 - research/internlm/predict_internlm_20b.yaml | 1 - research/internlm/predict_internlm_7b.yaml | 1 - research/internlm2/finetune_internlm2_7b.yaml | 1 - research/internlm2/predict_internlm2_20b.yaml | 1 - research/internlm2/predict_internlm2_7b.yaml | 1 - research/llama3/predict_llama3_70b.yaml | 1 - .../qwen/finetune_qwen_14b_auto_parallel.yaml | 1 - .../qwen/finetune_qwen_7b_auto_parallel.yaml | 1 - .../qwen1_5/predict_qwen1_5_14b_chat.yaml | 1 - .../qwen1_5/predict_qwen1_5_72b_chat.yaml | 1 - research/qwen1_5/predict_qwen1_5_7b_chat.yaml | 1 - .../qwenvl/finetune_qwenvl_9.6b_bf16.yaml | 1 - research/qwenvl/predict_qwenvl_9.6b.yaml | 1 - research/yi/predict_yi_34b_chat.yaml | 1 - 96 files changed, 1 insertion(+), 125 deletions(-) diff --git a/configs/README.md b/configs/README.md index e16af710..68df0529 100644 --- a/configs/README.md +++ b/configs/README.md @@ -34,7 +34,6 @@ configs统一在run_xxx.yaml中,排序按照修改频率的顺序和一般的 - mode: 0代表Graph Mode, 1代表Pynative Mode - device_target: 设备类型,Ascend、CPU或GPU,默认为Ascend - enable_graph_kernel: 是否开启图算融合 - - graph_kernel_flags: 图算融合等级 - max_call_depth: 函数调用的最大深度 - max_device_memory: 设置设备可用的最大内存。运行多机任务时需要适当减小,为设备间通信留出更多内存空间。 - save_graphs: 是否保存图 diff --git a/configs/bert/run_bert_base_uncased.yaml b/configs/bert/run_bert_base_uncased.yaml index 38b54c47..ac54d585 100644 --- a/configs/bert/run_bert_base_uncased.yaml +++ b/configs/bert/run_bert_base_uncased.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: str = 
"--disable_expand_ops=Softmax,Dropout " \ "--enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 save_graphs: False diff --git a/configs/bert/run_bert_tiny_uncased.yaml b/configs/bert/run_bert_tiny_uncased.yaml index 301d8a6b..4e340c37 100644 --- a/configs/bert/run_bert_tiny_uncased.yaml +++ b/configs/bert/run_bert_tiny_uncased.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: str = "--disable_expand_ops=Softmax,Dropout " \ "--enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 save_graphs: False diff --git a/configs/bloom/run_bloom_560m.yaml b/configs/bloom/run_bloom_560m.yaml index 96a1fe57..9bb1489c 100644 --- a/configs/bloom/run_bloom_560m.yaml +++ b/configs/bloom/run_bloom_560m.yaml @@ -95,7 +95,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_id: 0 device_target: "Ascend" - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_device_memory: "31GB" save_graphs: False save_graphs_path: "./graph" diff --git a/configs/bloom/run_bloom_7.1b.yaml b/configs/bloom/run_bloom_7.1b.yaml index 959f1506..aee70395 100644 --- a/configs/bloom/run_bloom_7.1b.yaml +++ b/configs/bloom/run_bloom_7.1b.yaml @@ -95,7 +95,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_id: 0 device_target: "Ascend" - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_device_memory: "31GB" save_graphs: False save_graphs_path: "./graph" diff --git a/configs/bloom/run_bloom_7.1b_910b.yaml b/configs/bloom/run_bloom_7.1b_910b.yaml index fbd84c01..c40b0752 100644 --- a/configs/bloom/run_bloom_7.1b_910b.yaml +++ b/configs/bloom/run_bloom_7.1b_910b.yaml @@ -95,7 +95,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_id: 0 device_target: "Ascend" - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_device_memory: "55GB" save_graphs: False save_graphs_path: "./graph" diff --git a/configs/bloom/run_bloom_7.1b_910b_fa.yaml b/configs/bloom/run_bloom_7.1b_910b_fa.yaml index b3bf8e28..706ed7f9 100644 --- a/configs/bloom/run_bloom_7.1b_910b_fa.yaml +++ b/configs/bloom/run_bloom_7.1b_910b_fa.yaml @@ -96,7 +96,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_id: 0 device_target: "Ascend" - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_device_memory: "55GB" save_graphs: False save_graphs_path: "./graph" diff --git a/configs/clip/run_clip_vit_b_16_pretrain_flickr8k.yaml b/configs/clip/run_clip_vit_b_16_pretrain_flickr8k.yaml index a1d754bc..58034461 100644 --- a/configs/clip/run_clip_vit_b_16_pretrain_flickr8k.yaml +++ b/configs/clip/run_clip_vit_b_16_pretrain_flickr8k.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/clip/run_clip_vit_b_16_zero_shot_image_classification_cifar100.yaml b/configs/clip/run_clip_vit_b_16_zero_shot_image_classification_cifar100.yaml index d7a6e352..4075c5d6 100644 --- 
a/configs/clip/run_clip_vit_b_16_zero_shot_image_classification_cifar100.yaml +++ b/configs/clip/run_clip_vit_b_16_zero_shot_image_classification_cifar100.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml b/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml index 61e7309a..1ae852bd 100644 --- a/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml +++ b/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml b/configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml index ca631647..75b0a571 100644 --- a/configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml +++ b/configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/clip/run_clip_vit_l_14@336_pretrain_flickr8k.yaml b/configs/clip/run_clip_vit_l_14@336_pretrain_flickr8k.yaml index 4d3ab452..8ed0ed88 100644 --- a/configs/clip/run_clip_vit_l_14@336_pretrain_flickr8k.yaml +++ b/configs/clip/run_clip_vit_l_14@336_pretrain_flickr8k.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/clip/run_clip_vit_l_14@336_zero_shot_image_classification_cifar100.yaml b/configs/clip/run_clip_vit_l_14@336_zero_shot_image_classification_cifar100.yaml index 7b4c14b1..624e0436 100644 --- a/configs/clip/run_clip_vit_l_14@336_zero_shot_image_classification_cifar100.yaml +++ b/configs/clip/run_clip_vit_l_14@336_zero_shot_image_classification_cifar100.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/clip/run_clip_vit_l_14_pretrain_flickr8k.yaml b/configs/clip/run_clip_vit_l_14_pretrain_flickr8k.yaml index f82efe3a..369dd70f 100644 --- a/configs/clip/run_clip_vit_l_14_pretrain_flickr8k.yaml +++ b/configs/clip/run_clip_vit_l_14_pretrain_flickr8k.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/clip/run_clip_vit_l_14_zero_shot_image_classification_cifar100.yaml b/configs/clip/run_clip_vit_l_14_zero_shot_image_classification_cifar100.yaml index 43481e2e..a7fa7b93 100644 --- a/configs/clip/run_clip_vit_l_14_zero_shot_image_classification_cifar100.yaml +++ b/configs/clip/run_clip_vit_l_14_zero_shot_image_classification_cifar100.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 
save_graphs: False device_id: 0 diff --git a/configs/codegeex2/run_codegeex2_6b.yaml b/configs/codegeex2/run_codegeex2_6b.yaml index 77c6a846..151d3ce3 100644 --- a/configs/codegeex2/run_codegeex2_6b.yaml +++ b/configs/codegeex2/run_codegeex2_6b.yaml @@ -11,7 +11,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/configs/codegeex2/run_codegeex2_6b_eval.yaml b/configs/codegeex2/run_codegeex2_6b_eval.yaml index 678900c8..d89f63f1 100644 --- a/configs/codegeex2/run_codegeex2_6b_eval.yaml +++ b/configs/codegeex2/run_codegeex2_6b_eval.yaml @@ -11,7 +11,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/configs/codegeex2/run_codegeex2_6b_finetune.yaml b/configs/codegeex2/run_codegeex2_6b_finetune.yaml index 1cbe81eb..bff9e9d6 100644 --- a/configs/codegeex2/run_codegeex2_6b_finetune.yaml +++ b/configs/codegeex2/run_codegeex2_6b_finetune.yaml @@ -11,7 +11,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/configs/codegeex2/run_codegeex2_6b_finetune_2048.yaml b/configs/codegeex2/run_codegeex2_6b_finetune_2048.yaml index a69e0811..5eb63f05 100644 --- a/configs/codegeex2/run_codegeex2_6b_finetune_2048.yaml +++ b/configs/codegeex2/run_codegeex2_6b_finetune_2048.yaml @@ -11,7 +11,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/configs/codellama/predict_codellama_34b.yaml b/configs/codellama/predict_codellama_34b.yaml index 34e8a942..7b8d95e4 100644 --- a/configs/codellama/predict_codellama_34b.yaml +++ b/configs/codellama/predict_codellama_34b.yaml @@ -63,7 +63,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" ascend_config: precision_mode: "must_keep_origin_dtype" max_call_depth: 10000 diff --git a/configs/codellama/pretrain_codellama_34b.yaml b/configs/codellama/pretrain_codellama_34b.yaml index 252bf64e..e7d4e500 100644 --- a/configs/codellama/pretrain_codellama_34b.yaml +++ b/configs/codellama/pretrain_codellama_34b.yaml @@ -119,7 +119,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "55GB" save_graphs: False diff --git 
a/configs/general/run_general_task.yaml b/configs/general/run_general_task.yaml index 41d8115d..d6627a42 100644 --- a/configs/general/run_general_task.yaml +++ b/configs/general/run_general_task.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/glm/run_glm_6b_finetune.yaml b/configs/glm/run_glm_6b_finetune.yaml index c1d58f87..6cf4a968 100644 --- a/configs/glm/run_glm_6b_finetune.yaml +++ b/configs/glm/run_glm_6b_finetune.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" save_graphs: False diff --git a/configs/glm/run_glm_6b_infer.yaml b/configs/glm/run_glm_6b_infer.yaml index ebfc2017..67a76c96 100644 --- a/configs/glm/run_glm_6b_infer.yaml +++ b/configs/glm/run_glm_6b_infer.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" save_graphs: False diff --git a/configs/glm/run_glm_6b_lora.yaml b/configs/glm/run_glm_6b_lora.yaml index 53a85963..d9372649 100644 --- a/configs/glm/run_glm_6b_lora.yaml +++ b/configs/glm/run_glm_6b_lora.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" save_graphs: False diff --git a/configs/glm/run_glm_6b_lora_infer.yaml b/configs/glm/run_glm_6b_lora_infer.yaml index 6ad551e5..1c679e48 100644 --- a/configs/glm/run_glm_6b_lora_infer.yaml +++ b/configs/glm/run_glm_6b_lora_infer.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" save_graphs: False diff --git a/configs/glm2/finetune_glm2_6b_fp16.yaml b/configs/glm2/finetune_glm2_6b_fp16.yaml index d8e2badb..9f06ecb0 100644 --- a/configs/glm2/finetune_glm2_6b_fp16.yaml +++ b/configs/glm2/finetune_glm2_6b_fp16.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm2/lora_glm2_6b_fp16.yaml b/configs/glm2/lora_glm2_6b_fp16.yaml index d050411f..551133b7 100644 --- a/configs/glm2/lora_glm2_6b_fp16.yaml +++ b/configs/glm2/lora_glm2_6b_fp16.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true 
--reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm2/predict_glm2_6b.yaml b/configs/glm2/predict_glm2_6b.yaml index 1039dc2a..0ccbf865 100644 --- a/configs/glm2/predict_glm2_6b.yaml +++ b/configs/glm2/predict_glm2_6b.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm2/run_glm2_6b.yaml b/configs/glm2/run_glm2_6b.yaml index 5ee6d725..c7dfff92 100644 --- a/configs/glm2/run_glm2_6b.yaml +++ b/configs/glm2/run_glm2_6b.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm2/run_glm2_6b_finetune_2k_800T_A2_64G.yaml b/configs/glm2/run_glm2_6b_finetune_2k_800T_A2_64G.yaml index d8e2badb..9f06ecb0 100644 --- a/configs/glm2/run_glm2_6b_finetune_2k_800T_A2_64G.yaml +++ b/configs/glm2/run_glm2_6b_finetune_2k_800T_A2_64G.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm2/run_glm2_6b_finetune_2k_800_32G.yaml b/configs/glm2/run_glm2_6b_finetune_2k_800_32G.yaml index 87e05bac..d708057f 100644 --- a/configs/glm2/run_glm2_6b_finetune_2k_800_32G.yaml +++ b/configs/glm2/run_glm2_6b_finetune_2k_800_32G.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm2/run_glm2_6b_finetune_800T_A2_64G.yaml b/configs/glm2/run_glm2_6b_finetune_800T_A2_64G.yaml index 7d7d5c57..e6ad94e3 100644 --- a/configs/glm2/run_glm2_6b_finetune_800T_A2_64G.yaml +++ b/configs/glm2/run_glm2_6b_finetune_800T_A2_64G.yaml @@ -12,7 +12,6 @@ context: mode: 0 # 0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm2/run_glm2_6b_finetune_800_32G.yaml b/configs/glm2/run_glm2_6b_finetune_800_32G.yaml index c980ab60..02f5e264 100644 --- a/configs/glm2/run_glm2_6b_finetune_800_32G.yaml +++ b/configs/glm2/run_glm2_6b_finetune_800_32G.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout 
--enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm2/run_glm2_6b_finetune_eval.yaml b/configs/glm2/run_glm2_6b_finetune_eval.yaml index b3b69628..c2c7faca 100644 --- a/configs/glm2/run_glm2_6b_finetune_eval.yaml +++ b/configs/glm2/run_glm2_6b_finetune_eval.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm2/run_glm2_6b_lora_2k_800T_A2_64G.yaml b/configs/glm2/run_glm2_6b_lora_2k_800T_A2_64G.yaml index d050411f..551133b7 100644 --- a/configs/glm2/run_glm2_6b_lora_2k_800T_A2_64G.yaml +++ b/configs/glm2/run_glm2_6b_lora_2k_800T_A2_64G.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm2/run_glm2_6b_lora_2k_800_32G.yaml b/configs/glm2/run_glm2_6b_lora_2k_800_32G.yaml index fb50e39b..48135d4e 100644 --- a/configs/glm2/run_glm2_6b_lora_2k_800_32G.yaml +++ b/configs/glm2/run_glm2_6b_lora_2k_800_32G.yaml @@ -13,7 +13,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm2/run_glm2_6b_lora_800T_A2_64G.yaml b/configs/glm2/run_glm2_6b_lora_800T_A2_64G.yaml index 6fac1bd3..6064f926 100644 --- a/configs/glm2/run_glm2_6b_lora_800T_A2_64G.yaml +++ b/configs/glm2/run_glm2_6b_lora_800T_A2_64G.yaml @@ -12,7 +12,6 @@ context: mode: 0 # 0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm2/run_glm2_6b_lora_800_32G.yaml b/configs/glm2/run_glm2_6b_lora_800_32G.yaml index 420d0aaf..b4c1bb75 100644 --- a/configs/glm2/run_glm2_6b_lora_800_32G.yaml +++ b/configs/glm2/run_glm2_6b_lora_800_32G.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm2/run_glm2_6b_lora_eval.yaml b/configs/glm2/run_glm2_6b_lora_eval.yaml index e473b590..c76e8ee3 100644 --- a/configs/glm2/run_glm2_6b_lora_eval.yaml +++ b/configs/glm2/run_glm2_6b_lora_eval.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: 
"--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm3/finetune_glm3_6b_bf16.yaml b/configs/glm3/finetune_glm3_6b_bf16.yaml index 2915fd19..d0c20d56 100644 --- a/configs/glm3/finetune_glm3_6b_bf16.yaml +++ b/configs/glm3/finetune_glm3_6b_bf16.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm3/predict_glm3_6b.yaml b/configs/glm3/predict_glm3_6b.yaml index 944ccc5d..c1390dbf 100644 --- a/configs/glm3/predict_glm3_6b.yaml +++ b/configs/glm3/predict_glm3_6b.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm3/run_glm3_6b.yaml b/configs/glm3/run_glm3_6b.yaml index 3d0b4e06..771e4fef 100644 --- a/configs/glm3/run_glm3_6b.yaml +++ b/configs/glm3/run_glm3_6b.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm3/run_glm3_6b_finetune_2k_800T_A2_64G.yaml b/configs/glm3/run_glm3_6b_finetune_2k_800T_A2_64G.yaml index 46fd5093..728c033a 100644 --- a/configs/glm3/run_glm3_6b_finetune_2k_800T_A2_64G.yaml +++ b/configs/glm3/run_glm3_6b_finetune_2k_800T_A2_64G.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm3/run_glm3_6b_finetune_800T_A2_64G.yaml b/configs/glm3/run_glm3_6b_finetune_800T_A2_64G.yaml index 7aab80ed..cb0d2e86 100644 --- a/configs/glm3/run_glm3_6b_finetune_800T_A2_64G.yaml +++ b/configs/glm3/run_glm3_6b_finetune_800T_A2_64G.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/glm3/run_glm3_6b_multiturn_finetune_800T_A2_64G.yaml b/configs/glm3/run_glm3_6b_multiturn_finetune_800T_A2_64G.yaml index 0c83c725..97737c92 100644 --- a/configs/glm3/run_glm3_6b_multiturn_finetune_800T_A2_64G.yaml +++ b/configs/glm3/run_glm3_6b_multiturn_finetune_800T_A2_64G.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: 
False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/configs/gpt2/finetune_gpt2_small_fp16.yaml b/configs/gpt2/finetune_gpt2_small_fp16.yaml index 3ca9a3d2..a06c1be2 100644 --- a/configs/gpt2/finetune_gpt2_small_fp16.yaml +++ b/configs/gpt2/finetune_gpt2_small_fp16.yaml @@ -14,7 +14,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/configs/gpt2/pretrain_gpt2_13b_fp16.yaml b/configs/gpt2/pretrain_gpt2_13b_fp16.yaml index b4d4f469..c5cb30d9 100644 --- a/configs/gpt2/pretrain_gpt2_13b_fp16.yaml +++ b/configs/gpt2/pretrain_gpt2_13b_fp16.yaml @@ -14,7 +14,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" save_graphs: False diff --git a/configs/gpt2/run_gpt2_13b.yaml b/configs/gpt2/run_gpt2_13b.yaml index 28e93974..44b7a8d7 100644 --- a/configs/gpt2/run_gpt2_13b.yaml +++ b/configs/gpt2/run_gpt2_13b.yaml @@ -14,7 +14,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" save_graphs: False diff --git a/configs/gpt2/run_gpt2_13b_910b.yaml b/configs/gpt2/run_gpt2_13b_910b.yaml index 56229035..6c455a2e 100644 --- a/configs/gpt2/run_gpt2_13b_910b.yaml +++ b/configs/gpt2/run_gpt2_13b_910b.yaml @@ -14,7 +14,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/configs/gpt2/run_gpt2_52b.yaml b/configs/gpt2/run_gpt2_52b.yaml index 30ddac6a..562ac5f6 100644 --- a/configs/gpt2/run_gpt2_52b.yaml +++ b/configs/gpt2/run_gpt2_52b.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" save_graphs: False diff --git a/configs/gpt2/run_gpt2_xl.yaml b/configs/gpt2/run_gpt2_xl.yaml index 79414079..f9f67f86 100644 --- a/configs/gpt2/run_gpt2_xl.yaml +++ b/configs/gpt2/run_gpt2_xl.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" save_graphs: False diff --git a/configs/gpt2/run_gpt2_xl_lora.yaml b/configs/gpt2/run_gpt2_xl_lora.yaml index b1322a8c..a07e1e36 
100644 --- a/configs/gpt2/run_gpt2_xl_lora.yaml +++ b/configs/gpt2/run_gpt2_xl_lora.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" save_graphs: False diff --git a/configs/llama/run_llama_13b.yaml b/configs/llama/run_llama_13b.yaml index fe0aa1a2..206b49dc 100755 --- a/configs/llama/run_llama_13b.yaml +++ b/configs/llama/run_llama_13b.yaml @@ -121,7 +121,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "31GB" save_graphs: False diff --git a/configs/llama/run_llama_13b_910b.yaml b/configs/llama/run_llama_13b_910b.yaml index 7c5b9f93..032caa77 100644 --- a/configs/llama/run_llama_13b_910b.yaml +++ b/configs/llama/run_llama_13b_910b.yaml @@ -121,7 +121,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "57GB" save_graphs: False diff --git a/configs/llama/run_llama_7b.yaml b/configs/llama/run_llama_7b.yaml index 19633e9c..4cb22f47 100755 --- a/configs/llama/run_llama_7b.yaml +++ b/configs/llama/run_llama_7b.yaml @@ -121,7 +121,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "31GB" save_graphs: False diff --git a/configs/llama/run_llama_7b_910b.yaml b/configs/llama/run_llama_7b_910b.yaml index 18126691..abbda8f1 100644 --- a/configs/llama/run_llama_7b_910b.yaml +++ b/configs/llama/run_llama_7b_910b.yaml @@ -121,7 +121,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "58GB" save_graphs: False diff --git a/configs/llama/run_llama_7b_lora.yaml b/configs/llama/run_llama_7b_lora.yaml index 51afba74..15de0f7b 100644 --- a/configs/llama/run_llama_7b_lora.yaml +++ b/configs/llama/run_llama_7b_lora.yaml @@ -123,7 +123,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "31GB" save_graphs: False diff --git a/configs/llama2/predict_llama2_13b_quant.yaml b/configs/llama2/predict_llama2_13b_quant.yaml index a5484a38..a3eade84 100644 --- a/configs/llama2/predict_llama2_13b_quant.yaml +++ b/configs/llama2/predict_llama2_13b_quant.yaml @@ -69,7 +69,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true 
--reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "58GB" save_graphs: False diff --git a/configs/qa/run_qa_bert_base_uncased.yaml b/configs/qa/run_qa_bert_base_uncased.yaml index b011818c..2602fed4 100644 --- a/configs/qa/run_qa_bert_base_uncased.yaml +++ b/configs/qa/run_qa_bert_base_uncased.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/sam/run_sam_vit-b.yaml b/configs/sam/run_sam_vit-b.yaml index 34c613a6..14ef980e 100755 --- a/configs/sam/run_sam_vit-b.yaml +++ b/configs/sam/run_sam_vit-b.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/sam/run_sam_vit-h.yaml b/configs/sam/run_sam_vit-h.yaml index a9da05a2..4ea1d6e5 100755 --- a/configs/sam/run_sam_vit-h.yaml +++ b/configs/sam/run_sam_vit-h.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/sam/run_sam_vit-l.yaml b/configs/sam/run_sam_vit-l.yaml index 279e68b4..8d2f445f 100644 --- a/configs/sam/run_sam_vit-l.yaml +++ b/configs/sam/run_sam_vit-l.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/swin/run_swin_base_p4w7_224_100ep.yaml b/configs/swin/run_swin_base_p4w7_224_100ep.yaml index f401aff2..56d36274 100644 --- a/configs/swin/run_swin_base_p4w7_224_100ep.yaml +++ b/configs/swin/run_swin_base_p4w7_224_100ep.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/t5/run_t5_small_on_wmt16.yaml b/configs/t5/run_t5_small_on_wmt16.yaml index a7b973d8..4d6ee56b 100644 --- a/configs/t5/run_t5_small_on_wmt16.yaml +++ b/configs/t5/run_t5_small_on_wmt16.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False save_graphs_path: "./graph" diff --git a/configs/t5/run_t5_tiny_on_wmt16.yaml b/configs/t5/run_t5_tiny_on_wmt16.yaml index 5e4f35c1..85f7e3e4 100644 --- a/configs/t5/run_t5_tiny_on_wmt16.yaml +++ b/configs/t5/run_t5_tiny_on_wmt16.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False save_graphs_path: "./graph" diff --git a/configs/tokcls/run_tokcls_bert_base_chinese.yaml b/configs/tokcls/run_tokcls_bert_base_chinese.yaml index 65c8e6a7..5cc97ea3 100644 --- a/configs/tokcls/run_tokcls_bert_base_chinese.yaml +++ b/configs/tokcls/run_tokcls_bert_base_chinese.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git 
a/configs/tokcls/run_tokcls_bert_base_chinese_cluener.yaml b/configs/tokcls/run_tokcls_bert_base_chinese_cluener.yaml index da0ce027..bc1222e2 100644 --- a/configs/tokcls/run_tokcls_bert_base_chinese_cluener.yaml +++ b/configs/tokcls/run_tokcls_bert_base_chinese_cluener.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/configs/txtcls/run_txtcls_bert_base_uncased.yaml b/configs/txtcls/run_txtcls_bert_base_uncased.yaml index 48b3076d..dbc63508 100644 --- a/configs/txtcls/run_txtcls_bert_base_uncased.yaml +++ b/configs/txtcls/run_txtcls_bert_base_uncased.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False save_graphs_path: "./graph" diff --git a/configs/txtcls/run_txtcls_bert_base_uncased_mnli.yaml b/configs/txtcls/run_txtcls_bert_base_uncased_mnli.yaml index fa035072..34e67b06 100644 --- a/configs/txtcls/run_txtcls_bert_base_uncased_mnli.yaml +++ b/configs/txtcls/run_txtcls_bert_base_uncased_mnli.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False save_graphs_path: "./graph" diff --git a/configs/vit/run_vit_base_p16_224_100ep.yaml b/configs/vit/run_vit_base_p16_224_100ep.yaml index 3bb6722e..28ae4453 100644 --- a/configs/vit/run_vit_base_p16_224_100ep.yaml +++ b/configs/vit/run_vit_base_p16_224_100ep.yaml @@ -12,7 +12,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--opt_level=0" max_call_depth: 10000 save_graphs: False device_id: 0 diff --git a/docs/feature_cards/Chat_Web.md b/docs/feature_cards/Chat_Web.md index cd9819a1..44a05fcf 100644 --- a/docs/feature_cards/Chat_Web.md +++ b/docs/feature_cards/Chat_Web.md @@ -293,7 +293,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" # 59GB for Atlas 800T A2 save_graphs: False @@ -571,7 +570,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "31GB" save_graphs: False @@ -838,7 +836,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" save_graphs: False diff --git a/docs/readthedocs/source_zh_cn/docs/practice/gpt2_example.md b/docs/readthedocs/source_zh_cn/docs/practice/gpt2_example.md index 9138cfd1..f725f049 100644 --- a/docs/readthedocs/source_zh_cn/docs/practice/gpt2_example.md +++ b/docs/readthedocs/source_zh_cn/docs/practice/gpt2_example.md @@ -1273,7 +1273,6 @@ GPT2作为大语言模型,其主要的task是文本生成和对话问答方面 mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" 
enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "30GB" save_graphs: False diff --git a/docs/readthedocs/source_zh_cn/docs/practice/llama_example.md b/docs/readthedocs/source_zh_cn/docs/practice/llama_example.md index d98770fa..90c2b270 100644 --- a/docs/readthedocs/source_zh_cn/docs/practice/llama_example.md +++ b/docs/readthedocs/source_zh_cn/docs/practice/llama_example.md @@ -833,7 +833,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "31GB" save_graphs: False diff --git a/mindformers/trainer/config_args.py b/mindformers/trainer/config_args.py index 70314985..af11a3ef 100644 --- a/mindformers/trainer/config_args.py +++ b/mindformers/trainer/config_args.py @@ -98,8 +98,6 @@ class ContextConfig(BaseArgsConfig): | +------------------------------+----------------------------+ | | enable_graph_kernel | Ascend/GPU | | +------------------------------+----------------------------+ - | | graph_kernel_flags | Ascend/GPU | - | +------------------------------+----------------------------+ | | enable_reduce_precision | Ascend | | +------------------------------+----------------------------+ | | check_bprop | CPU/GPU/Ascend | @@ -240,26 +238,6 @@ class ContextConfig(BaseArgsConfig): For details of graph kernel fusion, please check `Enabling Graph Kernel Fusion `_. - graph_kernel_flags (str): - Optimization options of graph kernel fusion, and the priority is higher when it conflicts - with enable_graph_kernel. Only for experienced users. - For example, mindspore.set_context(graph_kernel_flags="--opt_level=2 --dump_as_text"). Some general options: - - - opt_level: Set the optimization level. - Default: ``2`` . Graph kernel fusion can be enabled equivalently by setting opt_level greater than 0. - Available values are: - - - 0: disables graph kernel fusion; - - 1: enables the basic fusion of operators; - - 2: includes all optimizations of level 1, - and turns on more optimizations such as CSE, arithmetic simplification and so on; - - 3: includes all optimizations of level 2, and turns on more optimizations such as SitchingFusion, - ParallelFusion and so on. Optimizations of this level are radical and unstable in some scenarios. - Be caution when using this level. - - - dump_as_text: dumps detail info as text files. Default: ``False`` . - - More options can refer to the implementation code. enable_reduce_precision (bool): Whether to enable precision reduction. 
If the operator does not support the user-specified precision, the precision will @@ -461,7 +439,7 @@ class ContextConfig(BaseArgsConfig): 'enable_profiling', 'profiling_options', 'enable_auto_mixed_precision', 'enable_graph_kernel', 'reserve_class_name_in_scope', 'check_bprop', 'max_device_memory', 'print_file_path', 'enable_sparse', 'max_call_depth', - 'env_config_path', 'graph_kernel_flags', 'save_compile_cache', 'runtime_num_threads', + 'env_config_path', 'save_compile_cache', 'runtime_num_threads', 'load_compile_cache', 'grad_for_scalar', 'pynative_synchronize', 'mempool_block_size' ] diff --git a/mindformers/trainer/training_args.py b/mindformers/trainer/training_args.py index add6aa0a..cf85978e 100644 --- a/mindformers/trainer/training_args.py +++ b/mindformers/trainer/training_args.py @@ -204,10 +204,6 @@ class TrainingArguments: default=False, metadata={"help": "Whether to enable graph fusion. Default: False."} ) - graph_kernel_flags: str = field( - default="--opt_level=0", - metadata={"help": "Graph fusion level."} - ) max_call_depth: int = field( default=10000, metadata={"help": "Maximum depth of function calls. Default: 10000."} @@ -1586,8 +1582,6 @@ class TrainingArguments: task_config.context.device_target, self.device_target) task_config.context.enable_graph_kernel = _check_training_args( task_config.context.enable_graph_kernel, self.enable_graph_kernel) - task_config.context.graph_kernel_flags = _check_training_args( - task_config.context.graph_kernel_flags, self.graph_kernel_flags) task_config.context.max_call_depth = _check_training_args( task_config.context.max_call_depth, self.max_call_depth) task_config.context.max_device_memory = _check_training_args( diff --git a/research/baichuan2/finetune_baichuan2_13b_auto_parallel.yaml b/research/baichuan2/finetune_baichuan2_13b_auto_parallel.yaml index 885f02e8..5777a351 100644 --- a/research/baichuan2/finetune_baichuan2_13b_auto_parallel.yaml +++ b/research/baichuan2/finetune_baichuan2_13b_auto_parallel.yaml @@ -121,7 +121,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/research/baichuan2/finetune_baichuan2_7b_auto_parallel.yaml b/research/baichuan2/finetune_baichuan2_7b_auto_parallel.yaml index 1655ff9e..0c6094d4 100644 --- a/research/baichuan2/finetune_baichuan2_7b_auto_parallel.yaml +++ b/research/baichuan2/finetune_baichuan2_7b_auto_parallel.yaml @@ -122,7 +122,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/research/deepseek/predict_deepseek_33b.yaml b/research/deepseek/predict_deepseek_33b.yaml index 4de47ea4..e064258f 100644 --- a/research/deepseek/predict_deepseek_33b.yaml +++ b/research/deepseek/predict_deepseek_33b.yaml @@ -124,7 +124,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "58GB" save_graphs: False diff --git 
a/research/glm32k/predict_glm32k.yaml b/research/glm32k/predict_glm32k.yaml index 40f0923b..bdedcb70 100644 --- a/research/glm32k/predict_glm32k.yaml +++ b/research/glm32k/predict_glm32k.yaml @@ -11,7 +11,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" # 59GB for Atlas 800T A2 save_graphs: False diff --git a/research/internlm/finetune_internlm_7b.yaml b/research/internlm/finetune_internlm_7b.yaml index 41284538..5228744b 100644 --- a/research/internlm/finetune_internlm_7b.yaml +++ b/research/internlm/finetune_internlm_7b.yaml @@ -108,7 +108,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/research/internlm/finetune_internlm_7b_lora.yaml b/research/internlm/finetune_internlm_7b_lora.yaml index 928fc978..459e2777 100644 --- a/research/internlm/finetune_internlm_7b_lora.yaml +++ b/research/internlm/finetune_internlm_7b_lora.yaml @@ -109,7 +109,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/research/internlm/predict_internlm_20b.yaml b/research/internlm/predict_internlm_20b.yaml index 73c32280..b6d03435 100644 --- a/research/internlm/predict_internlm_20b.yaml +++ b/research/internlm/predict_internlm_20b.yaml @@ -70,7 +70,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/research/internlm/predict_internlm_7b.yaml b/research/internlm/predict_internlm_7b.yaml index 200de688..fd2cb991 100644 --- a/research/internlm/predict_internlm_7b.yaml +++ b/research/internlm/predict_internlm_7b.yaml @@ -70,7 +70,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/research/internlm2/finetune_internlm2_7b.yaml b/research/internlm2/finetune_internlm2_7b.yaml index 02d30230..15d7dcd9 100644 --- a/research/internlm2/finetune_internlm2_7b.yaml +++ b/research/internlm2/finetune_internlm2_7b.yaml @@ -108,7 +108,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/research/internlm2/predict_internlm2_20b.yaml b/research/internlm2/predict_internlm2_20b.yaml index df3d201d..77991573 
100644 --- a/research/internlm2/predict_internlm2_20b.yaml +++ b/research/internlm2/predict_internlm2_20b.yaml @@ -86,7 +86,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/research/internlm2/predict_internlm2_7b.yaml b/research/internlm2/predict_internlm2_7b.yaml index dee06c5d..2ddd9704 100644 --- a/research/internlm2/predict_internlm2_7b.yaml +++ b/research/internlm2/predict_internlm2_7b.yaml @@ -109,7 +109,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "59GB" save_graphs: False diff --git a/research/llama3/predict_llama3_70b.yaml b/research/llama3/predict_llama3_70b.yaml index e1e580d7..9301bd56 100644 --- a/research/llama3/predict_llama3_70b.yaml +++ b/research/llama3/predict_llama3_70b.yaml @@ -66,7 +66,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "58GB" save_graphs: False diff --git a/research/qwen/finetune_qwen_14b_auto_parallel.yaml b/research/qwen/finetune_qwen_14b_auto_parallel.yaml index 3d2f724f..d2a2cd76 100644 --- a/research/qwen/finetune_qwen_14b_auto_parallel.yaml +++ b/research/qwen/finetune_qwen_14b_auto_parallel.yaml @@ -148,7 +148,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" ascend_config: precision_mode: "must_keep_origin_dtype" max_call_depth: 10000 diff --git a/research/qwen/finetune_qwen_7b_auto_parallel.yaml b/research/qwen/finetune_qwen_7b_auto_parallel.yaml index c5b8100f..2c7f4cbf 100644 --- a/research/qwen/finetune_qwen_7b_auto_parallel.yaml +++ b/research/qwen/finetune_qwen_7b_auto_parallel.yaml @@ -147,7 +147,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" ascend_config: precision_mode: "must_keep_origin_dtype" max_call_depth: 10000 diff --git a/research/qwen1_5/predict_qwen1_5_14b_chat.yaml b/research/qwen1_5/predict_qwen1_5_14b_chat.yaml index 85735b6e..79d5a249 100644 --- a/research/qwen1_5/predict_qwen1_5_14b_chat.yaml +++ b/research/qwen1_5/predict_qwen1_5_14b_chat.yaml @@ -94,7 +94,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" ascend_config: precision_mode: "must_keep_origin_dtype" max_call_depth: 10000 diff --git a/research/qwen1_5/predict_qwen1_5_72b_chat.yaml b/research/qwen1_5/predict_qwen1_5_72b_chat.yaml index 21cd8547..857db0be 100644 --- 
a/research/qwen1_5/predict_qwen1_5_72b_chat.yaml +++ b/research/qwen1_5/predict_qwen1_5_72b_chat.yaml @@ -94,7 +94,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" ascend_config: precision_mode: "must_keep_origin_dtype" max_call_depth: 10000 diff --git a/research/qwen1_5/predict_qwen1_5_7b_chat.yaml b/research/qwen1_5/predict_qwen1_5_7b_chat.yaml index e5970355..b2afbb5a 100644 --- a/research/qwen1_5/predict_qwen1_5_7b_chat.yaml +++ b/research/qwen1_5/predict_qwen1_5_7b_chat.yaml @@ -94,7 +94,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" ascend_config: precision_mode: "must_keep_origin_dtype" max_call_depth: 10000 diff --git a/research/qwenvl/finetune_qwenvl_9.6b_bf16.yaml b/research/qwenvl/finetune_qwenvl_9.6b_bf16.yaml index b270b44b..2c3ef753 100644 --- a/research/qwenvl/finetune_qwenvl_9.6b_bf16.yaml +++ b/research/qwenvl/finetune_qwenvl_9.6b_bf16.yaml @@ -205,7 +205,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" ascend_config: precision_mode: "must_keep_origin_dtype" max_call_depth: 10000 diff --git a/research/qwenvl/predict_qwenvl_9.6b.yaml b/research/qwenvl/predict_qwenvl_9.6b.yaml index ee09c299..f86564c6 100644 --- a/research/qwenvl/predict_qwenvl_9.6b.yaml +++ b/research/qwenvl/predict_qwenvl_9.6b.yaml @@ -138,7 +138,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" ascend_config: precision_mode: "must_keep_origin_dtype" max_call_depth: 10000 diff --git a/research/yi/predict_yi_34b_chat.yaml b/research/yi/predict_yi_34b_chat.yaml index a4d3d57a..0e643d52 100644 --- a/research/yi/predict_yi_34b_chat.yaml +++ b/research/yi/predict_yi_34b_chat.yaml @@ -123,7 +123,6 @@ context: mode: 0 #0--Graph Mode; 1--Pynative Mode device_target: "Ascend" enable_graph_kernel: False - graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true" max_call_depth: 10000 max_device_memory: "58GB" save_graphs: False -- Gitee From ef747e14cb81d4cd6c69558b51b50f9554d093ea Mon Sep 17 00:00:00 2001 From: koukairui Date: Fri, 9 Aug 2024 11:08:20 +0800 Subject: [PATCH 22/33] remove experimental module from mindformers __init__.py --- mindformers/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mindformers/__init__.py b/mindformers/__init__.py index 43a9290c..64d5dfff 100644 --- a/mindformers/__init__.py +++ b/mindformers/__init__.py @@ -17,13 +17,12 @@ __version__ = "1.2.0" -from mindformers import core, dataset, experimental, \ +from mindformers import core, dataset, \ models, modules, wrapper, tools from mindformers.pipeline import * from mindformers.trainer import * from mindformers.core import * from mindformers.dataset import * -from mindformers.experimental import * from 
mindformers.models import * from mindformers.modules import * from mindformers.wrapper import * @@ -37,7 +36,6 @@ from .mindformer_book import MindFormerBook __all__ = [] __all__.extend(dataset.__all__) -__all__.extend(experimental.__all__) __all__.extend(models.__all__) __all__.extend(core.__all__) __all__.extend(tools.__all__) -- Gitee From a9e2f26d36b543a1ca5dbb6e01477070080a746f Mon Sep 17 00:00:00 2001 From: yang-minghai22 Date: Sat, 17 Aug 2024 16:11:01 +0800 Subject: [PATCH 23/33] remove hal sync operation --- mindformers/generation/text_generator.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mindformers/generation/text_generator.py b/mindformers/generation/text_generator.py index 5d80e4c4..b9686210 100644 --- a/mindformers/generation/text_generator.py +++ b/mindformers/generation/text_generator.py @@ -313,7 +313,6 @@ class GenerationMixin: res = self( **model_inputs, ) - ms.hal.synchronize() self.phase = "increment" # first iter done, go to other iters self.add_flags_custom(is_first_iteration=False) @@ -332,7 +331,6 @@ class GenerationMixin: res = self( **model_inputs, ) - ms.hal.synchronize() return res -- Gitee From b268592804a123646a14231a91035f052e41e062 Mon Sep 17 00:00:00 2001 From: liuqi Date: Thu, 22 Aug 2024 10:01:29 +0800 Subject: [PATCH 24/33] keep version in readme same to branch --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dc1f5163..302fb7e0 100644 --- a/README.md +++ b/README.md @@ -376,7 +376,7 @@ MindFormers已支持大部分模型的[LoRA微调](docs/feature_cards/Pet_Tuners MindFormers目前支持源码编译安装,用户可以执行如下命令进行安装。 ```shell -git clone -b dev https://gitee.com/mindspore/mindformers.git +git clone -b r1.2.0 https://gitee.com/mindspore/mindformers.git cd mindformers bash build.sh ``` -- Gitee From 868960d051ec1035bb38006996a0ddf67aa165d6 Mon Sep 17 00:00:00 2001 From: wangpingan2 Date: Thu, 22 Aug 2024 10:23:48 +0800 Subject: [PATCH 25/33] optimizer kbk performance --- mindformers/model_runner.py | 7 ++----- mindformers/models/llama/llama.py | 3 +-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/mindformers/model_runner.py b/mindformers/model_runner.py index e64b84f1..d577f72d 100644 --- a/mindformers/model_runner.py +++ b/mindformers/model_runner.py @@ -21,7 +21,7 @@ from typing import Optional, List, Union, Dict import numpy as np import mindspore as ms -from mindspore import ops, Tensor +from mindspore import Tensor from mindspore.communication.management import init from mindspore.common.initializer import Zero from mindspore._c_expression import swap_cache @@ -274,10 +274,7 @@ class MindIEModelRunner: slot_mapping=slot_mapping, prefill=prefill, use_past=True) - if isinstance(res, tuple): - logits = ops.reshape(res[0], (-1, res[0].shape[-1])) - else: - logits = ops.reshape(res, (-1, res.shape[-1])) + logits = res[0] if isinstance(res, tuple) else res if prefill and logits.shape[0] > len(current_idx): logits = logits[Tensor(current_idx)] diff --git a/mindformers/models/llama/llama.py b/mindformers/models/llama/llama.py index 4d278521..fe2392ba 100644 --- a/mindformers/models/llama/llama.py +++ b/mindformers/models/llama/llama.py @@ -287,8 +287,6 @@ class LlamaForCausalLM(LlamaPreTrainedModel): self.shape = P.Shape() self.reshape = P.Reshape() - if config.is_dynamic: - self.reshape.add_prim_attr("skip_redistribution", True) self.cast = P.Cast() self.slice = P.StridedSlice() self.not_equal = P.NotEqual() @@ -446,6 +444,7 @@ class LlamaForCausalLM(LlamaPreTrainedModel): if not self.training: logits = 
self.cast(logits, mstype.float32) if self.predict_run_mode: + logits = self.reshape(logits, (-1, logits.shape[-1])) return logits return logits, tokens, input_mask -- Gitee From 6c524b0c1303e6520b1358654872dacc7b8faed0 Mon Sep 17 00:00:00 2001 From: twc Date: Sat, 24 Aug 2024 12:04:56 +0800 Subject: [PATCH 26/33] Revert "optimizer kbk performance" This reverts commit 868960d051ec1035bb38006996a0ddf67aa165d6. --- mindformers/model_runner.py | 7 +++++-- mindformers/models/llama/llama.py | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/mindformers/model_runner.py b/mindformers/model_runner.py index d577f72d..e64b84f1 100644 --- a/mindformers/model_runner.py +++ b/mindformers/model_runner.py @@ -21,7 +21,7 @@ from typing import Optional, List, Union, Dict import numpy as np import mindspore as ms -from mindspore import Tensor +from mindspore import ops, Tensor from mindspore.communication.management import init from mindspore.common.initializer import Zero from mindspore._c_expression import swap_cache @@ -274,7 +274,10 @@ class MindIEModelRunner: slot_mapping=slot_mapping, prefill=prefill, use_past=True) - logits = res[0] if isinstance(res, tuple) else res + if isinstance(res, tuple): + logits = ops.reshape(res[0], (-1, res[0].shape[-1])) + else: + logits = ops.reshape(res, (-1, res.shape[-1])) if prefill and logits.shape[0] > len(current_idx): logits = logits[Tensor(current_idx)] diff --git a/mindformers/models/llama/llama.py b/mindformers/models/llama/llama.py index fe2392ba..4d278521 100644 --- a/mindformers/models/llama/llama.py +++ b/mindformers/models/llama/llama.py @@ -287,6 +287,8 @@ class LlamaForCausalLM(LlamaPreTrainedModel): self.shape = P.Shape() self.reshape = P.Reshape() + if config.is_dynamic: + self.reshape.add_prim_attr("skip_redistribution", True) self.cast = P.Cast() self.slice = P.StridedSlice() self.not_equal = P.NotEqual() @@ -444,7 +446,6 @@ class LlamaForCausalLM(LlamaPreTrainedModel): if not self.training: logits = self.cast(logits, mstype.float32) if self.predict_run_mode: - logits = self.reshape(logits, (-1, logits.shape[-1])) return logits return logits, tokens, input_mask -- Gitee From da0f9a1a444a13056830a65c6f09efdc610ec585 Mon Sep 17 00:00:00 2001 From: Lin Date: Tue, 27 Aug 2024 06:28:09 +0000 Subject: [PATCH 27/33] update README.md. 
Signed-off-by: Lin --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dc1f5163..302fb7e0 100644 --- a/README.md +++ b/README.md @@ -376,7 +376,7 @@ MindFormers已支持大部分模型的[LoRA微调](docs/feature_cards/Pet_Tuners MindFormers目前支持源码编译安装,用户可以执行如下命令进行安装。 ```shell -git clone -b dev https://gitee.com/mindspore/mindformers.git +git clone -b r1.2.0 https://gitee.com/mindspore/mindformers.git cd mindformers bash build.sh ``` -- Gitee From 53da8772df1939262becb8e778bd827d0f9fbc6d Mon Sep 17 00:00:00 2001 From: michealswhite Date: Fri, 30 Aug 2024 18:05:31 +0800 Subject: [PATCH 28/33] check recompute config legal --- mindformers/tools/check_rules.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/mindformers/tools/check_rules.py b/mindformers/tools/check_rules.py index 9a97e9d9..ff0e6fca 100644 --- a/mindformers/tools/check_rules.py +++ b/mindformers/tools/check_rules.py @@ -225,9 +225,25 @@ def _check_env(config): logger.warning(f"ENABLE_LAZY_INLINE must be set in environment when use fine_grain_interleave" f" (export ENABLE_LAZY_INLINE=1)") + +def _rule_recompute(pp, recompute, key): + if isinstance(recompute, list) and len(recompute) > pp: + if all(isinstance(n, int) for n in recompute): + raise ValueError(f"length of {key} should be equal or less than pipeline_stage number, but get " + f"length of {key} ({recompute}) more than pp({pp})") + + +def _check_recompute(config): + pp = config.parallel_config.pipeline_stage + _rule_recompute(pp, config.recompute_config.recompute, "recompute") + _rule_recompute(pp, config.recompute_config.select_recompute, "select_recompute") + _rule_recompute(pp, config.recompute_config.select_comm_recompute, "select_comm_recompute") + + def check_rules(config, mode='train', **kwargs): """check rules""" _check_mode(config, mode, **kwargs) _check_full_batch() _check_parallel(config) _check_env(config) + _check_recompute(config) -- Gitee From fda0ec9f79d8dd0846ac33b2373e49f20cc962a2 Mon Sep 17 00:00:00 2001 From: lanxiang Date: Sat, 31 Aug 2024 16:07:05 +0800 Subject: [PATCH 29/33] =?UTF-8?q?[bug=20fix]=E4=BF=AE=E6=94=B9internlm2=5F?= =?UTF-8?q?config.py=E7=B1=BB=E5=9E=8B=E4=B8=8D=E9=80=82=E9=85=8D=E7=9A=84?= =?UTF-8?q?bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- research/internlm2/internlm2_config.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/research/internlm2/internlm2_config.py b/research/internlm2/internlm2_config.py index 829f226d..2b147a8a 100644 --- a/research/internlm2/internlm2_config.py +++ b/research/internlm2/internlm2_config.py @@ -151,7 +151,10 @@ class InternLM2Config(LlamaConfig): self.softmax_compute_type = convert_mstype(softmax_compute_type) self.rotary_dtype = convert_mstype(rotary_dtype) self.compute_dtype = convert_mstype(compute_dtype) - self.parallel_config = parallel_config + if isinstance(parallel_config, dict): + self.parallel_config = TransformerOpParallelConfig(**parallel_config) + else: + self.parallel_config = parallel_config self.checkpoint_name_or_path = checkpoint_name_or_path self.bos_token_id = bos_token_id self.eos_token_id = eos_token_id -- Gitee From 76a175f08c59dde0bca9d4fdb487e93c49063a4e Mon Sep 17 00:00:00 2001 From: zouwenxiang Date: Thu, 29 Aug 2024 18:33:02 +0800 Subject: [PATCH 30/33] =?UTF-8?q?=E5=AF=B9=E6=8E=A5=20mindie=20=E5=B9=B6?= =?UTF-8?q?=E8=A1=8C=E8=A7=A3=E7=A0=81=20=E5=85=AC=E5=85=B1=E6=8E=A5?= =?UTF-8?q?=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- mindformers/generation/text_generator.py | 9 ++++++-- mindformers/model_runner.py | 29 +++++++++++++++++++----- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/mindformers/generation/text_generator.py b/mindformers/generation/text_generator.py index b9686210..9c6a6584 100644 --- a/mindformers/generation/text_generator.py +++ b/mindformers/generation/text_generator.py @@ -318,7 +318,8 @@ class GenerationMixin: self.add_flags_custom(is_first_iteration=False) else: # slice model inputs for incremental infer - self.slice_incremental_inputs(model_inputs, current_index) + if not (hasattr(self.config, 'parallel_decoding') and self.config.parallel_decoding): + self.slice_incremental_inputs(model_inputs, current_index) model_inputs["input_position"] = Tensor.from_numpy(np.array(current_index, dtype=np.int32)) model_inputs["init_reset"] = Tensor.from_numpy( np.array([True], dtype=np.bool_)) # init_reset (1,) bool True @@ -1063,7 +1064,11 @@ class GenerationMixin: Returns: res, current_index """ - input_ids = np.reshape(input_ids, (-1, np.shape(input_ids)[-1])) + if ((hasattr(self.config, 'parallel_decoding') and self.config.parallel_decoding != 'la') + and (('q_seq_lens' in model_kwargs) and (model_kwargs['q_seq_lens'] is not None))): + input_ids = np.reshape(input_ids, (1, -1)) + else: + input_ids = np.reshape(input_ids, (-1, np.shape(input_ids)[-1])) batch_size = input_ids.shape[0] seq_length = input_ids.shape[1] current_index = [ diff --git a/mindformers/model_runner.py b/mindformers/model_runner.py index e64b84f1..5eba2dfc 100644 --- a/mindformers/model_runner.py +++ b/mindformers/model_runner.py @@ -18,6 +18,7 @@ For text generation """ import os from typing import Optional, List, Union, Dict +import json import numpy as np import mindspore as ms @@ -129,7 +130,7 @@ class ModelRunner: """ def __new__(cls, model_path, npu_mem_size, cpu_mem_size, block_size, rank_id=0, world_size=1, - npu_device_ids=None): + npu_device_ids=None, plugin_params=None): config_path = _get_model_config(model_path) config = MindFormerConfig(config_path) model_type = config.model.arch.type @@ -143,7 +144,7 @@ class ModelRunner: f"and will use the default one defined in mindformers.") model_runner = model_runner_cls(model_path, config_path, npu_mem_size, cpu_mem_size, - block_size, rank_id, world_size, npu_device_ids) + block_size, rank_id, world_size, npu_device_ids, plugin_params) return model_runner @@ -173,7 +174,7 @@ class MindIEModelRunner: """ def __init__(self, model_path, config_path, npu_mem_size, cpu_mem_size, block_size, rank_id=0, - world_size=1, npu_device_ids=None): + world_size=1, npu_device_ids=None, plugin_params=None): self.config = MindFormerConfig(config_path) # register to Auto Class register_auto_class(self.config, model_path, class_type="AutoConfig") @@ -188,8 +189,12 @@ class MindIEModelRunner: if rank_id == 0 and os.fork() == 0: os.environ['MS_ROLE'] = 'MS_SCHED' init() - self.model_config = AutoConfig.from_pretrained(config_path) + + self.model_config.parallel_decoding = None + if plugin_params: + self.model_config.parallel_decoding = json.loads(plugin_params)['plugin_type'] + self.model_config.checkpoint_path = self.config.load_checkpoint self.num_layers = self.model_config.num_layers self.num_kv_heads = self.model_config.num_heads if self.model_config.n_kv_heads is None \ else self.model_config.n_kv_heads @@ -247,7 +252,10 @@ class MindIEModelRunner: valid_length_each_example: List[int], block_tables: Optional[Tensor] = None, 
slot_mapping: Optional[Tensor] = None, - prefill: bool = True,): + prefill: bool = True, + position_ids: Optional[Tensor] = None, + spec_mask: Optional[Tensor] = None, + q_seq_lens: Optional[Tensor] = None): """ Call self.model.infer() or self.model.forward() to do infer and return logits on next position, \ can choose do prefill or decode predict. @@ -263,6 +271,12 @@ class MindIEModelRunner: Params for page attention prefill (bool): Whether to do prefill predict or decode predict + position_ids (Tensor): + Params for position encoding + spec_mask (Tensor): + Params for page attention + q_seq_lens (Tensor): + Params for page attention Returns: logits (Tensor) @@ -273,7 +287,10 @@ class MindIEModelRunner: block_tables=block_tables, slot_mapping=slot_mapping, prefill=prefill, - use_past=True) + use_past=True, + position_ids=position_ids, + spec_mask=spec_mask, + q_seq_lens=q_seq_lens) if isinstance(res, tuple): logits = ops.reshape(res[0], (-1, res[0].shape[-1])) else: -- Gitee From 77d5b64651e91f212688256ee8f8e8bca81f232a Mon Sep 17 00:00:00 2001 From: yiyison Date: Mon, 2 Sep 2024 20:13:23 +0800 Subject: [PATCH 31/33] =?UTF-8?q?MOE=E6=9A=82=E4=B8=8D=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E9=95=BF=E5=BA=8F=E5=88=97=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- research/deepseek/deepseek.md | 2 ++ research/mixtral/mixtral.md | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/research/deepseek/deepseek.md b/research/deepseek/deepseek.md index 1a8889ce..7b01f977 100644 --- a/research/deepseek/deepseek.md +++ b/research/deepseek/deepseek.md @@ -172,6 +172,8 @@ train_dataset_dir: 训练数据集路径 use_parallel: 是否开启并行训练 ``` +> 注:此模型暂不支持配置`context_parallel`,因此暂不支持长序列。 + ## 全参微调 MindFormers提供`deepseek-33b`多机多卡微调示例,使用`code_alpaca`数据集对模型进行微调,数据集可以参考[数据集下载](#数据集下载)获得。 diff --git a/research/mixtral/mixtral.md b/research/mixtral/mixtral.md index 0c1340a0..8bab8de9 100644 --- a/research/mixtral/mixtral.md +++ b/research/mixtral/mixtral.md @@ -279,7 +279,7 @@ parallel_config: gradient_aggregation_group: 4 ``` -> **注:多机多卡并行配置中data_parallel\*model_parallel\*pipeline_stage == 总卡数,且expert_parallel不能超过data_parallel。** +> **注:多机多卡并行配置中data_parallel\*model_parallel\*pipeline_stage == 总卡数,且expert_parallel不能超过data_parallel。此模型暂不支持配置`context_parallel`,因此暂不支持长序列。** - step 3. 调大`moe_config`中的专家容量因子`capacity_factor`(非必要步骤) -- Gitee From 3c9b014ec6783c532d8f67775c199fd8d17f0b2f Mon Sep 17 00:00:00 2001 From: suhaibo Date: Wed, 9 Oct 2024 01:39:32 +0000 Subject: [PATCH 32/33] update .gitee/PULL_REQUEST_TEMPLATE.zh-CN.md. 
Signed-off-by: suhaibo --- .gitee/PULL_REQUEST_TEMPLATE.zh-CN.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md b/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md index b89c2015..7605710a 100644 --- a/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md +++ b/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md @@ -7,12 +7,17 @@ ### 描述(做了什么,变更了什么) -### check list +### 功能自验(本地完成用例验证,附上结果) -#### 是否完成方案评审或问题根因分析(Y/N) -#### 是否完成了功能模块的UT/ST,并执行通过,附上结果(Y/N) +### self checklist -#### 是否涉及公共组件或对外接口修改,涉及时需给出修改范围和影响评估(Y/N) +[] 是否完成方案评审或问题根因分析 -#### 是否涉及资料修改,涉及时需同步修改(Y/N) \ No newline at end of file +[] 是否完成了功能模块的UT/ST,并执行通过 + +[] 是否涉及公共组件或跨组件修改,并完成涉及组件冒烟用例测试,附上结果 + +[] 是否涉及资料修改,涉及时需同步修改 + +[] 是否涉及兼容性,相关组件是否进行变更通知(消息知会 + 邮件知会) \ No newline at end of file -- Gitee From f77b20666ba9feeb43444b3b6cd911ed569bf665 Mon Sep 17 00:00:00 2001 From: suhaibo Date: Wed, 9 Oct 2024 03:33:16 +0000 Subject: [PATCH 33/33] update .gitee/PULL_REQUEST_TEMPLATE.zh-CN.md. Signed-off-by: suhaibo --- .gitee/PULL_REQUEST_TEMPLATE.zh-CN.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md b/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md index 7605710a..0f9136e2 100644 --- a/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md +++ b/.gitee/PULL_REQUEST_TEMPLATE.zh-CN.md @@ -12,12 +12,12 @@ ### self checklist -[] 是否完成方案评审或问题根因分析 +- [] 是否完成方案评审或问题根因分析 -[] 是否完成了功能模块的UT/ST,并执行通过 +- [] 是否完成了功能模块的UT/ST,并执行通过 -[] 是否涉及公共组件或跨组件修改,并完成涉及组件冒烟用例测试,附上结果 +- [] 是否涉及公共组件或跨组件修改,并完成涉及组件冒烟用例测试,附上结果 -[] 是否涉及资料修改,涉及时需同步修改 +- [] 是否涉及资料修改,涉及时需同步修改 -[] 是否涉及兼容性,相关组件是否进行变更通知(消息知会 + 邮件知会) \ No newline at end of file +- [] 是否涉及兼容性,相关组件是否进行变更通知(消息知会) \ No newline at end of file -- Gitee
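Patch 21 removes the `graph_kernel_flags` entry from `TrainingArguments`, `ContextConfig` and the research YAML files while keeping `enable_graph_kernel`. For anyone who still relies on custom fusion flags, the sketch below passes them straight to the MindSpore context instead. This assumes the installed MindSpore build still accepts a `graph_kernel_flags` keyword in `set_context`, which should be verified against its documentation first.

```python
# Unverified sketch: configure graph-kernel fusion directly on the MindSpore
# context instead of through the removed YAML/TrainingArguments field.
# The graph_kernel_flags keyword is an assumption about the installed MindSpore
# version; check that it is still supported before relying on this.
import mindspore as ms

ms.set_context(
    mode=ms.GRAPH_MODE,
    device_target="Ascend",
    enable_graph_kernel=True,            # this switch remains configurable after patch 21
    graph_kernel_flags="--opt_level=2",  # hypothetical flag string
)
```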
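Patch 22 stops re-exporting the `experimental` subpackage from the top-level `mindformers` namespace and drops its names from `__all__`. Code that picked those symbols up implicitly would need an explicit import; the lines below are a sketch under the assumption that the subpackage itself is still shipped and only the re-export was removed.

```python
# Sketch: after patch 22 a wildcard import of mindformers no longer brings in
# the experimental names, so import the subpackage explicitly (assumption:
# the package is still present, only the top-level re-export was removed).
from mindformers import experimental
# from mindformers.experimental import some_symbol   # hypothetical symbol name
```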
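Patch 28 adds `_check_recompute`, which rejects an all-integer recompute list whose length exceeds `pipeline_stage`. A self-contained illustration of that rule, using hypothetical values rather than a real training config:

```python
# Standalone illustration of the length rule added in check_rules.py (patch 28):
# an all-integer per-stage recompute list may not have more entries than
# pipeline_stage; boolean switches and nested lists are not length-checked here.
def recompute_list_is_valid(pipeline_stage, recompute):
    if isinstance(recompute, list) and all(isinstance(n, int) for n in recompute):
        return len(recompute) <= pipeline_stage
    return True

assert recompute_list_is_valid(4, [2, 2, 2, 2])          # one entry per stage, accepted
assert not recompute_list_is_valid(4, [2, 2, 2, 2, 2])   # five entries for four stages, rejected
assert recompute_list_is_valid(4, True)                  # plain boolean switch is untouched
```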
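Patch 29 lets `InternLM2Config` accept `parallel_config` either as a ready-made `TransformerOpParallelConfig` object or as a plain dict of its fields, which is what a YAML file typically yields. The same dict-or-object pattern in isolation (the class below is a stand-in with hypothetical fields, not the real MindFormers implementation):

```python
# Isolated sketch of the coercion added in internlm2_config.py (patch 29).
class TransformerOpParallelConfig:  # stand-in for the real MindFormers class
    def __init__(self, data_parallel=1, model_parallel=1, pipeline_stage=1):
        self.data_parallel = data_parallel
        self.model_parallel = model_parallel
        self.pipeline_stage = pipeline_stage

def resolve_parallel_config(parallel_config):
    if isinstance(parallel_config, dict):
        return TransformerOpParallelConfig(**parallel_config)  # YAML dict becomes an object
    return parallel_config                                     # objects pass through untouched

cfg = resolve_parallel_config({"data_parallel": 2, "model_parallel": 4})
assert cfg.model_parallel == 4
```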
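Patch 30 threads `position_ids`, `spec_mask` and `q_seq_lens` through `MindIEModelRunner.forward`, skips the incremental-input slicing when `parallel_decoding` is configured, and, when `q_seq_lens` is supplied and the mode is not `'la'`, packs `input_ids` into a single row instead of a padded batch. A small numpy sketch of that packing with made-up token ids:

```python
# Sketch of the input packing used for parallel decoding (patch 30): queries of
# all requests are concatenated into one row of shape (1, total_tokens) instead
# of being padded into the usual (batch, seq_len) layout.
import numpy as np

q_seq_lens = [3, 2]                              # hypothetical per-request query lengths
tokens = np.array([101, 102, 103, 201, 202])     # hypothetical token ids, concatenated

packed = np.reshape(tokens, (1, -1))             # parallel-decoding layout, shape (1, 5)
padded = np.array([[101, 102, 103],
                   [201, 202, 0]])               # conventional padded layout, shape (2, 3)

assert packed.shape == (1, sum(q_seq_lens))
```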