From 8987ba6aee86460d2491d614cf4cfa1772b87aa9 Mon Sep 17 00:00:00 2001 From: yoni Date: Wed, 11 Jan 2023 11:54:31 +0200 Subject: [PATCH 1/5] speed tests --- .../delegate/tensorrt/op/encoder_tensorrt.cc | 16 ++-- trc/readers/mindir/readir.cc | 81 +++++++++++-------- trc/transformer/deploy.sh | 10 +-- trc/transformer/models.txt | 9 ++- 4 files changed, 69 insertions(+), 47 deletions(-) diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.cc b/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.cc index 893c7341b3a..bbf489fde5a 100644 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.cc +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.cc @@ -133,11 +133,13 @@ int EncoderTensorRT::AddInnerOp(TensorRTContext *ctx) { auto plugin = std::make_shared(input_tensor->getName(), compute_type, params, GetCublasLtHandle(), device_id_); const int input_number = inputs().size(); - nvinfer1::ITensor *inputTensors[input_number]; + nvinfer1::ITensor *inputTensors[input_number+1]; for (int i = 0; i < input_number; i++) { inputTensors[i] = input(ctx, i).trt_tensor_; } - nvinfer1::IPluginV2Layer *encoder_layer = ctx->network()->addPluginV2(inputTensors, input_number, *plugin); + auto network_input = ctx->network()->getInput(0); + inputTensors[input_number] = network_input; + nvinfer1::IPluginV2Layer *encoder_layer = ctx->network()->addPluginV2(inputTensors, input_number+1, *plugin); if (encoder_layer == nullptr) { MS_LOG(ERROR) << "add encoder op failed for TensorRT."; return RET_ERROR; @@ -179,7 +181,7 @@ int EncoderPlugin::RunCudaEncoder(const nvinfer1::PluginTensorDesc *inputDesc, const_cast(inputs[3]), const_cast(inputs[4]), const_cast(inputs[5]), const_cast(inputs[6]), const_cast(inputs[7]), const_cast(inputs[8]), const_cast(inputs[9]), const_cast(inputs[10]), const_cast(inputs[11]), - const_cast(inputs[12]), const_cast(inputs[13])}; + const_cast(inputs[12]), const_cast(inputs[13]), const_cast(inputs[14])}; void *outputs_forward[] = {outputs[0]}; fastertransformer::forwardEncoder(inputs_forward, num_of_inputs_, outputs_forward, num_of_outputs_, ¶ms_, workspace); @@ -189,9 +191,13 @@ int EncoderPlugin::RunCudaEncoder(const nvinfer1::PluginTensorDesc *inputDesc, bool EncoderPlugin::supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc *tensorsDesc, int nbInputs, int nbOutputs) noexcept { auto type = (compute_type_ == RuntimePrecisionMode_FP16) ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT; - for (int i = 0; i < pos; i++) { - if (tensorsDesc[pos].type != tensorsDesc[i].type) return false; + if (pos == nbInputs - 1) { + bool res = (tensorsDesc[pos].type == nvinfer1::DataType::kINT32)? true:false; + return res; } + // for (int i = 0; i < pos; i++) { + // if (tensorsDesc[pos].type != tensorsDesc[i].type) return false; + // } bool res = (tensorsDesc[pos].format == nvinfer1::TensorFormat::kLINEAR) && (tensorsDesc[pos].type == type); return res; } diff --git a/trc/readers/mindir/readir.cc b/trc/readers/mindir/readir.cc index 98455e56035..bcbb8fd2316 100644 --- a/trc/readers/mindir/readir.cc +++ b/trc/readers/mindir/readir.cc @@ -118,23 +118,24 @@ float GetFloatSum(const float *data, int size) { } void printTensor(int id, TensorProto const &tensor, int limit = 0) { - std::cout << id << ")" << (tensor.has_name() ? tensor.name() : "no name") << std::endl; + std::cout << "tensor #"<< id << std::endl; + if (tensor.has_name()) { + std::cout << "name=" << tensor.name() << std::endl; + } if (tensor.has_doc_string()) { std::cout << "doc_string = " << tensor.doc_string(); } if (tensor.has_ref_key()) { std::cout << "ref_key=" << tensor.ref_key() << std::endl; } - - std::cout << " " - << "t:" << EnumAttributeType(tensor.data_type()) << ""; + std::cout << "data_type=" << EnumAttributeType(tensor.data_type()) << std::endl; if (tensor.has_raw_data()) { const char *data = tensor.raw_data().data(); std::cout << " size=(" << tensor.raw_data().size() << ")\n"; if (tensor.data_type() == TensorProto_DataType_FLOAT) { const float *float_data = reinterpret_cast(data); size_t size = tensor.raw_data().size() / sizeof(float); - std::cout << "data="; + std::cout << "data:"; for (size_t i = 0; i < std::min(size, static_cast(limit)); i++) { std::cout << float_data[i] << " "; } @@ -185,14 +186,20 @@ void printTensor(int id, TensorProto const &tensor, int limit = 0) { } void printAttr(int i, const AttributeProto &attr) { - std::cout << i << ":" << attr.name() << " " - << "( " << attr.ref_attr_name() << ")[" << EnumAttributeType(attr.type()) << "]" << std::endl << "{" ; - if (attr.has_t()) { - std::cout << "tensor:"; - printTensor(0, attr.t()); - } - if (attr.tensors_size() > 0) { - std::cout << "tensors:" << attr.tensors_size() << "\n"; + std::cout << "attr #" << i << std::endl; + if (attr.has_name()) { + std::cout << "name=" << attr.name() << std::endl; + } + if (attr.has_ref_attr_name()) { + std::cout << "ref_attr_name=" << attr.ref_attr_name() << std::endl; + } + std::cout << "type=" << EnumAttributeType(attr.type()) << std::endl; + if (attr.has_t()) { + std::cout << "t="; + printTensor(0, attr.t()); + } + if (attr.tensors_size() > 0) { + std::cout << "tensors: (" << attr.tensors_size() << ")" << std::endl; for (int i = 0; i < attr.tensors_size(); i++) printTensor(i, attr.tensors(i)); } @@ -232,7 +239,7 @@ void printAttr(int i, const AttributeProto &attr) { case AttributeProto_AttributeType_TUPLE: case AttributeProto_AttributeType_LIST: if (attr.values_size() > 0) { - std::cout << "values:" << std::endl; + std::cout << "values:" << "(" << attr.values_size() << ")" << std::endl; for (int i = 0; i < attr.values_size(); i++) { printAttr(i, attr.values(i)); } @@ -244,44 +251,44 @@ void printAttr(int i, const AttributeProto &attr) { std::cout << "}\n"; } -void printValue(ValueInfoProto const &val) { +void printValue(int id, ValueInfoProto const &val) { + std::cout << "value #" < &map) { - if (node.op_type().find("TupleGetItem") != std::string::npos) + auto op_type = node.op_type(); + if (op_type.find("TupleGetItem") != std::string::npos || op_type.find("MakeTuple") != std::string::npos) { auto th = map.find(node.input(0)); if (th != map.end()) { @@ -559,10 +567,15 @@ void printOut(ModelProto &model) { } for (int i = 0; i < graph.output_size(); i++) { const ValueInfoProto &v = graph.output(i); + if (v.tensor_size() > 1) { + + std::cout << v.tensor(0).name() << std::endl; + } else { auto t = map.find(v.name()); if (t != map.end()) { printOut(t->second,map); } + } } } diff --git a/trc/transformer/deploy.sh b/trc/transformer/deploy.sh index 4c7c40b111a..001af7d5e01 100755 --- a/trc/transformer/deploy.sh +++ b/trc/transformer/deploy.sh @@ -6,13 +6,11 @@ benchmark=${system}/tools/benchmark/benchmark readir=${base}/trc/readers/mindir/readir server=caspi gpu_id=0 -while getopts "ctC:" opt ; do +while getopts "ctG:" opt ; do case "${opt}" in - c) - compress="_compress" ;; t) time=true ;; - C) + G) gpu_id=$OPTARG ;; *) echo "Unknown option ${opt}!" ;; @@ -47,7 +45,7 @@ rsync -v cfg_${model}.config ${server}:$(realpath "cfg_${model}.config") # this should be more general ! # output_files=$(find . -maxdepth 1 -name ${model}_compress_output"*.txt*" | sort -n) -output_files=$(find . -maxdepth 1 -name ${model}${compress}_output"*.txt*" | sort -n) +output_files=$(find . -maxdepth 1 -name ${model}_output"*.txt*" | sort -n) input_files=$(find . -maxdepth 1 -name ${model}_input"*.fp32" | sort -n) rsync -v ${input_files} ${output_files} ${server}:${PWD} @@ -58,9 +56,9 @@ command="cd ${PWD} && " command+="LD_LIBRARY_PATH=${system}/runtime/lib:${system}/tools/converter/lib CUDA_VISIBLE_DEVICES=${gpu_id} " # command+=" NVIDIA_TF32_OVERRIDE=0 " command+="${benchmark} --modelFile=$1 --numThreads=1 --warmUpLoopCount=10 --loopCount=1000 --modelType=MindIR " +command+="--inDataFile=\"${input_files}\" " if [ "${time}" == "" ] then - command+="--inDataFile=\"${input_files}\"" command+=" --benchmarkDataFile=\"${output_files}\" " fi if [ -f cfg_${model}.config ]; then diff --git a/trc/transformer/models.txt b/trc/transformer/models.txt index 41bbc1106f8..b935afef259 100755 --- a/trc/transformer/models.txt +++ b/trc/transformer/models.txt @@ -29,9 +29,14 @@ #-b 8 -l 12 -H 4 -S 512 -s 128 -P 1 -m transformer_encoder_layer #-b 1 -l 12 -H 12 -S 768 -s 128 -P 0 -m transformer_encoder_layer #-b 4 -l 12 -H 12 -S 768 -s 128 -P 0 -m transformer_encoder_layer -#-b 8 -l 12 -H 12 -S 768 -s 128 -P 0 -m transformer_encoder_layer +#-b 8 -l 12 -H 12 -S 768 -s 128 -P 0 -c true -m transformer_encoder_layer + #-b 1 -l 12 -H 4 -S 512 -s 128 -P 0 -f 3072 -m transformer_encoder_layer #-b 64 -l 12 -H 12 -S 768 -s 128 -m bert +-b 16 -l 12 -H 12 -S 768 -s 512 -c 75 -m bert +-b 32 -l 12 -H 12 -S 768 -s 512 -c 75 -m bert +-b 64 -l 12 -H 12 -S 768 -s 512 -c 75 -m bert + #-b 64 -l 24 -H 12 -S 768 -s 128 -m bert # ------------------------- Tests coverage ----------------------------------- @@ -39,7 +44,7 @@ #-b 1 -l 66 -s 20 -t 30 -H 3 -S 15 -p 0 -m mha_cross #-b 1 -l 66 -s 20 -H 4 -S 768 -p 0 -m mha_T5 #-b 1 -l 66 -s 20 -t 40 -H 4 -S 768 -p 0 -m mha_T5_cross --b 1 -l 12 -H 12 -S 768 -s 128 -P 1 -m transformer_encoder_layer +#-b 1 -l 12 -H 12 -S 768 -s 128 -P 1 -m transformer_encoder_layer #-b 8 -l 12 -H 4 -S 512 -s 128 -P 0 -f 3072 -m transformer_encoder_layer #-b 16 -l 16 -H 8 -S 1024 -s 64 -P 1 -f 1024 -m transformer_encoder_layer #-b 32 -l 12 -H 4 -S 512 -s 128 -P 0 -f 3072 -m transformer_encoder_layer -- Gitee From 9cb48fa0d7b5ee673a53cf33027afc549d20e80d Mon Sep 17 00:00:00 2001 From: yoni Date: Sun, 15 Jan 2023 12:14:55 +0200 Subject: [PATCH 2/5] remove encoder layer --- trc/readers/flatbuf/readfb.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/trc/readers/flatbuf/readfb.cc b/trc/readers/flatbuf/readfb.cc index 8dcaa8f0c5f..0b4056a6f92 100644 --- a/trc/readers/flatbuf/readfb.cc +++ b/trc/readers/flatbuf/readfb.cc @@ -322,12 +322,12 @@ void Graph::Print(const Attention *m) { PRINT_ATTR(m, cross); } -template<> +/* template<> void Graph::Print(const EncoderLayer *m) { PRINT_ATTR(m, head_num); PRINT_ATTR(m, head_size); } - + */ void Graph::printNode(const CNode *node, uint32_t node_index) { @@ -366,7 +366,7 @@ void Graph::printNode(const CNode *node, uint32_t node_index) { PRINT_NODE(ReduceFusion); PRINT_NODE(MaxPoolFusion); PRINT_NODE(Attention); - PRINT_NODE(EncoderLayer); + // PRINT_NODE(EncoderLayer); } -- Gitee From 7569f4d8803796bfabfde940b9a2f9aa81bcbbe6 Mon Sep 17 00:00:00 2001 From: batya kroizer Date: Thu, 9 Feb 2023 18:28:22 +0200 Subject: [PATCH 3/5] add encoder_layernorm fusion --- mindspore/core/ops/decoder_layer.cc | 20 ++++++-- mindspore/core/ops/decoder_layer.h | 12 +++-- mindspore/core/ops/encoder_layer.cc | 21 +++++++-- mindspore/core/ops/encoder_layer.h | 11 +++-- mindspore/core/ops/op_name.h | 3 ++ .../delegate/tensorrt/op/decoder_tensorrt.cc | 15 +++--- .../delegate/tensorrt/op/encoder_tensorrt.cc | 5 +- .../lite/tools/benchmark/benchmark_base.h | 1 + .../lite/tools/converter/anf_transform.cc | 3 +- .../optimizer/fusion/decoder_layer_fusion.cc | 46 +++++++++++++++---- .../optimizer/fusion/decoder_layer_fusion.h | 20 +++++--- .../optimizer/fusion/encoder_layer_fusion.cc | 39 ++++++++++++---- .../optimizer/fusion/encoder_layer_fusion.h | 10 +++- trc/transformer/cfg_bert.config | 2 +- trc/transformer/deploy.sh | 2 +- trc/transformer/models.txt | 15 ++++-- trc/transformer/train_transformer_export.py | 4 +- 17 files changed, 171 insertions(+), 58 deletions(-) diff --git a/mindspore/core/ops/decoder_layer.cc b/mindspore/core/ops/decoder_layer.cc index 91d725c5b15..b1e0f6f35af 100644 --- a/mindspore/core/ops/decoder_layer.cc +++ b/mindspore/core/ops/decoder_layer.cc @@ -43,6 +43,9 @@ void DecoderLayer::set_eps_layernorm2(float eps_layernorm2) { void DecoderLayer::set_eps_layernorm3(float eps_layernorm3) { (void)this->AddAttr(kDecoderLayerEpsLayerNorm3, api::MakeValue(eps_layernorm3)); } +void DecoderLayer::set_eps_layernorm4(float eps_layernorm4) { + (void)this->AddAttr(kDecoderLayerEpsLayerNorm4, api::MakeValue(eps_layernorm4)); +} void DecoderLayer::set_ffn_hidden_size(int64_t ffn_hidden_size) { (void)this->AddAttr(kDecoderLayerFfnHiddenSize, api::MakeValue(ffn_hidden_size)); } @@ -55,6 +58,8 @@ void DecoderLayer::set_position_bias2(bool position_bias2) { void DecoderLayer::set_scale1(float scale1) { (void)this->AddAttr(kDecoderLayerScale1, api::MakeValue(scale1)); } void DecoderLayer::set_scale2(float scale2) { (void)this->AddAttr(kDecoderLayerScale2, api::MakeValue(scale2)); } void DecoderLayer::set_act_type(ActType act_type) { (void)this->AddAttr(kActivationType, api::MakeValue(act_type)); } +void DecoderLayer::set_layer_norm(bool layer_norm) { (void)this->AddAttr(kLayerNorm, api::MakeValue(layer_norm)); } + int64_t DecoderLayer::get_head_num() const { auto value_ptr = this->GetAttr(kDecoderLayerNumHeads); return GetValue(value_ptr); @@ -81,6 +86,10 @@ float DecoderLayer::get_eps_layernorm3() const { auto value_ptr = this->GetAttr(kDecoderLayerEpsLayerNorm3); return GetValue(value_ptr); } +float DecoderLayer::get_eps_layernorm4() const { + auto value_ptr = this->GetAttr(kDecoderLayerEpsLayerNorm4); + return GetValue(value_ptr); +} int64_t DecoderLayer::get_ffn_hidden_size() const { auto value_ptr = this->GetAttr(kDecoderLayerFfnHiddenSize); return GetValue(value_ptr); @@ -108,22 +117,27 @@ ActType DecoderLayer::get_act_type() const { } return ActType(GetValue(value_ptr)); } - +bool DecoderLayer::get_layer_norm() const { + auto value_ptr = this->GetAttr(kDecoderLayeNorm); + return GetValue(value_ptr); +} void DecoderLayer::Init(int64_t head_num, int64_t head_size, float eps_layernorm1, float eps_layernorm2, - float eps_layernorm3, int64_t ffn_hidden_size, bool position_bias1, bool position_bias2, - bool post_layernorm, float scale1, float scale2, ActType act_type) { + float eps_layernorm3, float eps_layernorm4, int64_t ffn_hidden_size, bool position_bias1, bool position_bias2, + bool post_layernorm, float scale1, float scale2, ActType act_type, bool layer_norm) { this->set_head_num(head_num); this->set_head_size(head_size); this->set_post_layernorm(post_layernorm); this->set_eps_layernorm1(eps_layernorm1); this->set_eps_layernorm2(eps_layernorm2); this->set_eps_layernorm3(eps_layernorm3); + this->set_eps_layernorm4(eps_layernorm4); this->set_ffn_hidden_size(ffn_hidden_size); this->set_position_bias1(position_bias1); this->set_position_bias2(position_bias2); this->set_act_type(act_type); this->set_scale1(scale1); this->set_scale2(scale2); + this->set_scale2(layer_norm); } REGISTER_PRIMITIVE_C(kNameDecoderLayer, DecoderLayer); } // namespace mindspore::ops diff --git a/mindspore/core/ops/decoder_layer.h b/mindspore/core/ops/decoder_layer.h index b196689eb2f..88a8f2ad741 100644 --- a/mindspore/core/ops/decoder_layer.h +++ b/mindspore/core/ops/decoder_layer.h @@ -64,39 +64,45 @@ class MIND_API DecoderLayer : public BaseOperator { /// \param[in] eps_layernorm1 Define eps layernorm1. /// \param[in] eps_layernorm2 Define eps layernorm2. /// \param[in] eps_layernorm3 Define eps layernorm3. + /// \param[in] eps_layernorm3 Define eps layernorm3. /// \param[in] ffn_hidden_size Define ffn hidden size. /// \param[in] position_bias1 Define position_bias1. /// \param[in] position_bias2 Define position_bias2. /// \param[in] scale1 Define scale1. /// \param[in] scale2 Define scale2. /// \param[in] act_type Define act_type. - void Init(int64_t head_num, int64_t head_size, float eps_layernorm1, float eps_layernorm2, float eps_layernorm3, + /// \param[in] layer_norm Define act_type. + void Init(int64_t head_num, int64_t head_size, float eps_layernorm1, float eps_layernorm2, float eps_layernorm3, float eps_layernorm4, int64_t ffn_hidden_size, bool position_bias1, bool position_bias2, bool post_layernorm, float scale1 = 1.0f, - float scale2 = 1.0f, ActType act_type = ActType::ActType_Gelu); + float scale2 = 1.0f, ActType act_type = ActType::ActType_Gelu, bool layer_norm = false); void set_head_num(int64_t head_num); void set_head_size(int64_t head_size); void set_post_layernorm(bool post_layernorm); void set_eps_layernorm1(float eps_layernorm1); void set_eps_layernorm2(float eps_layernorm2); - void set_eps_layernorm3(float eps_layernorm2); + void set_eps_layernorm3(float eps_layernorm3); + void set_eps_layernorm4(float eps_layernorm4); void set_ffn_hidden_size(int64_t ffn_hidden_size); void set_position_bias1(bool position_bias1); void set_position_bias2(bool position_bias2); void set_scale1(float scale1); void set_scale2(float scale2); void set_act_type(ActType act_type); + void set_layer_norm(bool layer_norm); int64_t get_head_num() const; int64_t get_head_size() const; bool get_post_layernorm() const; float get_eps_layernorm1() const; float get_eps_layernorm2() const; float get_eps_layernorm3() const; + float get_eps_layernorm4() const; int64_t get_ffn_hidden_size() const; bool get_position_bias1() const; bool get_position_bias2() const; float get_scale1() const; float get_scale2() const; ActType get_act_type() const; + bool get_layer_norm() const; }; } // namespace ops } // namespace mindspore diff --git a/mindspore/core/ops/encoder_layer.cc b/mindspore/core/ops/encoder_layer.cc index 146c869ea0a..fccb4de82d0 100644 --- a/mindspore/core/ops/encoder_layer.cc +++ b/mindspore/core/ops/encoder_layer.cc @@ -40,6 +40,9 @@ void EncoderLayer::set_eps_layernorm1(float eps_layernorm1) { void EncoderLayer::set_eps_layernorm2(float eps_layernorm2) { (void)this->AddAttr(kEncoderLayerEpsLayerNorm2, api::MakeValue(eps_layernorm2)); } +void EncoderLayer::set_eps_layernorm3(float eps_layernorm3) { + (void)this->AddAttr(kEncoderLayerEpsLayerNorm3, api::MakeValue(eps_layernorm3)); +} void EncoderLayer::set_ffn_hidden_size(int64_t ffn_hidden_size) { (void)this->AddAttr(kEncoderLayerFfnHiddenSize, api::MakeValue(ffn_hidden_size)); } @@ -47,13 +50,13 @@ void EncoderLayer::set_position_bias(bool position_bias) { (void)this->AddAttr(kPositionBias, api::MakeValue(position_bias)); } void EncoderLayer::set_scale(float scale) { (void)this->AddAttr(kScale, api::MakeValue(scale)); } +void EncoderLayer::set_layer_norm(bool layer_norm) { (void)this->AddAttr(kLayerNorm, api::MakeValue(layer_norm)); } void EncoderLayer::set_act_type(ActType act_type) { (void)this->AddAttr(kActivationType, api::MakeValue(act_type));} int64_t EncoderLayer::get_head_num() const { auto value_ptr = this->GetAttr(kEncoderLayerNumHeads); return GetValue(value_ptr); } - int64_t EncoderLayer::get_head_size() const { auto value_ptr = this->GetAttr(kEncoderLayerSizePerHead); return GetValue(value_ptr); @@ -71,6 +74,10 @@ float EncoderLayer::get_eps_layernorm2() const { auto value_ptr = this->GetAttr(kEncoderLayerEpsLayerNorm2); return GetValue(value_ptr); } +float EncoderLayer::get_eps_layernorm3() const { + auto value_ptr = this->GetAttr(kEncoderLayerEpsLayerNorm3); + return GetValue(value_ptr); +} int64_t EncoderLayer::get_ffn_hidden_size() const { auto value_ptr = this->GetAttr(kEncoderLayerFfnHiddenSize); return GetValue(value_ptr); @@ -90,18 +97,24 @@ ActType EncoderLayer::get_act_type() const { } return ActType(GetValue(value_ptr)); } -void EncoderLayer::Init(int64_t head_num, int64_t head_size, float eps_layernorm1, float eps_layernorm2, - int64_t ffn_hidden_size, bool position_bias, bool post_layernorm, float scale, - ActType act_type) { +bool EncoderLayer::get_layer_norm() const { + auto value_ptr = this->GetAttr(kDecoderLayeNorm); + return GetValue(value_ptr); +} +void EncoderLayer::Init(int64_t head_num, int64_t head_size, float eps_layernorm1, float eps_layernorm2, + float eps_layernorm3, int64_t ffn_hidden_size, bool position_bias, bool post_layernorm, + float scale, ActType act_type, bool layer_norm) { this->set_head_num(head_num); this->set_head_size(head_size); this->set_post_layernorm(post_layernorm); this->set_eps_layernorm1(eps_layernorm1); this->set_eps_layernorm2(eps_layernorm2); + this->set_eps_layernorm3(eps_layernorm3); this->set_ffn_hidden_size(ffn_hidden_size); this->set_position_bias(position_bias); this->set_act_type(act_type); this->set_scale(scale); + this->set_layer_norm(layer_norm); } REGISTER_PRIMITIVE_C(kNameEncoderLayer, EncoderLayer); } // namespace mindspore::ops diff --git a/mindspore/core/ops/encoder_layer.h b/mindspore/core/ops/encoder_layer.h index 628e897045b..dd7794d5ed4 100644 --- a/mindspore/core/ops/encoder_layer.h +++ b/mindspore/core/ops/encoder_layer.h @@ -42,31 +42,36 @@ class MIND_API EncoderLayer : public BaseOperator { /// \param[in] head_size Define size per head. /// \param[in] eps_layernorm1 Define eps layernorm1. /// \param[in] eps_layernorm2 Define eps layernorm2. + /// \param[in] eps_layernorm3 Define eps layernorm3. /// \param[in] ffn_hidden_size Define ffn hidden size. /// \param[in] position_bias Define position_bias. /// \param[in] scale Define scale. /// \param[in] act_type Define act_type. - - void Init(int64_t head_num, int64_t head_size, float eps_layernorm1, float eps_layernorm2, int64_t ffn_hidden_size, - bool position_bias, bool post_layernorm, float scale = 1.0f, ActType act_type = ActType::ActType_Gelu); + /// \param[in] layer_norm Define act_type. + void Init(int64_t head_num, int64_t head_size, float eps_layernorm1, float eps_layernorm2, float eps_layernorm3, int64_t ffn_hidden_size, + bool position_bias, bool post_layernorm, float scale = 1.0f, ActType act_type = ActType::ActType_Gelu, bool layer_norm = false); void set_head_num(int64_t head_num); void set_head_size(int64_t head_size); void set_post_layernorm(bool post_layernorm); void set_eps_layernorm1(float eps_layernorm1); void set_eps_layernorm2(float eps_layernorm2); + void set_eps_layernorm3(float eps_layernorm3); void set_ffn_hidden_size(int64_t ffn_hidden_size); void set_position_bias(bool position_bias); void set_scale(float scale); void set_act_type(ActType act_type); + void set_layer_norm(bool layer_norm); int64_t get_head_num() const; int64_t get_head_size() const; bool get_post_layernorm() const; float get_eps_layernorm1() const; float get_eps_layernorm2() const; + float get_eps_layernorm3() const; int64_t get_ffn_hidden_size() const; bool get_position_bias() const; float get_scale() const; ActType get_act_type() const; + bool get_layer_norm() const; }; } // namespace ops } // namespace mindspore diff --git a/mindspore/core/ops/op_name.h b/mindspore/core/ops/op_name.h index caf14dc257a..f54e7ae6e32 100644 --- a/mindspore/core/ops/op_name.h +++ b/mindspore/core/ops/op_name.h @@ -385,6 +385,7 @@ constexpr auto kEncoderLayerPostLayernorm = "post_layernorm"; constexpr auto kEncoderLayerFfnHiddenSize = "ffn_hidden_size"; constexpr auto kEncoderLayerEpsLayerNorm1 = "eps_layernorm1"; constexpr auto kEncoderLayerEpsLayerNorm2 = "eps_layernorm2"; +constexpr auto kEncoderLayerEpsLayerNorm3 = "eps_layernorm3"; constexpr auto kDecoderLayerNumHeads = "head_num"; constexpr auto kDecoderLayerSizePerHead = "head_size"; constexpr auto kDecoderLayerPostLayernorm = "post_layernorm"; @@ -392,8 +393,10 @@ constexpr auto kDecoderLayerFfnHiddenSize = "ffn_hidden_size"; constexpr auto kDecoderLayerEpsLayerNorm1 = "eps_layernorm1"; constexpr auto kDecoderLayerEpsLayerNorm2 = "eps_layernorm2"; constexpr auto kDecoderLayerEpsLayerNorm3 = "eps_layernorm3"; +constexpr auto kDecoderLayerEpsLayerNorm4 = "eps_layernorm4"; constexpr auto kDecoderLayerPositionBias1 = "position_bias1"; constexpr auto kDecoderLayerPositionBias2 = "position_bias2"; +constexpr auto kLayerNorm = "layer_norm"; constexpr auto kDecoderLayerScale1 = "scale1"; constexpr auto kDecoderLayerScale2 = "scale2"; constexpr auto kPositionBias = "position_bias"; diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/op/decoder_tensorrt.cc b/mindspore/lite/src/extendrt/delegate/tensorrt/op/decoder_tensorrt.cc index 16d7581a936..4c8c8a4af7b 100755 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/op/decoder_tensorrt.cc +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/op/decoder_tensorrt.cc @@ -109,6 +109,7 @@ int DecoderTensorRT::AddInnerOp(TensorRTContext *ctx) { params.decoder.eps1 = decoder_op->get_eps_layernorm1(); params.decoder.eps2 = decoder_op->get_eps_layernorm2(); params.decoder.eps3 = decoder_op->get_eps_layernorm3(); + params.decoder.eps4 = decoder_op->get_eps_layernorm4(); params.ffn_param.ffn_param.ffn_hidden_size = decoder_op->get_ffn_hidden_size(); params.ffn_param.ffn_param.ffn_fp16 = is_ffn_fp16_; params.ffn_param.ffn_param.act_type = (fastertransformer::ActType)(decoder_op->get_act_type()); @@ -126,6 +127,7 @@ int DecoderTensorRT::AddInnerOp(TensorRTContext *ctx) { params.attn2.attn.scale = decoder_op->get_scale2(); params.attn2.attn.mask = true; params.decoder.has_beta = !params.attn1.attn.position_bias; + params.decoder.is_layernorm = decoder_op->get_layer_norm(); auto compute_type = runtime_->GetRuntimePrecisionMode(); if (is_ffn_fp16_) { size_t start_fp16 = (params.attn1.attn.position_bias) ? C13NUM : C18NUM; @@ -146,15 +148,16 @@ int DecoderTensorRT::AddInnerOp(TensorRTContext *ctx) { nvinfer1::ITensor *input_tensor = input(ctx, 0).trt_tensor_; auto plugin = std::make_shared(input_tensor->getName(), compute_type, params, device_id_); const int input_number = inputs().size(); - nvinfer1::ITensor *inputTensors[input_number+2]; + nvinfer1::ITensor *inputTensors[input_number + C2NUM]; for (int i = 0; i < input_number; i++) { inputTensors[i] = input(ctx, i).trt_tensor_; } + auto network_input1 = ctx->network()->getInput(0); - auto network_input2 = ctx->network()->getInput(2); + auto network_input2 = ctx->network()->getInput(C3NUM); inputTensors[input_number] = network_input1; - inputTensors[input_number + 1] = network_input2; - nvinfer1::IPluginV2Layer *decoder_layer = ctx->network()->addPluginV2(inputTensors, input_number + 2, *plugin); + inputTensors[input_number + C1NUM] = network_input2; + nvinfer1::IPluginV2Layer *decoder_layer = ctx->network()->addPluginV2(inputTensors, input_number + C2NUM, *plugin); if (decoder_layer == nullptr) { MS_LOG(ERROR) << "add decoder op failed for TensorRT."; return RET_ERROR; @@ -194,6 +197,7 @@ int DecoderPlugin::RunCudaDecoder(const nvinfer1::PluginTensorDesc *inputDesc, for (int i = 0; i < num_of_inputs_; i++) { inputs_forward[i] = const_cast(inputs[i]); } + std::cout<<"decoder shape :"<(inputs_forward, num_of_inputs_, outputs_forward, num_of_outputs_, ¶ms_, workspace); @@ -207,9 +211,6 @@ bool DecoderPlugin::supportsFormatCombination(int pos, const nvinfer1::PluginTen bool res = (tensorsDesc[pos].type == nvinfer1::DataType::kINT32)? true:false; return res; } - // for (int i = 0; i < pos; i++) { - // if (tensorsDesc[pos].type != tensorsDesc[i].type) return false; - // } bool res = (tensorsDesc[pos].format == nvinfer1::TensorFormat::kLINEAR) && (tensorsDesc[pos].type == type); return res; } diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.cc b/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.cc index 04270aeacb2..b7b5e29b9e2 100755 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.cc +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.cc @@ -111,9 +111,11 @@ int EncoderTensorRT::AddInnerOp(TensorRTContext *ctx) { params.common_param.hidden_size = params.common_param.head_num * params.common_param.head_size; //connect commonparam to attention and ffn //update encoder_param_ + params.encoder.is_layernorm = encoder_op->get_layer_norm(); params.encoder.layernorm_post = encoder_op->get_post_layernorm(); params.encoder.eps1 = encoder_op->get_eps_layernorm1(); params.encoder.eps2 = encoder_op->get_eps_layernorm2(); + params.encoder.eps3 = encoder_op->get_eps_layernorm3(); params.ffn_param.ffn_param.ffn_hidden_size = encoder_op->get_ffn_hidden_size(); params.ffn_param.ffn_param.ffn_fp16 = is_ffn_fp16_; params.attn.attn.is_cross = false; @@ -211,9 +213,6 @@ bool EncoderPlugin::supportsFormatCombination(int pos, const nvinfer1::PluginTen bool res = (tensorsDesc[pos].type == nvinfer1::DataType::kINT32)? true:false; return res; } - // for (int i = 0; i < pos; i++) { - // if (tensorsDesc[pos].type != tensorsDesc[i].type) return false; - // } bool res = (tensorsDesc[pos].format == nvinfer1::TensorFormat::kLINEAR) && (tensorsDesc[pos].type == type); return res; } diff --git a/mindspore/lite/tools/benchmark/benchmark_base.h b/mindspore/lite/tools/benchmark/benchmark_base.h index 1511c0cf719..e5820b42173 100644 --- a/mindspore/lite/tools/benchmark/benchmark_base.h +++ b/mindspore/lite/tools/benchmark/benchmark_base.h @@ -329,6 +329,7 @@ class MS_API BenchmarkBase { auto tolerance = absoluteTolerance + relativeTolerance * fabs(calibTensor->data.at(j)); auto absoluteError = std::fabs(msTensorData[j] - calibTensor->data.at(j)); + std::cout<<"j = "<("gamma4"); + MS_CHECK_TRUE_RET(gamma4_ != nullptr, false); + beta4_ = std::make_shared("beta4"); + MS_CHECK_TRUE_RET(beta4_ != nullptr, false); weight_attn_qkv_ = std::make_shared("weight_attn_qkv"); MS_CHECK_TRUE_RET(weight_attn_qkv_ != nullptr, false); weight_attn_q_ = std::make_shared("weight_attn_q_"); @@ -100,6 +104,8 @@ bool DecoderLayerFusion::Init() const { MS_CHECK_TRUE_RET(eps2_ != nullptr, false); eps3_ = std::make_shared("eps3_"); MS_CHECK_TRUE_RET(eps3_ != nullptr, false); + eps4_ = std::make_shared("eps4_"); + MS_CHECK_TRUE_RET(eps4_ != nullptr, false); return true; } @@ -144,9 +150,8 @@ VectorRef DecoderLayerFusion::DefineLayerNorm(VectorRef input, VarPtr gamma, Var auto mul = VectorRef({is_mul, real_div, gamma}); return mul; } - VectorRef DecoderLayerFusion::DefinePatternDecoderLayer(bool post_layernorm = true, bool layernorm_fusion = false, - bool is_position_bias = false, bool mask = true) const { + bool is_position_bias = false, bool mask = true, bool is_layer_norm=false) const { auto is_reshape1 = std::make_shared(std::bind(IsOpType, p1, prim::kPrimReshape), "reshape-decoder"); MS_CHECK_TRUE_RET(is_reshape1 != nullptr, {}); auto var1 = std::make_shared("var1-reshape"); @@ -166,7 +171,6 @@ VectorRef DecoderLayerFusion::DefinePatternDecoderLayer(bool post_layernorm = tr } if (mask) inputs.push_back(mask_); auto attention = VectorRef(inputs); - // return attention; if (is_position_bias) { tuple4 = attention; } else { @@ -242,7 +246,10 @@ VectorRef DecoderLayerFusion::DefinePatternDecoderLayer(bool post_layernorm = tr auto reshape4 = (post_layernorm) ? VectorRef({is_reshape4, tuple3, var4}) : VectorRef({is_reshape4, add3, var4}); auto is_add4 = std::make_shared(std::bind(IsOpType, p1, prim::kPrimAddFusion), "is_add4"); auto add4 = VectorRef({is_add4, reshape4, reshape3}); - return add4; + if(is_layer_norm) + return DefineLayerNorm(add4, gamma4_, beta4_, eps4_); + else + return add4; } std::unordered_map DecoderLayerFusion::DefinePatterns() const { @@ -251,6 +258,7 @@ std::unordered_map DecoderLayerFusion::DefinePatterns() MS_LOG(ERROR) << "initial member failed."; return patterns; } + patterns[kPatternDecoderLayerNormT5Pre] = DefinePatternDecoderLayer(false, false, true, true, true); patterns[kPatternDecoderLayerPre] = DefinePatternDecoderLayer(false, true, false, true); patterns[kPatternDecoderLayerPost] = DefinePatternDecoderLayer(true, true, false, true); patterns[kPatternDecoderLayerNormPre] = DefinePatternDecoderLayer(false, false, false, true); @@ -265,9 +273,12 @@ AnfNodePtr DecoderLayerFusion::Process(const std::string &pattern_name, const mi if (func_graph == nullptr || node == nullptr || equiv == nullptr) { return nullptr; } - if (pattern_name == kPatternDecoderT5Pre || pattern_name == kPatternDecoderT5Post) { + if (pattern_name == kPatternDecoderT5Pre || pattern_name == kPatternDecoderT5Post || pattern_name == kPatternDecoderLayerNormT5Pre) { is_position_bias_ = true; } + std::cout<<"pattern_name= "< DecoderLayerFusion::CreatePrim(const FuncGrap float eps1 = 1e-6; float eps2 = 1e-6; float eps3 = 1e-6; + float eps4 = 1e-6; bool is_position_bias1 = false; bool is_position_bias2 = false; float scale1 = 1.0f; float scale2 = 1.0f; - if (CheckPattern(func_graph, equiv, &head_num, &head_size, &eps1, &eps2, &eps3, &is_position_bias1, + if (CheckPattern(func_graph, equiv, &head_num, &head_size, &eps1, &eps2, &eps3, &eps4, &is_position_bias1, &is_position_bias2, &scale1, &scale2)) { return nullptr; } - decoder_layer_prim->Init(head_num, head_size, eps1, eps2, eps3, ffn_hidden_size, is_position_bias1, is_position_bias2, - post_layernorm, scale1, scale2, act_type_); + decoder_layer_prim->Init(head_num, head_size, eps1, eps2, eps3, eps4, ffn_hidden_size, is_position_bias1, is_position_bias2, + post_layernorm, scale1, scale2, act_type_, is_layernorm_); return decoder_layer_prim; } @@ -449,7 +468,7 @@ CNodePtr DecoderLayerFusion::CreateMaskedDecoderLayerFusionNode(const FuncGraphP auto encoder_output = utils::cast((*equiv)[encoder_output_]); MS_ASSERT(encoder_output != nullptr); AnfNodePtr position_bias, input_mask, bias_attn_o, bias_attn_qkv, beta1, beta2, bias_m, bias_p, beta3, - bias_attn_cross_qkv, bias_attn_cross_o, position_bias_cross; + bias_attn_cross_qkv, bias_attn_cross_o, position_bias_cross, gamma4, beta4; auto weight_qkv = utils::cast((*equiv)[weight_attn_qkv_]); auto weight_attn_o = utils::cast((*equiv)[weight_attn_o_]); auto weight_attn_q = utils::cast((*equiv)[weight_attn_q_]); @@ -470,10 +489,15 @@ CNodePtr DecoderLayerFusion::CreateMaskedDecoderLayerFusionNode(const FuncGraphP beta1 = utils::cast((*equiv)[beta1_]); beta2 = utils::cast((*equiv)[beta2_]); beta3 = utils::cast((*equiv)[beta3_]); + if(is_layernorm_) + beta4 = utils::cast((*equiv)[beta3_]); } auto gamma1 = utils::cast((*equiv)[gamma1_]); auto gamma2 = utils::cast((*equiv)[gamma2_]); auto gamma3 = utils::cast((*equiv)[gamma3_]); + if(is_layernorm_) + gamma4 = utils::cast((*equiv)[gamma3_]); + input_mask = mask ? utils::cast((*equiv)[mask_]) : nullptr; auto cross_mask = utils::cast((*equiv)[cross_mask_]); auto base_shape_ptr = weight_m->Shape(); @@ -498,6 +522,7 @@ CNodePtr DecoderLayerFusion::CreateMaskedDecoderLayerFusionNode(const FuncGraphP if (mask) new_node_inputs.push_back(cross_mask); new_node_inputs.insert(new_node_inputs.end(), {position_bias_cross, weight_attn_cross_o, gamma3, weight_m, weight_p}); + if(is_layernorm_) new_node_inputs.push_back(gamma4); } else { new_node_inputs.insert(new_node_inputs.end(), {beta1, weight_qkv, bias_attn_qkv}); if (mask) new_node_inputs.push_back(input_mask); @@ -506,6 +531,7 @@ CNodePtr DecoderLayerFusion::CreateMaskedDecoderLayerFusionNode(const FuncGraphP if (mask) new_node_inputs.push_back(cross_mask); new_node_inputs.insert(new_node_inputs.end(), {weight_attn_cross_o, bias_attn_cross_o, gamma3, beta3, weight_m, bias_m, weight_p, bias_p}); + if(is_layernorm_) new_node_inputs.insert(new_node_inputs.end(),{gamma4,beta4}); } auto new_node = func_graph->NewCNode(new_node_inputs); MS_CHECK_TRUE_RET(new_node != nullptr, nullptr); diff --git a/mindspore/lite/tools/optimizer/fusion/decoder_layer_fusion.h b/mindspore/lite/tools/optimizer/fusion/decoder_layer_fusion.h index f5faec283e2..b7accd37869 100644 --- a/mindspore/lite/tools/optimizer/fusion/decoder_layer_fusion.h +++ b/mindspore/lite/tools/optimizer/fusion/decoder_layer_fusion.h @@ -32,8 +32,8 @@ namespace mindspore { namespace opt { class DecoderLayerFusion : public MultiplePatternProcessPass { public: - explicit DecoderLayerFusion(const std::string &name = "DecoderLayerFusion", bool multigraph = true) - : MultiplePatternProcessPass(name, multigraph) {} + explicit DecoderLayerFusion(bool layer_norm, const std::string &name = "DecoderLayerFusion", bool multigraph = true) + : MultiplePatternProcessPass(name, multigraph) { layer_norm_ = layer_norm;} ~DecoderLayerFusion() override = default; @@ -46,7 +46,7 @@ class DecoderLayerFusion : public MultiplePatternProcessPass { private: VectorRef DefinePatternDecoderLayer(bool post_layernorm, bool layernorm_fusion, bool is_position_bias, - bool mask) const; + bool mask, bool is_layer_norm) const; VectorRef getTuple(bool post_layernorm, bool layernorm_fusion, bool is_position_bias) const; VectorRef DefineLayerNorm(VectorRef input, VarPtr gamma, VarPtr beta, VarPtr eps) const; CNodePtr CreateMaskedDecoderLayerFusionNode(const FuncGraphPtr &func_graph, const EquivPtr &equiv, @@ -54,17 +54,18 @@ class DecoderLayerFusion : public MultiplePatternProcessPass { std::shared_ptr CreatePrim(const FuncGraphPtr &func_graph, const EquivPtr &equiv, bool post_layernorm, int64_t ffn_hidden_size) const; lite::STATUS CheckPattern(const FuncGraphPtr &func_graph, const EquivPtr &equiv, int *head_num, int *head_size, - float *eps1, float *eps2, float *eps3, bool *is_position_bias1, bool *is_position_bias2, + float *eps1, float *eps2, float *eps3, float *eps4, bool *is_position_bias1, bool *is_position_bias2, float *scale1, float *scale2) const; AnfNodePtr GetAttribute(const FuncGraphPtr &func_graph, const EquivPtr &equiv, VarPtr node_name) const; bool IsActGELU(const FuncGraphPtr &func_graph, const EquivPtr &equiv) const; lite::STATUS GetEps(const EquivPtr &equiv, VarPtr node_name, float *eps) const; - + VectorRef DefineDecoderLayerNorm(VectorRef input, VarPtr gamma, VarPtr eps) const ; protected: const std::string kPatternDecoderLayerPre = "PatternDecoderLayerPre"; const std::string kPatternDecoderLayerPost = "PatternDecoderLayerPost"; - const std::string kPatternDecoderLayerNormPre = "kPatternDecoderLayerNormPre"; - const std::string kPatternDecoderLayerNormPost = "kPatternDecoderLayerNormPost"; + const std::string kPatternDecoderLayerNormPre = "PatternDecoderLayerNormPre"; + const std::string kPatternDecoderLayerNormPost = "PatternDecoderLayerNormPost"; + const std::string kPatternDecoderLayerNormT5Pre = "PatternDecoderLayerNormT5Pre"; const std::string kPatternDecoderT5Pre = "PatternDecoderT5Pre"; const std::string kPatternDecoderT5Post = "PatternDecoderT5Post"; mutable VarPtr hidden_stats_{nullptr}; @@ -75,7 +76,9 @@ class DecoderLayerFusion : public MultiplePatternProcessPass { mutable VarPtr beta2_{nullptr}; mutable VarPtr gamma2_{nullptr}; mutable VarPtr gamma3_{nullptr}; + mutable VarPtr gamma4_{nullptr}; mutable VarPtr beta3_{nullptr}; + mutable VarPtr beta4_{nullptr}; mutable VarPtr weight_attn_qkv_{nullptr}; mutable VarPtr weight_attn_qkv_cross_{nullptr}; mutable VarPtr weight_attn_o_{nullptr}; @@ -104,9 +107,12 @@ class DecoderLayerFusion : public MultiplePatternProcessPass { mutable VarPtr eps1_{nullptr}; mutable VarPtr eps2_{nullptr}; mutable VarPtr eps3_{nullptr}; + mutable VarPtr eps4_{nullptr}; mutable bool is_position_bias_{false}; mutable bool is_layernorm_fusion_{false}; + mutable bool is_layernorm_{false}; mutable ActType act_type_{ActType::ActType_No}; + mutable bool layer_norm_; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/lite/tools/optimizer/fusion/encoder_layer_fusion.cc b/mindspore/lite/tools/optimizer/fusion/encoder_layer_fusion.cc index 701b86a6307..0c0c78f4146 100644 --- a/mindspore/lite/tools/optimizer/fusion/encoder_layer_fusion.cc +++ b/mindspore/lite/tools/optimizer/fusion/encoder_layer_fusion.cc @@ -42,6 +42,10 @@ bool EncoderLayerFusion::Init() const { MS_CHECK_TRUE_RET(beta2_ != nullptr, false); gamma2_ = std::make_shared("gamma2"); MS_CHECK_TRUE_RET(gamma2_ != nullptr, false); + beta3_ = std::make_shared("beta3"); + MS_CHECK_TRUE_RET(beta3_ != nullptr, false); + gamma3_ = std::make_shared("gamma3"); + MS_CHECK_TRUE_RET(gamma3_ != nullptr, false); weight_attn_qkv_ = std::make_shared("weight_attn_qkv"); MS_CHECK_TRUE_RET(weight_attn_qkv_ != nullptr, false); weight_attn_o_ = std::make_shared(IsParamNode, "weight_attn_o"); @@ -74,6 +78,8 @@ bool EncoderLayerFusion::Init() const { MS_CHECK_TRUE_RET(eps1_ != nullptr, false); eps2_ = std::make_shared("eps2_"); MS_CHECK_TRUE_RET(eps2_ != nullptr, false); + eps3_ = std::make_shared("eps3_"); + MS_CHECK_TRUE_RET(eps3_ != nullptr, false); return true; } @@ -139,7 +145,7 @@ VectorRef EncoderLayerFusion::DefineLayerNorm(bool is_position_bias, VectorRef i } VectorRef EncoderLayerFusion::DefinePatternEncoderLayer(bool post_layernorm = true, bool layernorm_fusion = false, - bool is_position_bias = false, bool mask = true) const { + bool is_position_bias = false, bool mask = true, bool is_layer_norm = false) const { VectorRef tuple, tuple2, tuple3, reshape2, matmul1, inputs; auto is_reshape1 = std::make_shared(std::bind(IsOpType, p1, prim::kPrimReshape), "reshape-encoder"); MS_CHECK_TRUE_RET(is_reshape1 != nullptr, {}); @@ -211,6 +217,8 @@ VectorRef EncoderLayerFusion::DefinePatternEncoderLayer(bool post_layernorm = tr auto reshape3 = VectorRef({is_reshape3, matmul2, var3}); auto is_add3 = std::make_shared(std::bind(IsOpType, p1, prim::kPrimAddFusion), "is_add3"); auto add3 = VectorRef({is_add3, reshape2, reshape3}); + if(is_layer_norm) + return DefineLayerNorm(is_position_bias, add3, gamma3_, beta3_, eps3_); if (!post_layernorm || !layernorm_fusion) { return add3; } @@ -241,6 +249,7 @@ std::unordered_map EncoderLayerFusion::DefinePatterns() MS_LOG(ERROR) << "initial member failed."; return patterns; } + patterns[kPatternEncoderLayerNormT5Pre] = DefinePatternEncoderLayer(false, false, true, true, true); patterns[kPatternEncoderLayerPre] = DefinePatternEncoderLayer(false); patterns[kPatternEncoderLayerPost] = DefinePatternEncoderLayer(true); patterns[kPatternEncoderLayerPostNorm] = DefinePatternEncoderLayer(true, true); @@ -255,10 +264,10 @@ AnfNodePtr EncoderLayerFusion::Process(const std::string &pattern_name, const mi if (func_graph == nullptr || node == nullptr || equiv == nullptr) { return nullptr; } - + if(pattern_name == kPatternEncoderLayerNormT5Pre) is_layernorm_ = true; if (pattern_name == kPatternEncoderLayerPostNorm || pattern_name == kPatternEncoderLayerPreNorm) is_layernorm_fusion_ = true; - if (pattern_name == kPatternEncoderLayerT5Pre || pattern_name == kPatternEncoderLayerT5Post) is_position_bias_ = true; + if (pattern_name == kPatternEncoderLayerT5Pre || pattern_name == kPatternEncoderLayerT5Post || pattern_name == kPatternEncoderLayerNormT5Pre) is_position_bias_ = true; bool mask = true; bool post_layernorm = false; if (pattern_name == kPatternEncoderLayerPost || pattern_name == kPatternEncoderLayerPostNorm || @@ -331,7 +340,7 @@ AnfNodePtr EncoderLayerFusion::GetAttribute(const FuncGraphPtr &func_graph, cons } STATUS EncoderLayerFusion::CheckPattern(const FuncGraphPtr &func_graph, const EquivPtr &equiv, int *head_num, - int *head_size, float *eps1, float *eps2, float *scale) const { + int *head_size, float *eps1, float *eps2, float *eps3, float *scale) const { auto attn_input = GetAttribute(func_graph, equiv, is_attention_); MS_ASSERT(attn_input != nullptr); auto attn_prim = ops::GetOperator(attn_input); @@ -368,6 +377,12 @@ STATUS EncoderLayerFusion::CheckPattern(const FuncGraphPtr &func_graph, const Eq MS_LOG(ERROR) << "not found eps2"; return RET_ERROR; } + if(is_layernorm_){ + if (GetEps(equiv, eps3_, eps3) != RET_OK) { + MS_LOG(ERROR) << "not found eps3"; + return RET_ERROR; + } + } } if (!is_position_bias_) { if (!IsActGELU(func_graph, equiv, is_act_)) { @@ -391,12 +406,13 @@ std::shared_ptr EncoderLayerFusion::CreatePrim(const FuncGrap int head_size = 0; float eps1 = 1e-5; float eps2 = 1e-5; + float eps3 = 1e-5; float scale = 1.0f; - if (CheckPattern(func_graph, equiv, &head_num, &head_size, &eps1, &eps2, &scale)) { + if (CheckPattern(func_graph, equiv, &head_num, &head_size, &eps1, &eps2, &eps3, &scale)) { return nullptr; } - encoder_layer_prim->Init(head_num, head_size, eps1, eps2, ffn_hidden_size, is_position_bias_, post_layernorm, scale, - act_type_); + encoder_layer_prim->Init(head_num, head_size, eps1, eps2, eps3, ffn_hidden_size, is_position_bias_, post_layernorm, scale, + act_type_, is_layernorm_); return encoder_layer_prim; } @@ -407,7 +423,8 @@ CNodePtr EncoderLayerFusion::CreateMaskedEncoderLayerFusionNode(const FuncGraphP MS_ASSERT(equiv != nullptr); MS_ASSERT(node != nullptr); auto input = utils::cast((*equiv)[input_]); - AnfNodePtr position_bias, input_mask, bias_attn_o, bias_attn_qkv, beta1, beta2, bias_m, bias_p, gamma3; + AnfNodePtr position_bias, input_mask, bias_attn_o, bias_attn_qkv, beta1, beta2, bias_m, bias_p, gamma3, + beta3, gamma3; auto weight_qkv = utils::cast((*equiv)[weight_attn_qkv_]); auto weight_attn_o = utils::cast((*equiv)[weight_attn_o_]); auto weight_m = utils::cast((*equiv)[weight_m_]); @@ -419,9 +436,13 @@ CNodePtr EncoderLayerFusion::CreateMaskedEncoderLayerFusionNode(const FuncGraphP bias_p = utils::cast((*equiv)[bias_p_]); beta1 = utils::cast((*equiv)[beta1_]); beta2 = utils::cast((*equiv)[beta2_]); + if(is_layernorm_) + beta3 = utils::cast((*equiv)[beta3_]); } auto gamma1 = utils::cast((*equiv)[gamma1_]); auto gamma2 = utils::cast((*equiv)[gamma2_]); + if(is_layernorm_) + auto gamma3 = utils::cast((*equiv)[gamma3_]); input_mask = mask ? utils::cast((*equiv)[mask_]) : nullptr; auto base_shape_ptr = weight_m->Shape(); MS_EXCEPTION_IF_NULL(base_shape_ptr); @@ -442,6 +463,7 @@ CNodePtr EncoderLayerFusion::CreateMaskedEncoderLayerFusionNode(const FuncGraphP new_node_inputs.insert(new_node_inputs.end(), {gamma1, weight_qkv}); if (mask) new_node_inputs.push_back(input_mask); new_node_inputs.insert(new_node_inputs.end(), {position_bias, weight_attn_o, gamma2, weight_m, weight_p}); + if(is_layernorm_) new_node_inputs.push_back(gamma3); } else { if (!post_layernorm) { new_node_inputs.insert(new_node_inputs.end(), {gamma1, beta1, weight_qkv, bias_attn_qkv}); @@ -454,6 +476,7 @@ CNodePtr EncoderLayerFusion::CreateMaskedEncoderLayerFusionNode(const FuncGraphP new_node_inputs.insert(new_node_inputs.end(), {weight_attn_o, bias_attn_o, gamma1, beta1, weight_m, bias_m, weight_p, bias_p, gamma2, beta2}); } + if(is_layernorm_) new_node_inputs.insert(new_node_inputs.end(), {gamma3, beta3}); } auto new_node = func_graph->NewCNode(new_node_inputs); MS_CHECK_TRUE_RET(new_node != nullptr, nullptr); diff --git a/mindspore/lite/tools/optimizer/fusion/encoder_layer_fusion.h b/mindspore/lite/tools/optimizer/fusion/encoder_layer_fusion.h index d3bc7b32f9c..a037d0d958c 100644 --- a/mindspore/lite/tools/optimizer/fusion/encoder_layer_fusion.h +++ b/mindspore/lite/tools/optimizer/fusion/encoder_layer_fusion.h @@ -51,8 +51,10 @@ class EncoderLayerFusion : public MultiplePatternProcessPass { const std::string kPatternEncoderLayerPreNorm = "PatternTEncoderLayerPreNorm"; const std::string kPatternEncoderLayerT5Post = "PatternEncoderLayerT5Post"; const std::string kPatternEncoderLayerT5Pre = "PatternEncoderLayerT5Pre"; + const std::string kPatternEncoderLayerNormT5Pre = "PatternEncoderLayerNormT5Pre"; + VectorRef DefinePatternEncoderLayer(bool post_layernorm, bool layernorm_fusion, bool is_position_bias_, - bool mask) const; + bool mask, bool is_layer_norm) const; VectorRef getTuple(bool post_layernorm, bool layernorm_fusion, bool is_position_bias) const; VectorRef DefineLayerNorm(bool is_position_bias, VectorRef input, VarPtr gamma, VarPtr beta, VarPtr eps) const; CNodePtr CreateMaskedEncoderLayerFusionNode(const FuncGraphPtr &func_graph, const EquivPtr &equiv, @@ -62,7 +64,7 @@ class EncoderLayerFusion : public MultiplePatternProcessPass { bool IsActGELU(const FuncGraphPtr &func_graph, const EquivPtr &equiv, const VarPtr &input_prim) const; lite::STATUS GetEps(const EquivPtr &equiv, VarPtr node_name, float *eps) const; lite::STATUS CheckPattern(const FuncGraphPtr &func_graph, const EquivPtr &equiv, int *head_num, int *head_size, - float *eps1, float *eps2, float *scale) const; + float *eps1, float *eps2, float *eps3, float *scale) const; std::shared_ptr CreatePrim(const FuncGraphPtr &func_graph, const EquivPtr &equiv, bool post_layernorm, int64_t ffn_hidden_size) const; @@ -73,6 +75,8 @@ class EncoderLayerFusion : public MultiplePatternProcessPass { mutable VarPtr gamma1_{nullptr}; mutable VarPtr beta2_{nullptr}; mutable VarPtr gamma2_{nullptr}; + mutable VarPtr beta3_{nullptr}; + mutable VarPtr gamma3_{nullptr}; mutable VarPtr weight_attn_qkv_{nullptr}; mutable VarPtr weight_attn_qkv_cross_{nullptr}; mutable VarPtr weight_attn_o_{nullptr}; @@ -92,6 +96,8 @@ class EncoderLayerFusion : public MultiplePatternProcessPass { mutable VarPtr is_act_{nullptr}; mutable VarPtr eps1_{nullptr}; mutable VarPtr eps2_{nullptr}; + mutable VarPtr eps3_{nullptr}; + mutable bool is_layernorm_{false}; }; } // namespace opt } // namespace mindspore diff --git a/trc/transformer/cfg_bert.config b/trc/transformer/cfg_bert.config index 2f318d6c2a4..5f053e985f4 100644 --- a/trc/transformer/cfg_bert.config +++ b/trc/transformer/cfg_bert.config @@ -1,3 +1,3 @@ [gpu_context] -input_shape=input_ids:[1,128];token_type_ids:[1,128];input_mask:[1,128] +input_shape=input_ids:[1,128];token_type_ids:[1,128];input_mask:[1,128,128] diff --git a/trc/transformer/deploy.sh b/trc/transformer/deploy.sh index 4dfe8ecd00d..836780dcdff 100755 --- a/trc/transformer/deploy.sh +++ b/trc/transformer/deploy.sh @@ -73,7 +73,7 @@ if [ -f cfg_${model}.config ]; then command+="--configFile=cfg_${model}.config " fi command+="--device=GPU " -#command+="--enableFp16=true" +# command+="--enableFp16=true" echo command=${command} echo ${command} > execute.sh rsync -v execute.sh ${server}:${PWD} diff --git a/trc/transformer/models.txt b/trc/transformer/models.txt index 5be278e62e3..370d483fec7 100755 --- a/trc/transformer/models.txt +++ b/trc/transformer/models.txt @@ -162,8 +162,8 @@ #-b 1 -l 12 -H 2 -S 8 -s 20 -f 32 -P 1 -x 0 -m transformer_decoder_layer #-b 1 -l 12 -H 2 -S 8 -s 20 -f 32 -P 0 -x 0 -m transformer_encoder_layer #-b 1 -l 12 -H 2 -S 8 -s 20 -f 32 -P 1 -x 0 -m transformer_encoder_layer -#-b 1 -l 2 -H 2 -S 8 -s 20 -f 1024 -P 1 -c 75 -m bert --b 1 -l 12 -s 128 -t 128 -H 8 -S 512 -f 2048 -c 25 -m T5 +-b 6 -l 12 -H 12 -S 768 -s 128 -f 3072 -P 0 -c 75 -m bert +#-b 6 -l 12 -s 128 -t 256 -H 8 -S 512 -f 2048 -c 75 -m T5 #-b 1 -s 128 -t 256 -H 8 -S 512 -m mha_T5_cross @@ -193,4 +193,13 @@ #fp32 vsl no-layer-norm = 0.427564% #fp32 vsl layer-norm = 0.427583% -#full model + vsl = 0.151727% + no-tf = 0.205553% + tgt!=src = 0.204537% \ No newline at end of file +#full model + vsl = 0.151727% + no-tf = 0.205553% + tgt!=src = 0.204537% + +#T5 6 batch +vsl : 0.936759% +# no vsl : 0.324536% +#T5 1 batch vsl : 0.224442% +#no vsl : 0.199856% + +#without layernorm encoder and decoder +#T5 6 batch +vsl : 0.226612% without decoder_layernorm 0.226612% +# no vsl :0.333789% without decoder_layernorm : 0.333875% trt : 0.314171% \ No newline at end of file diff --git a/trc/transformer/train_transformer_export.py b/trc/transformer/train_transformer_export.py index 30754cc0d11..cc99185ad65 100755 --- a/trc/transformer/train_transformer_export.py +++ b/trc/transformer/train_transformer_export.py @@ -106,7 +106,7 @@ post_layernorm=True ffn_hidden_size=-1 app="ch" ffn_fp16 = False -compress = True +compress = False def read_args(): global batch global seq @@ -938,7 +938,7 @@ def bert_create(): base = repo.working_tree_dir name = "bert" str=" " - os.system(f"python {base}/../transformer_repo/pretrain_{name}.py {str.join(sys.argv[1:-4])} " ) + os.system(f"python {base}/../transformer_repo/pretrain_{name}.py {str.join(sys.argv[1:-4])} > bert.txt" ) def T5_create(): M.context.set_context(mode=M.context.PYNATIVE_MODE) -- Gitee From 1a996bf3ede103366fd66454bf77fb30785b6f20 Mon Sep 17 00:00:00 2001 From: batya kroizer Date: Sun, 12 Feb 2023 14:07:49 +0000 Subject: [PATCH 4/5] merge --- .../delegate/tensorrt/op/decoder_tensorrt.cc | 15 +-------------- .../delegate/tensorrt/op/decoder_tensorrt.h | 2 +- .../delegate/tensorrt/op/encoder_tensorrt.cc | 9 --------- .../delegate/tensorrt/op/encoder_tensorrt.h | 2 +- .../extendrt/delegate/tensorrt/op/mha_tensorrt.cc | 8 ++++++-- trc/transformer/build.sh | 1 + trc/transformer/ftBench.py | 4 ++-- trc/transformer/models.txt | 10 +++++++++- 8 files changed, 21 insertions(+), 30 deletions(-) diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/op/decoder_tensorrt.cc b/mindspore/lite/src/extendrt/delegate/tensorrt/op/decoder_tensorrt.cc index 6bec884e017..16d7581a936 100755 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/op/decoder_tensorrt.cc +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/op/decoder_tensorrt.cc @@ -194,7 +194,6 @@ int DecoderPlugin::RunCudaDecoder(const nvinfer1::PluginTensorDesc *inputDesc, for (int i = 0; i < num_of_inputs_; i++) { inputs_forward[i] = const_cast(inputs[i]); } - std::cout<<"num_of_inputs_ decoder: "<(inputs_forward, num_of_inputs_, outputs_forward, num_of_outputs_, ¶ms_, workspace); @@ -219,7 +218,7 @@ void DecoderPlugin::configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, const nvinfer1::DynamicPluginTensorDesc *out, int nbOutputs) noexcept { const int request_batch_size = static_cast(in[0].desc.dims.d[0]); const int request_src_seq_len = static_cast(in[0].desc.dims.d[1]); - const int request_tgt_seq_len = static_cast(in[nbInputs-2].desc.dims.d[1]); + const int request_tgt_seq_len = params_.attn1.attn.position_bias ? static_cast(in[10].desc.dims.d[2]) : static_cast(in[14].desc.dims.d[2]); params_.common_param.batch_size = request_batch_size; params_.common_param.src_seq_len = request_src_seq_len; params_.common_param.tgt_seq_len = request_tgt_seq_len; @@ -228,18 +227,6 @@ void DecoderPlugin::configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, } size_t DecoderPlugin::getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs, int nbInputs, const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const noexcept { - // params_.decoder.d_sequence_length = nullptr; - // params_.decoder.padding_offset = nullptr; - // params_.decoder.d_sequence_length2 = nullptr; - // params_.decoder.padding_offset2 = nullptr; - // params_.attn1.attn.d_sequence_length=nullptr; - // params_.attn1.attn.padding_offset=nullptr; - // params_.attn1.attn.d_sequence_length2=nullptr; - // params_.attn1.attn.padding_offset2=nullptr; - // params_.attn2.attn.d_sequence_length=nullptr; - // params_.attn2.attn.padding_offset=nullptr; - // params_.attn2.attn.d_sequence_length2=nullptr; - // params_.attn2.attn.padding_offset2=nullptr; if (compute_type_ == RuntimePrecisionMode_FP16) { return fastertransformer::GetDecoderLayerWorkspaceSize(¶ms_); } else { diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/op/decoder_tensorrt.h b/mindspore/lite/src/extendrt/delegate/tensorrt/op/decoder_tensorrt.h index d9d5f458383..cb9150ac3fa 100644 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/op/decoder_tensorrt.h +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/op/decoder_tensorrt.h @@ -40,7 +40,7 @@ class DecoderTensorRT : public TensorRTOp { private: nvinfer1::ITensor *castTensor(TensorRTContext *ctx, const TensorInfo &ms_tensor, const std::string &op_name); - bool is_ffn_fp16_ = false; + bool is_ffn_fp16_ = true; }; constexpr auto DECODER_PLUGIN_NAME{"DecoderPlugin"}; diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.cc b/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.cc index 6b3620908aa..5577b71996e 100755 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.cc +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.cc @@ -199,8 +199,6 @@ int EncoderPlugin::RunCudaEncoder(const nvinfer1::PluginTensorDesc *inputDesc, inputs_forward[i] = const_cast(inputs[i]); } void *outputs_forward[] = {outputs[0]}; - std::cout<<"num_of_inputs_ "<(inputs_forward, num_of_inputs_, outputs_forward, num_of_outputs_, ¶ms_, workspace); return RET_OK; @@ -230,16 +228,9 @@ void EncoderPlugin::configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, params_.common_param.tgt_seq_len = request_tgt_seq_len; num_of_inputs_ = nbInputs; num_of_outputs_ = nbOutputs; - std::cout<<"num_of_inputs_"<(¶ms_); } else { diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.h b/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.h index 555c97acd5b..41118ef2020 100644 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.h +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/op/encoder_tensorrt.h @@ -41,7 +41,7 @@ class EncoderTensorRT : public TensorRTOp { private: nvinfer1::ITensor *castTensor(TensorRTContext *ctx, const TensorInfo &ms_tensor, const std::string &op_name); - bool is_ffn_fp16_ = false; + bool is_ffn_fp16_ = true; }; constexpr auto ENCODER_PLUGIN_NAME{"EncoderPlugin"}; diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/op/mha_tensorrt.cc b/mindspore/lite/src/extendrt/delegate/tensorrt/op/mha_tensorrt.cc index 75cb121d986..03abb00bca3 100644 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/op/mha_tensorrt.cc +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/op/mha_tensorrt.cc @@ -131,7 +131,7 @@ int MhaPlugin::RunCudaMha(const nvinfer1::PluginTensorDesc *inputDesc, const nvi const int attn_mask_tensor_idx = 7 + cross_tensor_offset; const int bias_qkv_tensor_idx = 5 + cross_tensor_offset; const int weight_qkv_tensor_idx = 3; - const int position_bias_tensor_idx = 6 + cross_tensor_offset; + const int position_bias_tensor_idx = 5 + cross_tensor_offset; common_param_.algo = algoId; common_param_.stream = stream; void *inputs_attn[num_of_inputs_]; @@ -148,8 +148,8 @@ int MhaPlugin::RunCudaMha(const nvinfer1::PluginTensorDesc *inputDesc, const nvi inputs_attn[index++] = const_cast(inputs[bias_qkv_tensor_idx]); } if (params_.attn.position_bias) { + inputs_attn[index++] = const_cast(inputs[attn_mask_tensor_idx - C1NUM]); inputs_attn[index++] = const_cast(inputs[position_bias_tensor_idx]); - inputs_attn[index++] = const_cast(inputs[attn_mask_tensor_idx - C2NUM]); } else { inputs_attn[index++] = const_cast(inputs[attn_mask_tensor_idx]); } @@ -186,6 +186,10 @@ void MhaPlugin::configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int common_param_.batch_size = request_batch_size; common_param_.src_seq_len = request_src_seq_len; common_param_.tgt_seq_len = request_tgt_seq_len; + common_param_.h_token_num = common_param_.batch_size * common_param_.src_seq_len; + common_param_.h_token_num2 = common_param_.batch_size * common_param_.tgt_seq_len; + params_.attn.padding_offset = nullptr; + params_.attn.padding_offset2 = nullptr; num_of_inputs_ = nbInputs; num_of_outputs_ = nbOutputs; } diff --git a/trc/transformer/build.sh b/trc/transformer/build.sh index 72ddec2dc44..179d9aade90 100755 --- a/trc/transformer/build.sh +++ b/trc/transformer/build.sh @@ -16,4 +16,5 @@ MSLITE_GPU_BACKEND=tensorrt \ MSLITE_GPU_ARCH=80 \ TENSORRT_PATH=/usr/lib/x86_64-linux-gnu \ MSLIBS_SERVER=localHost \ +CPATH=/usr/include/python3.8 \ ${base}/build.sh -I x86_64 $@ diff --git a/trc/transformer/ftBench.py b/trc/transformer/ftBench.py index e742914dd33..7793355f8ce 100755 --- a/trc/transformer/ftBench.py +++ b/trc/transformer/ftBench.py @@ -12,14 +12,14 @@ f.close() system = f'{base}/trc/system_test/release/ubuntu_x86/mindspore-lite-{version}-linux-x64' benchmark = f'{system}/tools/benchmark' work_dir=f'{base}/trc/transformer' -image = "private_transformer:0.1" +image = "private_transformer:0.2" server = "caspi" enable_fp16 = "false" suffix="fp32" usage='enter the correct parameters: app=ch\\trc, act=runtime\\be, loop count=int>=0, server=local\\num of server\nif app=trc and act=be loop count must be 1' app='ch' act='be' -cuda_visible_dev=6 +cuda_visible_dev=3 loop_count=1 if len(sys.argv)>2 or len(sys.argv)==1: parameters=sys.argv[1:] diff --git a/trc/transformer/models.txt b/trc/transformer/models.txt index f8933f21ffa..fd36692f851 100755 --- a/trc/transformer/models.txt +++ b/trc/transformer/models.txt @@ -163,6 +163,14 @@ #-b 1 -l 12 -H 2 -S 8 -s 20 -f 32 -P 0 -x 0 -m transformer_encoder_layer #-b 1 -l 12 -H 2 -S 8 -s 20 -f 32 -P 1 -x 0 -m transformer_encoder_layer #-b 1 -l 2 -H 2 -S 8 -s 20 -f 1024 -P 1 -c 75 -m bert --b 1 -l 2 -s 128 -t 256 -H 8 -S 512 -f 2048 -c 75 -m T5 +-b 1 -l 1 -s 128 -t 256 -H 8 -S 512 -f 2048 -c 75 -m T5 +#-b 1 -s 128 -t 128 -H 8 -S 512 -m mha_T5_cross +#fusion encoder + decoder: +#0.854797% LAYERNORM+128+256 +#0.861399% LAYERNORM+128+128 +#0.21935% NO LAYERNORM+128+256 :(trt= 0.256139%) +#0.399801% NO LAYERNORM+128+128 + +#0.223942% NO LAYERNORM + MHA_FUSION + 128+256 \ No newline at end of file -- Gitee From d76160969581246f553901b68a5bfa11df3a1aff Mon Sep 17 00:00:00 2001 From: batya kroizer Date: Sun, 12 Feb 2023 16:35:30 +0200 Subject: [PATCH 5/5] fix --- mindspore/core/ops/decoder_layer.cc | 4 ++-- mindspore/core/ops/decoder_layer.h | 2 +- mindspore/core/ops/encoder_layer.cc | 2 +- .../lite/tools/optimizer/fusion/decoder_layer_fusion.cc | 9 +++++---- .../lite/tools/optimizer/fusion/encoder_layer_fusion.cc | 5 +++-- trc/transformer/models.txt | 9 ++++++--- 6 files changed, 18 insertions(+), 13 deletions(-) diff --git a/mindspore/core/ops/decoder_layer.cc b/mindspore/core/ops/decoder_layer.cc index b1e0f6f35af..47a9e3e8004 100644 --- a/mindspore/core/ops/decoder_layer.cc +++ b/mindspore/core/ops/decoder_layer.cc @@ -118,7 +118,7 @@ ActType DecoderLayer::get_act_type() const { return ActType(GetValue(value_ptr)); } bool DecoderLayer::get_layer_norm() const { - auto value_ptr = this->GetAttr(kDecoderLayeNorm); + auto value_ptr = this->GetAttr(kLayerNorm); return GetValue(value_ptr); } void DecoderLayer::Init(int64_t head_num, int64_t head_size, float eps_layernorm1, float eps_layernorm2, @@ -137,7 +137,7 @@ void DecoderLayer::Init(int64_t head_num, int64_t head_size, float eps_layernorm this->set_act_type(act_type); this->set_scale1(scale1); this->set_scale2(scale2); - this->set_scale2(layer_norm); + this->set_layer_norm(layer_norm); } REGISTER_PRIMITIVE_C(kNameDecoderLayer, DecoderLayer); } // namespace mindspore::ops diff --git a/mindspore/core/ops/decoder_layer.h b/mindspore/core/ops/decoder_layer.h index 88a8f2ad741..b0e8827d18b 100644 --- a/mindspore/core/ops/decoder_layer.h +++ b/mindspore/core/ops/decoder_layer.h @@ -64,7 +64,7 @@ class MIND_API DecoderLayer : public BaseOperator { /// \param[in] eps_layernorm1 Define eps layernorm1. /// \param[in] eps_layernorm2 Define eps layernorm2. /// \param[in] eps_layernorm3 Define eps layernorm3. - /// \param[in] eps_layernorm3 Define eps layernorm3. + /// \param[in] eps_layernorm4 Define eps layernorm4. /// \param[in] ffn_hidden_size Define ffn hidden size. /// \param[in] position_bias1 Define position_bias1. /// \param[in] position_bias2 Define position_bias2. diff --git a/mindspore/core/ops/encoder_layer.cc b/mindspore/core/ops/encoder_layer.cc index fccb4de82d0..06dd3ae6630 100644 --- a/mindspore/core/ops/encoder_layer.cc +++ b/mindspore/core/ops/encoder_layer.cc @@ -98,7 +98,7 @@ ActType EncoderLayer::get_act_type() const { return ActType(GetValue(value_ptr)); } bool EncoderLayer::get_layer_norm() const { - auto value_ptr = this->GetAttr(kDecoderLayeNorm); + auto value_ptr = this->GetAttr(kLayerNorm); return GetValue(value_ptr); } void EncoderLayer::Init(int64_t head_num, int64_t head_size, float eps_layernorm1, float eps_layernorm2, diff --git a/mindspore/lite/tools/optimizer/fusion/decoder_layer_fusion.cc b/mindspore/lite/tools/optimizer/fusion/decoder_layer_fusion.cc index 7c6b686b820..5c02df1f8f7 100644 --- a/mindspore/lite/tools/optimizer/fusion/decoder_layer_fusion.cc +++ b/mindspore/lite/tools/optimizer/fusion/decoder_layer_fusion.cc @@ -276,9 +276,10 @@ AnfNodePtr DecoderLayerFusion::Process(const std::string &pattern_name, const mi if (pattern_name == kPatternDecoderT5Pre || pattern_name == kPatternDecoderT5Post || pattern_name == kPatternDecoderLayerNormT5Pre) { is_position_bias_ = true; } - std::cout<<"pattern_name= "<((*equiv)[beta2_]); beta3 = utils::cast((*equiv)[beta3_]); if(is_layernorm_) - beta4 = utils::cast((*equiv)[beta3_]); + beta4 = utils::cast((*equiv)[beta4_]); } auto gamma1 = utils::cast((*equiv)[gamma1_]); auto gamma2 = utils::cast((*equiv)[gamma2_]); auto gamma3 = utils::cast((*equiv)[gamma3_]); if(is_layernorm_) - gamma4 = utils::cast((*equiv)[gamma3_]); + gamma4 = utils::cast((*equiv)[gamma4_]); input_mask = mask ? utils::cast((*equiv)[mask_]) : nullptr; auto cross_mask = utils::cast((*equiv)[cross_mask_]); diff --git a/mindspore/lite/tools/optimizer/fusion/encoder_layer_fusion.cc b/mindspore/lite/tools/optimizer/fusion/encoder_layer_fusion.cc index 0c0c78f4146..c7bf1e506ac 100644 --- a/mindspore/lite/tools/optimizer/fusion/encoder_layer_fusion.cc +++ b/mindspore/lite/tools/optimizer/fusion/encoder_layer_fusion.cc @@ -264,6 +264,7 @@ AnfNodePtr EncoderLayerFusion::Process(const std::string &pattern_name, const mi if (func_graph == nullptr || node == nullptr || equiv == nullptr) { return nullptr; } + is_layernorm_ = false; if(pattern_name == kPatternEncoderLayerNormT5Pre) is_layernorm_ = true; if (pattern_name == kPatternEncoderLayerPostNorm || pattern_name == kPatternEncoderLayerPreNorm) is_layernorm_fusion_ = true; @@ -423,7 +424,7 @@ CNodePtr EncoderLayerFusion::CreateMaskedEncoderLayerFusionNode(const FuncGraphP MS_ASSERT(equiv != nullptr); MS_ASSERT(node != nullptr); auto input = utils::cast((*equiv)[input_]); - AnfNodePtr position_bias, input_mask, bias_attn_o, bias_attn_qkv, beta1, beta2, bias_m, bias_p, gamma3, + AnfNodePtr position_bias, input_mask, bias_attn_o, bias_attn_qkv, beta1, beta2, bias_m, bias_p, beta3, gamma3; auto weight_qkv = utils::cast((*equiv)[weight_attn_qkv_]); auto weight_attn_o = utils::cast((*equiv)[weight_attn_o_]); @@ -442,7 +443,7 @@ CNodePtr EncoderLayerFusion::CreateMaskedEncoderLayerFusionNode(const FuncGraphP auto gamma1 = utils::cast((*equiv)[gamma1_]); auto gamma2 = utils::cast((*equiv)[gamma2_]); if(is_layernorm_) - auto gamma3 = utils::cast((*equiv)[gamma3_]); + gamma3 = utils::cast((*equiv)[gamma3_]); input_mask = mask ? utils::cast((*equiv)[mask_]) : nullptr; auto base_shape_ptr = weight_m->Shape(); MS_EXCEPTION_IF_NULL(base_shape_ptr); diff --git a/trc/transformer/models.txt b/trc/transformer/models.txt index 370d483fec7..db764baa6cb 100755 --- a/trc/transformer/models.txt +++ b/trc/transformer/models.txt @@ -162,8 +162,8 @@ #-b 1 -l 12 -H 2 -S 8 -s 20 -f 32 -P 1 -x 0 -m transformer_decoder_layer #-b 1 -l 12 -H 2 -S 8 -s 20 -f 32 -P 0 -x 0 -m transformer_encoder_layer #-b 1 -l 12 -H 2 -S 8 -s 20 -f 32 -P 1 -x 0 -m transformer_encoder_layer --b 6 -l 12 -H 12 -S 768 -s 128 -f 3072 -P 0 -c 75 -m bert -#-b 6 -l 12 -s 128 -t 256 -H 8 -S 512 -f 2048 -c 75 -m T5 +#-b 6 -l 12 -H 12 -S 768 -s 128 -f 3072 -P 0 -c 75 -m bert +-b 6 -l 12 -s 128 -t 256 -H 8 -S 512 -f 2048 -c 75 -m T5 #-b 1 -s 128 -t 256 -H 8 -S 512 -m mha_T5_cross @@ -202,4 +202,7 @@ #without layernorm encoder and decoder #T5 6 batch +vsl : 0.226612% without decoder_layernorm 0.226612% -# no vsl :0.333789% without decoder_layernorm : 0.333875% trt : 0.314171% \ No newline at end of file +# no vsl :0.333789% without decoder_layernorm : 0.333875% trt : 0.314171% + +#with encoder + decoder convert layer-norm 0.666858% +#without 0.326567% \ No newline at end of file -- Gitee