From 3f0898b2aea424f18f58a182803478f25548674e Mon Sep 17 00:00:00 2001
From: Xu Yizhou <xuyizhou1@huawei.com>
Date: Wed, 2 Nov 2022 11:13:07 +0800
Subject: [PATCH 3/3] SM4 XTS optimization for ARM by HW instruction
This patch implements the SM4 XTS optimization for ARM processors,
using the SM4 HW instructions, an optional feature of the Crypto
Extension for AArch64 (ARMv8).
Signed-off-by: Xu Yizhou <xuyizhou1@huawei.com>
---
crypto/evp/e_sm4.c | 28 ++
crypto/sm4/asm/sm4-armv8.pl | 498 +++++++++++++++++++++++++++++++++-
include/crypto/sm4_platform.h | 14 +
3 files changed, 537 insertions(+), 3 deletions(-)
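Background for the code below: with a data key K1 and a tweak key K2, XTS
encrypts block i as

    T_0     = E_K2(IV)
    C_i     = E_K1(P_i xor T_i) xor T_i
    T_{i+1} = T_i * x   over GF(2^128) mod x^128 + x^7 + x^2 + x + 1

IEEE Std 1619-2007 and GB/T 17964-2021 share this construction and differ
in the bit order used for the Galois multiplication; accordingly, the _gb
code paths generated below are the plain paths bracketed by rbit
instructions.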
diff --git a/crypto/evp/e_sm4.c b/crypto/evp/e_sm4.c
index eaa5ba0..da4dbd3 100644
--- a/crypto/evp/e_sm4.c
+++ b/crypto/evp/e_sm4.c
@@ -281,6 +281,34 @@ static int sm4_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
const int bytes = EVP_CIPHER_CTX_key_length(ctx) / 2;
xctx->stream_gb = NULL;
xctx->stream = NULL;
+#ifdef HWSM4_CAPABLE
+ if (HWSM4_CAPABLE) {
+ if (enc) {
+ HWSM4_set_encrypt_key(key, &xctx->ks1.ks);
+ xctx->xts.block1 = (block128_f) HWSM4_encrypt;
+# ifdef HWSM4_xts_encrypt_gb
+ xctx->stream_gb = HWSM4_xts_encrypt_gb;
+# endif
+# ifdef HWSM4_xts_encrypt
+ xctx->stream = HWSM4_xts_encrypt;
+# endif
+ } else {
+ HWSM4_set_decrypt_key(key, &xctx->ks1.ks);
+ xctx->xts.block1 = (block128_f) HWSM4_decrypt;
+# ifdef HWSM4_xts_decrypt_gb
+ xctx->stream_gb = HWSM4_xts_decrypt_gb;
+# endif
+# ifdef HWSM4_xts_decrypt
+ xctx->stream = HWSM4_xts_decrypt;
+# endif
+ }
+ HWSM4_set_encrypt_key(key + bytes, &xctx->ks2.ks);
+ xctx->xts.block2 = (block128_f) HWSM4_encrypt;
+
+ xctx->xts.key1 = &xctx->ks1;
+ break;
+ } else
+#endif
#ifdef VPSM4_EX_CAPABLE
if (VPSM4_EX_CAPABLE) {
if (enc) {
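The init path above only wires up function pointers; the dispatch happens at
cipher time. A minimal sketch of that dispatch, with the context layout
modelled on the aes_xts glue (the names SM4_XTS_SKETCH_CTX, xts_stream_f and
sm4_xts_dispatch are illustrative assumptions, not the real definitions):

    #include <stddef.h>

    typedef void (*block128_f)(const unsigned char in[16],
                               unsigned char out[16], const void *key);
    typedef void (*xts_stream_f)(const unsigned char *in, unsigned char *out,
                                 size_t len, const void *key1,
                                 const void *key2, const unsigned char iv[16]);

    typedef struct {
        void *key1, *key2;          /* data key / tweak key (ks1 / ks2)  */
        block128_f block1, block2;  /* one-block fallbacks               */
        xts_stream_f stream_gb;     /* GB/T 17964-2021 one-shot asm path */
        xts_stream_f stream;        /* IEEE 1619-2007 one-shot asm path  */
    } SM4_XTS_SKETCH_CTX;

    static int sm4_xts_dispatch(SM4_XTS_SKETCH_CTX *c, int gb,
                                unsigned char *out, const unsigned char *in,
                                size_t len, const unsigned char iv[16])
    {
        xts_stream_f stream = gb ? c->stream_gb : c->stream;

        if (stream != NULL) {   /* HW fast path set up in init_key above */
            (*stream)(in, out, len, c->key1, c->key2, iv);
            return 1;
        }
        /* otherwise the glue falls back to CRYPTO_xts128_encrypt, which
         * walks the data one 16-byte block at a time via block1/block2 */
        return 0;
    }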
diff --git a/crypto/sm4/asm/sm4-armv8.pl b/crypto/sm4/asm/sm4-armv8.pl
index dbacad2..923c1c0 100644
--- a/crypto/sm4/asm/sm4-armv8.pl
+++ b/crypto/sm4/asm/sm4-armv8.pl
@@ -11,9 +11,9 @@
# Oct 2021
#
-# $outut is the last argument if it looks like a file (it has an extension)
+# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
-$outut = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
+$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
@@ -21,7 +21,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
-open OUT,"| \"$^X\" $xlate $flavour \"$outut\""
+open OUT,"| \"$^X\" $xlate $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
@@ -110,6 +110,120 @@ $code.=<<___;
___
}
+sub mov_reg_to_vec() {
+ my $src0 = shift;
+ my $src1 = shift;
+ my $desv = shift;
+$code.=<<___;
+ mov $desv.d[0],$src0
+ mov $desv.d[1],$src1
+#ifdef __ARMEB__
+ rev32 $desv.16b,$desv.16b
+#endif
+___
+}
+
+sub mov_vec_to_reg() {
+ my $srcv = shift;
+ my $des0 = shift;
+ my $des1 = shift;
+$code.=<<___;
+ mov $des0,$srcv.d[0]
+ mov $des1,$srcv.d[1]
+___
+}
+
+sub compute_tweak() {
+ my $src0 = shift;
+ my $src1 = shift;
+ my $des0 = shift;
+ my $des1 = shift;
+ my $tmp0 = shift;
+ my $tmp1 = shift;
+ my $magic = shift;
+$code.=<<___;
+ extr x$tmp1,$src1,$src1,#32
+ extr $des1,$src1,$src0,#63
+ and w$tmp0,w$magic,w$tmp1,asr#31
+ eor $des0,x$tmp0,$src0,lsl#1
+___
+}
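compute_tweak above is the scalar doubling step T_{i+1} = T_i * x carried in
two 64-bit registers; its extr/and/eor sequence is equivalent to this C
(a sketch; src and des are little-endian {low, high} word pairs):

    #include <stdint.h>

    /* mirrors the extr/and/eor sequence in compute_tweak() */
    static void xts_tweak_double(uint64_t des[2], const uint64_t src[2])
    {
        /* 0x87 if the top bit of the 128-bit tweak is set, else 0 */
        uint64_t carry = 0x87 & (uint64_t)((int64_t)src[1] >> 63);

        des[1] = (src[1] << 1) | (src[0] >> 63); /* shift the high word */
        des[0] = (src[0] << 1) ^ carry;          /* reduce into the low */
    }

The first extr (a rotate of the high word by 32) only exists to place the
tweak's top bit where the 32-bit asr #31 can sign-extend it into a mask.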
+
+sub compute_tweak_vec() {
+ my $src = shift;
+ my $des = shift;
+ my $tmp0 = shift;
+ my $tmp1 = shift;
+ my $magic = shift;
+ &rbit($tmp1,$src);
+$code.=<<___;
+ shl $des.16b, $tmp1.16b, #1
+ ext $tmp0.16b, $tmp1.16b, $tmp1.16b,#15
+ ushr $tmp0.16b, $tmp0.16b, #7
+ mul $tmp0.16b, $tmp0.16b, $magic.16b
+ eor $des.16b, $des.16b, $tmp0.16b
+___
+ &rbit($des,$des);
+}
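compute_tweak_vec performs the same doubling on a whole vector register.
Ignoring the rbit bracketing (a no-op outside the _gb flavour), the
shl/ext/ushr/mul/eor sequence is byte-level doubling, with the per-byte
carries supplied by the byte-rotated copy, equivalent to (a sketch):

    #include <stdint.h>

    static void xts_tweak_double_bytes(uint8_t des[16], const uint8_t src[16])
    {
        uint8_t carry = 0;                     /* carry between bytes  */

        for (int i = 0; i < 16; i++) {
            uint8_t msb = src[i] >> 7;                  /* ushr #7     */
            des[i] = (uint8_t)((src[i] << 1) | carry);  /* shl  #1     */
            carry = msb;
        }
        if (carry)                 /* MSB of byte 15 wraps around      */
            des[0] ^= 0x87;        /* the mul-by-magic + eor reduction */
    }

The vMagic constant 0x01..0187 handles the wrap and the carries in a single
multiply: byte 0 is multiplied by 0x87, every other byte by 0x01.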
+
+sub mov_en_to_enc(){
+ my $en = shift;
+ my $enc = shift;
+ if ($en eq "en") {
+$code.=<<___;
+ mov $enc,1
+___
+ } else {
+$code.=<<___;
+ mov $enc,0
+___
+ }
+}
+
+sub rbit() {
+ my $dst = shift;
+ my $src = shift;
+
+ if ($src and ("$src" ne "$dst")) {
+ if ($standard eq "_gb") {
+$code.=<<___;
+ rbit $dst.16b,$src.16b
+___
+ } else {
+$code.=<<___;
+ mov $dst.16b,$src.16b
+___
+ }
+ } else {
+ if ($standard eq "_gb") {
+$code.=<<___;
+ rbit $dst.16b,$src.16b
+___
+ }
+ }
+}
+
+sub rev32_armeb() {
+ my $dst = shift;
+ my $src = shift;
+
+ if ($src and ("$src" ne "$dst")) {
+$code.=<<___;
+#ifdef __ARMEB__
+ rev32 $dst.16b,$src.16b
+#else
+ mov $dst.16b,$src.16b
+#endif
+___
+ } else {
+$code.=<<___;
+#ifdef __ARMEB__
+ rev32 $dst.16b,$dst.16b
+#endif
+___
+ }
+}
+
$code=<<___;
#include "arm_arch.h"
.arch armv8-a+crypto
@@ -595,6 +709,384 @@ $code.=<<___;
.size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks
___
}}}
+
+
+{{{
+my ($inp,$out,$len,$rk1,$rk2,$ivp)=map("x$_",(0..5));
+my ($blocks)=("x2");
+my ($enc)=("x6");
+my ($remain)=("x7");
+my @twx=map("x$_",(9..24));
+my $lastBlk=("x25");
+
+my @tweak=map("v$_",(8..15));
+my @dat=map("v$_",(16..23));
+my $lastTweak=("v24");
+
+# x/w/v/q registers used to compute the tweak
+my ($magic)=("8");
+my ($tmp0,$tmp1)=("26","27");
+my ($qMagic,$vMagic)=("q25","v25");
+my ($vTmp0,$vTmp1)=("v26","v27");
+
+sub gen_xts_do_cipher() {
+$code.=<<___;
+.globl ${prefix}_xts_do_cipher${standard}
+.type ${prefix}_xts_do_cipher${standard},%function
+.align 5
+${prefix}_xts_do_cipher${standard}:
+ mov w$magic,0x87
+ ldr $qMagic, =0x01010101010101010101010101010187
+ // load the round keys of key2, used to encrypt the IV into the initial tweak
+ ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk2],#64
+ ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk2]
+ ld1 {@tweak[0].4s}, [$ivp]
+___
+ &rev32(@tweak[0],@tweak[0]);
+ &enc_blk(@tweak[0]);
+ &rev32(@tweak[0],@tweak[0]);
+$code.=<<___;
+ // load the round keys of key1, used to encrypt the XORed plaintext blocks
+ ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk1],#64
+ ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk1]
+
+ and $remain,$len,#0x0F
+ // convert length into blocks
+ lsr $blocks,$len,4
+ cmp $blocks,#1 // $len must be at least 16
+ b.lt 99f
+
+ cmp $remain,#0 // if $len is a multiple of 16
+ b.eq .xts_encrypt_blocks${standard}
+ // if $len is not a multiple of 16
+ subs $blocks,$blocks,#1
+ b.eq .only_2blks_tweak${standard} // if $len is less than 32
+
+.xts_encrypt_blocks${standard}:
+___
+ &rbit(@tweak[0],@tweak[0]);
+ &rev32_armeb(@tweak[0],@tweak[0]);
+ &mov_vec_to_reg(@tweak[0],@twx[0],@twx[1]);
+ &compute_tweak(@twx[0],@twx[1],@twx[2],@twx[3],$tmp0,$tmp1,$magic);
+ &compute_tweak(@twx[2],@twx[3],@twx[4],@twx[5],$tmp0,$tmp1,$magic);
+ &compute_tweak(@twx[4],@twx[5],@twx[6],@twx[7],$tmp0,$tmp1,$magic);
+ &compute_tweak(@twx[6],@twx[7],@twx[8],@twx[9],$tmp0,$tmp1,$magic);
+ &compute_tweak(@twx[8],@twx[9],@twx[10],@twx[11],$tmp0,$tmp1,$magic);
+ &compute_tweak(@twx[10],@twx[11],@twx[12],@twx[13],$tmp0,$tmp1,$magic);
+ &compute_tweak(@twx[12],@twx[13],@twx[14],@twx[15],$tmp0,$tmp1,$magic);
+$code.=<<___;
+1:
+ cmp $blocks,#8
+___
+ &mov_reg_to_vec(@twx[0],@twx[1],@tweak[0]);
+ &compute_tweak(@twx[14],@twx[15],@twx[0],@twx[1],$tmp0,$tmp1,$magic);
+ &mov_reg_to_vec(@twx[2],@twx[3],@tweak[1]);
+ &compute_tweak(@twx[0],@twx[1],@twx[2],@twx[3],$tmp0,$tmp1,$magic);
+ &mov_reg_to_vec(@twx[4],@twx[5],@tweak[2]);
+ &compute_tweak(@twx[2],@twx[3],@twx[4],@twx[5],$tmp0,$tmp1,$magic);
+ &mov_reg_to_vec(@twx[6],@twx[7],@tweak[3]);
+ &compute_tweak(@twx[4],@twx[5],@twx[6],@twx[7],$tmp0,$tmp1,$magic);
+ &mov_reg_to_vec(@twx[8],@twx[9],@tweak[4]);
+ &compute_tweak(@twx[6],@twx[7],@twx[8],@twx[9],$tmp0,$tmp1,$magic);
+ &mov_reg_to_vec(@twx[10],@twx[11],@tweak[5]);
+ &compute_tweak(@twx[8],@twx[9],@twx[10],@twx[11],$tmp0,$tmp1,$magic);
+ &mov_reg_to_vec(@twx[12],@twx[13],@tweak[6]);
+ &compute_tweak(@twx[10],@twx[11],@twx[12],@twx[13],$tmp0,$tmp1,$magic);
+ &mov_reg_to_vec(@twx[14],@twx[15],@tweak[7]);
+ &compute_tweak(@twx[12],@twx[13],@twx[14],@twx[15],$tmp0,$tmp1,$magic);
+$code.=<<___;
+ b.lt 2f
+ ld1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$inp],#64
+___
+ &rbit(@tweak[0],@tweak[0]);
+ &rbit(@tweak[1],@tweak[1]);
+ &rbit(@tweak[2],@tweak[2]);
+ &rbit(@tweak[3],@tweak[3]);
+$code.=<<___;
+ eor @dat[0].16b, @dat[0].16b, @tweak[0].16b
+ eor @dat[1].16b, @dat[1].16b, @tweak[1].16b
+ eor @dat[2].16b, @dat[2].16b, @tweak[2].16b
+ eor @dat[3].16b, @dat[3].16b, @tweak[3].16b
+ ld1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$inp],#64
+___
+ &rbit(@tweak[4],@tweak[4]);
+ &rbit(@tweak[5],@tweak[5]);
+ &rbit(@tweak[6],@tweak[6]);
+ &rbit(@tweak[7],@tweak[7]);
+$code.=<<___;
+ eor @dat[4].16b, @dat[4].16b, @tweak[4].16b
+ eor @dat[5].16b, @dat[5].16b, @tweak[5].16b
+ eor @dat[6].16b, @dat[6].16b, @tweak[6].16b
+ eor @dat[7].16b, @dat[7].16b, @tweak[7].16b
+___
+ &rev32(@dat[0],@dat[0]);
+ &rev32(@dat[1],@dat[1]);
+ &rev32(@dat[2],@dat[2]);
+ &rev32(@dat[3],@dat[3]);
+ &rev32(@dat[4],@dat[4]);
+ &rev32(@dat[5],@dat[5]);
+ &rev32(@dat[6],@dat[6]);
+ &rev32(@dat[7],@dat[7]);
+ &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]);
+ &enc_4blks(@dat[4],@dat[5],@dat[6],@dat[7]);
+ &rev32(@dat[0],@dat[0]);
+ &rev32(@dat[1],@dat[1]);
+ &rev32(@dat[2],@dat[2]);
+ &rev32(@dat[3],@dat[3]);
+ &rev32(@dat[4],@dat[4]);
+ &rev32(@dat[5],@dat[5]);
+ &rev32(@dat[6],@dat[6]);
+ &rev32(@dat[7],@dat[7]);
+$code.=<<___;
+ eor @dat[0].16b, @dat[0].16b, @tweak[0].16b
+ eor @dat[1].16b, @dat[1].16b, @tweak[1].16b
+ eor @dat[2].16b, @dat[2].16b, @tweak[2].16b
+ eor @dat[3].16b, @dat[3].16b, @tweak[3].16b
+ eor @dat[4].16b, @dat[4].16b, @tweak[4].16b
+ eor @dat[5].16b, @dat[5].16b, @tweak[5].16b
+ eor @dat[6].16b, @dat[6].16b, @tweak[6].16b
+ eor @dat[7].16b, @dat[7].16b, @tweak[7].16b
+
+ // save the last tweak
+ mov $lastTweak.16b,@tweak[7].16b
+ st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64
+ st1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$out],#64
+ subs $blocks,$blocks,#8
+ b.eq 100f
+ b 1b
+2:
+ // process 4 blocks
+ cmp $blocks,#4
+ b.lt 1f
+ ld1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$inp],#64
+___
+ &rbit(@tweak[0],@tweak[0]);
+ &rbit(@tweak[1],@tweak[1]);
+ &rbit(@tweak[2],@tweak[2]);
+ &rbit(@tweak[3],@tweak[3]);
+$code.=<<___;
+ eor @dat[0].16b, @dat[0].16b, @tweak[0].16b
+ eor @dat[1].16b, @dat[1].16b, @tweak[1].16b
+ eor @dat[2].16b, @dat[2].16b, @tweak[2].16b
+ eor @dat[3].16b, @dat[3].16b, @tweak[3].16b
+___
+ &rev32(@dat[0],@dat[0]);
+ &rev32(@dat[1],@dat[1]);
+ &rev32(@dat[2],@dat[2]);
+ &rev32(@dat[3],@dat[3]);
+ &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]);
+ &rev32(@dat[0],@dat[0]);
+ &rev32(@dat[1],@dat[1]);
+ &rev32(@dat[2],@dat[2]);
+ &rev32(@dat[3],@dat[3]);
+$code.=<<___;
+ eor @dat[0].16b, @dat[0].16b, @tweak[0].16b
+ eor @dat[1].16b, @dat[1].16b, @tweak[1].16b
+ eor @dat[2].16b, @dat[2].16b, @tweak[2].16b
+ eor @dat[3].16b, @dat[3].16b, @tweak[3].16b
+ st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64
+ sub $blocks,$blocks,#4
+ mov @tweak[0].16b,@tweak[4].16b
+ mov @tweak[1].16b,@tweak[5].16b
+ mov @tweak[2].16b,@tweak[6].16b
+ // save the last tweak
+ mov $lastTweak.16b,@tweak[3].16b
+1:
+ // process last block
+ cmp $blocks,#1
+ b.lt 100f
+ b.gt 1f
+ ld1 {@dat[0].4s},[$inp],#16
+___
+ &rbit(@tweak[0],@tweak[0]);
+$code.=<<___;
+ eor @dat[0].16b, @dat[0].16b, @tweak[0].16b
+___
+ &rev32(@dat[0],@dat[0]);
+ &enc_blk(@dat[0]);
+ &rev32(@dat[0],@dat[0]);
+$code.=<<___;
+ eor @dat[0].16b, @dat[0].16b, @tweak[0].16b
+ st1 {@dat[0].4s},[$out],#16
+ // save the last tweak
+ mov $lastTweak.16b,@tweak[0].16b
+ b 100f
+1: // process last 2 blocks
+ cmp $blocks,#2
+ b.gt 1f
+ ld1 {@dat[0].4s,@dat[1].4s},[$inp],#32
+___
+ &rbit(@tweak[0],@tweak[0]);
+ &rbit(@tweak[1],@tweak[1]);
+$code.=<<___;
+ eor @dat[0].16b, @dat[0].16b, @tweak[0].16b
+ eor @dat[1].16b, @dat[1].16b, @tweak[1].16b
+___
+ &rev32(@dat[0],@dat[0]);
+ &rev32(@dat[1],@dat[1]);
+ &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]);
+ &rev32(@dat[0],@dat[0]);
+ &rev32(@dat[1],@dat[1]);
+$code.=<<___;
+ eor @dat[0].16b, @dat[0].16b, @tweak[0].16b
+ eor @dat[1].16b, @dat[1].16b, @tweak[1].16b
+ st1 {@dat[0].4s,@dat[1].4s},[$out],#32
+ // save the last tweak
+ mov $lastTweak.16b,@tweak[1].16b
+ b 100f
+1: // process last 3 blocks
+ ld1 {@dat[0].4s,@dat[1].4s,@dat[2].4s},[$inp],#48
+___
+ &rbit(@tweak[0],@tweak[0]);
+ &rbit(@tweak[1],@tweak[1]);
+ &rbit(@tweak[2],@tweak[2]);
+$code.=<<___;
+ eor @dat[0].16b, @dat[0].16b, @tweak[0].16b
+ eor @dat[1].16b, @dat[1].16b, @tweak[1].16b
+ eor @dat[2].16b, @dat[2].16b, @tweak[2].16b
+___
+ &rev32(@dat[0],@dat[0]);
+ &rev32(@dat[1],@dat[1]);
+ &rev32(@dat[2],@dat[2]);
+ &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]);
+ &rev32(@dat[0],@dat[0]);
+ &rev32(@dat[1],@dat[1]);
+ &rev32(@dat[2],@dat[2]);
+$code.=<<___;
+ eor @dat[0].16b, @dat[0].16b, @tweak[0].16b
+ eor @dat[1].16b, @dat[1].16b, @tweak[1].16b
+ eor @dat[2].16b, @dat[2].16b, @tweak[2].16b
+ st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s},[$out],#48
+ // save the last tweak
+ mov $lastTweak.16b,@tweak[2].16b
+100:
+ cmp $remain,#0
+ b.eq 99f
+
+// This branch calculates the last two tweaks
+// when the encryption/decryption length is larger than 32
+.last_2blks_tweak${standard}:
+___
+ &rev32_armeb($lastTweak,$lastTweak);
+ &compute_tweak_vec($lastTweak,@tweak[1],$vTmp0,$vTmp1,$vMagic);
+ &compute_tweak_vec(@tweak[1],@tweak[2],$vTmp0,$vTmp1,$vMagic);
+$code.=<<___;
+ b .check_dec${standard}
+
+
+// This branch calculates the last two tweaks
+// when the encryption/decryption length is less than 32; only two tweaks are needed
+.only_2blks_tweak${standard}:
+ mov @tweak[1].16b,@tweak[0].16b
+___
+ &rev32_armeb(@tweak[1],@tweak[1]);
+ &compute_tweak_vec(@tweak[1],@tweak[2],$vTmp0,$vTmp1,$vMagic);
+$code.=<<___;
+ b .check_dec${standard}
+
+
+// Determine whether encryption or decryption is required.
+// The last two tweaks need to be swapped for decryption.
+.check_dec${standard}:
+ // encryption:1 decryption:0
+ cmp $enc,#1
+ b.eq .process_last_2blks${standard}
+ mov $vTmp0.16B,@tweak[1].16b
+ mov @tweak[1].16B,@tweak[2].16b
+ mov @tweak[2].16B,$vTmp0.16b
+
+.process_last_2blks${standard}:
+___
+ &rev32_armeb(@tweak[1],@tweak[1]);
+ &rev32_armeb(@tweak[2],@tweak[2]);
+$code.=<<___;
+ ld1 {@dat[0].4s},[$inp],#16
+ eor @dat[0].16b, @dat[0].16b, @tweak[1].16b
+___
+ &rev32(@dat[0],@dat[0]);
+ &enc_blk(@dat[0]);
+ &rev32(@dat[0],@dat[0]);
+$code.=<<___;
+ eor @dat[0].16b, @dat[0].16b, @tweak[1].16b
+ st1 {@dat[0].4s},[$out],#16
+
+ sub $lastBlk,$out,16
+ .loop${standard}:
+ subs $remain,$remain,1
+ ldrb w$tmp0,[$lastBlk,$remain]
+ ldrb w$tmp1,[$inp,$remain]
+ strb w$tmp1,[$lastBlk,$remain]
+ strb w$tmp0,[$out,$remain]
+ b.gt .loop${standard}
+ ld1 {@dat[0].4s}, [$lastBlk]
+ eor @dat[0].16b, @dat[0].16b, @tweak[2].16b
+___
+ &rev32(@dat[0],@dat[0]);
+ &enc_blk(@dat[0]);
+ &rev32(@dat[0],@dat[0]);
+$code.=<<___;
+ eor @dat[0].16b, @dat[0].16b, @tweak[2].16b
+ st1 {@dat[0].4s}, [$lastBlk]
+99:
+ ret
+.size ${prefix}_xts_do_cipher${standard},.-${prefix}_xts_do_cipher${standard}
+___
+} #end of gen_xts_do_cipher
+
+}}}
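For a length that is not a multiple of 16, the .loop above implements
standard XTS ciphertext stealing: the last full block is encrypted with the
first of the two final tweaks (swapped for decryption in .check_dec), its
leading remain bytes are then exchanged with the partial input block, and
the rebuilt block is encrypted once more with the second tweak. A byte-level
sketch of the exchange:

    #include <stddef.h>

    /* mirrors the ldrb/strb loop: cprev is the just-written full
     * ciphertext block, ptail/ctail the remain-byte input/output tails */
    static void xts_steal_swap(unsigned char *cprev, unsigned char *ctail,
                               const unsigned char *ptail, size_t remain)
    {
        for (size_t i = remain; i-- > 0; ) {
            unsigned char stolen = cprev[i]; /* ciphertext byte to steal  */
            cprev[i] = ptail[i];    /* head of the block to re-encrypt    */
            ctail[i] = stolen;      /* becomes the final partial C block  */
        }
        /* cprev (partial plaintext || ciphertext tail) is then encrypted
         * with the second tweak and stored over the last full block */
    }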
+
+{{{
+my ($enc)=("w6");
+
+sub gen_xts_cipher() {
+ my $en = shift;
+$code.=<<___;
+.globl ${prefix}_xts_${en}crypt${standard}
+.type ${prefix}_xts_${en}crypt${standard},%function
+.align 5
+${prefix}_xts_${en}crypt${standard}:
+ stp x15, x16, [sp, #-0x10]!
+ stp x17, x18, [sp, #-0x10]!
+ stp x19, x20, [sp, #-0x10]!
+ stp x21, x22, [sp, #-0x10]!
+ stp x23, x24, [sp, #-0x10]!
+ stp x25, x26, [sp, #-0x10]!
+ stp x27, x28, [sp, #-0x10]!
+ stp x29, x30, [sp, #-0x10]!
+ stp d8, d9, [sp, #-0x10]!
+ stp d10, d11, [sp, #-0x10]!
+ stp d12, d13, [sp, #-0x10]!
+ stp d14, d15, [sp, #-0x10]!
+___
+ &mov_en_to_enc($en,$enc);
+$code.=<<___;
+ bl ${prefix}_xts_do_cipher${standard}
+ ldp d14, d15, [sp], #0x10
+ ldp d12, d13, [sp], #0x10
+ ldp d10, d11, [sp], #0x10
+ ldp d8, d9, [sp], #0x10
+ ldp x29, x30, [sp], #0x10
+ ldp x27, x28, [sp], #0x10
+ ldp x25, x26, [sp], #0x10
+ ldp x23, x24, [sp], #0x10
+ ldp x21, x22, [sp], #0x10
+ ldp x19, x20, [sp], #0x10
+ ldp x17, x18, [sp], #0x10
+ ldp x15, x16, [sp], #0x10
+ ret
+.size ${prefix}_xts_${en}crypt${standard},.-${prefix}_xts_${en}crypt${standard}
+___
+
+} # end of gen_xts_cipher
+$standard="_gb";
+&gen_xts_do_cipher();
+&gen_xts_cipher("en");
+&gen_xts_cipher("de");
+$standard="";
+&gen_xts_do_cipher();
+&gen_xts_cipher("en");
+&gen_xts_cipher("de");
+}}}
########################################
{ my %opcode = (
"sm4e" => 0xcec08400,
diff --git a/include/crypto/sm4_platform.h b/include/crypto/sm4_platform.h
index 2f5a6cf..0bde96f 100644
--- a/include/crypto/sm4_platform.h
+++ b/include/crypto/sm4_platform.h
@@ -26,6 +26,10 @@
# define HWSM4_cbc_encrypt sm4_v8_cbc_encrypt
# define HWSM4_ecb_encrypt sm4_v8_ecb_encrypt
# define HWSM4_ctr32_encrypt_blocks sm4_v8_ctr32_encrypt_blocks
+# define HWSM4_xts_encrypt_gb sm4_v8_xts_encrypt_gb
+# define HWSM4_xts_decrypt_gb sm4_v8_xts_decrypt_gb
+# define HWSM4_xts_encrypt sm4_v8_xts_encrypt
+# define HWSM4_xts_decrypt sm4_v8_xts_decrypt
# endif
# endif
# endif /* OPENSSL_CPUID_OBJ */
@@ -46,6 +50,16 @@ void HWSM4_ecb_encrypt(const unsigned char *in, unsigned char *out,
void HWSM4_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
size_t len, const void *key,
const unsigned char ivec[16]);
+/* xts mode in GB/T 17964-2021 */
+void HWSM4_xts_encrypt_gb(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1,
+ const SM4_KEY *key2, const uint8_t iv[16]);
+void HWSM4_xts_decrypt_gb(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1,
+ const SM4_KEY *key2, const uint8_t iv[16]);
+/* xts mode in IEEE Std 1619-2007 */
+void HWSM4_xts_encrypt(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1,
+ const SM4_KEY *key2, const uint8_t iv[16]);
+void HWSM4_xts_decrypt(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1,
+ const SM4_KEY *key2, const uint8_t iv[16]);
# endif /* HWSM4_CAPABLE */
#ifdef VPSM4_EX_CAPABLE
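Given these prototypes, a minimal usage sketch for the one-shot HW path,
assuming a 32-byte XTS key (two 16-byte SM4 halves) and a build where
HWSM4_CAPABLE resolves to the sm4_v8 backend declared above:

    #include <stddef.h>
    #include <stdint.h>
    #include "crypto/sm4_platform.h"

    void sm4_xts_encrypt_oneshot(const unsigned char key[32],
                                 const uint8_t iv[16],
                                 const unsigned char *in, unsigned char *out,
                                 size_t len)      /* len must be >= 16 */
    {
        SM4_KEY k1, k2;

        HWSM4_set_encrypt_key(key, &k1);        /* data key  */
        HWSM4_set_encrypt_key(key + 16, &k2);   /* tweak key */
        HWSM4_xts_encrypt(in, out, len, &k1, &k2, iv);  /* IEEE 1619 */
        /* HWSM4_xts_encrypt_gb() selects the GB/T 17964-2021 flavour */
    }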
--
2.36.1