代码拉取完成,页面将自动刷新
From eec2accd50fffe1399151112f53f4061b0eef2f0 Mon Sep 17 00:00:00 2001
From: Wenkai Lin <linwenkai6@hisilicon.com>
Date: Wed, 20 Mar 2024 16:11:22 +0800
Subject: [PATCH 25/44] cipher: add support for SM4 CBC and CTR modes in CE
instruction
This patch implements the CE instruction using SM4 CBC and CTR modes,
and includes the necessary logic for mode-specific operations,
such as generating initialization vectors (IV) and handling chaining
and counter values.
Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com>
Signed-off-by: Qi Tao <taoqi10@huawei.com>
---
Makefile.am | 5 +-
drv/isa_ce_sm4.c | 235 +++++++++++++
drv/isa_ce_sm4.h | 38 ++
drv/isa_ce_sm4_armv8.S | 774 +++++++++++++++++++++++++++++++++++++++++
v1/wd.c | 3 +-
v1/wd_rng.c | 4 +-
wd_cipher.c | 4 +-
7 files changed, 1056 insertions(+), 7 deletions(-)
create mode 100644 drv/isa_ce_sm4.c
create mode 100644 drv/isa_ce_sm4.h
create mode 100644 drv/isa_ce_sm4_armv8.S
diff --git a/Makefile.am b/Makefile.am
index cd3d7e5..f78ad14 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -78,6 +78,7 @@ libwd_crypto_la_SOURCES=wd_cipher.c wd_cipher.h wd_cipher_drv.h \
wd_rsa.c wd_rsa.h wd_rsa_drv.h \
wd_dh.c wd_dh.h wd_dh_drv.h \
wd_ecc.c wd_ecc.h wd_ecc_drv.h \
+ arm_arch_ce.h isa_ce_sm3.h isa_ce_sm4.h \
wd_digest.c wd_digest.h wd_digest_drv.h \
wd_util.c wd_util.h \
wd_sched.c wd_sched.h \
@@ -90,8 +91,8 @@ libhisi_sec_la_SOURCES=drv/hisi_sec.c drv/hisi_qm_udrv.c \
libhisi_hpre_la_SOURCES=drv/hisi_hpre.c drv/hisi_qm_udrv.c \
hisi_qm_udrv.h
-libisa_ce_la_SOURCES=drv/isa_ce_sm3.c drv/isa_ce_sm3_armv8.S arm_arch_ce.h \
- drv/isa_ce_sm3.h
+libisa_ce_la_SOURCES=arm_arch_ce.h drv/isa_ce_sm3.c drv/isa_ce_sm3_armv8.S isa_ce_sm3.h \
+ drv/isa_ce_sm4.c drv/isa_ce_sm4_armv8.S drv/isa_ce_sm4.h
if WD_STATIC_DRV
AM_CFLAGS += -DWD_STATIC_DRV -fPIC
diff --git a/drv/isa_ce_sm4.c b/drv/isa_ce_sm4.c
new file mode 100644
index 0000000..e2d81de
--- /dev/null
+++ b/drv/isa_ce_sm4.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: Apache-2.0
+/*
+ * Copyright 2011-2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+/*
+ * Copyright 2024 Huawei Technologies Co.,Ltd. All rights reserved.
+ */
+
+#include "drv/wd_cipher_drv.h"
+#include "wd_cipher.h"
+#include "isa_ce_sm4.h"
+
+#define SM4_ENCRYPT 1
+#define SM4_DECRYPT 0
+#define MSG_Q_DEPTH 1024
+#define INCREASE_BYTES 12
+#define SM4_BLOCK_SIZE 16
+#define MAX_BLOCK_NUM (1U << 28)
+#define CTR96_SHIFT_BITS 8
+
+#define GETU32(p) \
+ ((__u32)(p)[0] << 24 | (__u32)(p)[1] << 16 | (__u32)(p)[2] << 8 | (__u32)(p)[3])
+#define PUTU32(p, v) \
+ ((p)[0] = (__u8)((v) >> 24), (p)[1] = (__u8)((v) >> 16), \
+ (p)[2] = (__u8)((v) >> 8), (p)[3] = (__u8)(v))
+
+static int isa_ce_init(struct wd_alg_driver *drv, void *conf)
+{
+ struct wd_ctx_config_internal *config = conf;
+ struct sm4_ce_drv_ctx *sctx = drv->priv;
+
+ config->epoll_en = 0;
+ memcpy(&sctx->config, config, sizeof(struct wd_ctx_config_internal));
+
+ return 0;
+}
+
+static void isa_ce_exit(struct wd_alg_driver *drv)
+{
+}
+
+/* increment upper 96 bits of 128-bit counter by 1 */
+static void ctr96_inc(__u8 *counter)
+{
+ __u32 n = INCREASE_BYTES;
+ __u32 c = 1;
+
+ do {
+ --n;
+ c += counter[n];
+ counter[n] = (__u8)c;
+ c >>= CTR96_SHIFT_BITS;
+ } while (n);
+}
+
+static void sm4_v8_ctr32_encrypt(__u8 *in, __u8 *out,
+ __u64 len, const struct SM4_KEY *key, __u8 *iv)
+{
+ __u8 ecount_buf[SM4_BLOCK_SIZE] = {0};
+ __u64 blocks, offset;
+ __u32 ctr32;
+ __u32 n = 0;
+
+ ctr32 = GETU32(iv + INCREASE_BYTES);
+ while (len >= SM4_BLOCK_SIZE) {
+ blocks = len / SM4_BLOCK_SIZE;
+ /*
+ * 1<<28 is just a not-so-small yet not-so-large number...
+ * Below condition is practically never met, but it has to
+ * be checked for code correctness.
+ */
+ if (blocks > MAX_BLOCK_NUM)
+ blocks = MAX_BLOCK_NUM;
+ /*
+ * As (*func) operates on 32-bit counter, caller
+ * has to handle overflow. 'if' below detects the
+ * overflow, which is then handled by limiting the
+ * amount of blocks to the exact overflow point...
+ */
+ ctr32 += (__u32)blocks;
+ if (ctr32 < blocks) {
+ blocks -= ctr32;
+ ctr32 = 0;
+ }
+ sm4_v8_ctr32_encrypt_blocks(in, out, blocks, key, iv);
+ /* (*ctr) does not update iv, caller does: */
+ PUTU32(iv + INCREASE_BYTES, ctr32);
+ /* ... overflow was detected, propagate carry. */
+ if (ctr32 == 0)
+ ctr96_inc(iv);
+ offset = blocks * SM4_BLOCK_SIZE;
+ len -= offset;
+ out += offset;
+ in += offset;
+ }
+ if (len) {
+ sm4_v8_ctr32_encrypt_blocks(ecount_buf, ecount_buf, 1, key, iv);
+ ++ctr32;
+ PUTU32(iv + INCREASE_BYTES, ctr32);
+ if (ctr32 == 0)
+ ctr96_inc(iv);
+ while (len--) {
+ out[n] = in[n] ^ ecount_buf[n];
+ ++n;
+ }
+ }
+}
+
+static void sm4_ctr_encrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_enc)
+{
+ sm4_v8_ctr32_encrypt(msg->in, msg->out, msg->in_bytes, rkey_enc, msg->iv);
+}
+
+static void sm4_cbc_encrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_enc)
+{
+ sm4_v8_cbc_encrypt(msg->in, msg->out, msg->in_bytes, rkey_enc, msg->iv, SM4_ENCRYPT);
+}
+
+static void sm4_cbc_decrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_dec)
+{
+ sm4_v8_cbc_encrypt(msg->in, msg->out, msg->in_bytes, rkey_dec, msg->iv, SM4_DECRYPT);
+}
+
+void sm4_set_encrypt_key(const __u8 *userKey, struct SM4_KEY *key)
+{
+ sm4_v8_set_encrypt_key(userKey, key);
+}
+
+void sm4_set_decrypt_key(const __u8 *userKey, struct SM4_KEY *key)
+{
+ sm4_v8_set_decrypt_key(userKey, key);
+}
+
+static int isa_ce_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_msg)
+{
+ struct wd_cipher_msg *msg = wd_msg;
+ struct SM4_KEY rkey;
+
+ if (!msg) {
+ WD_ERR("invalid: input sm4 msg is NULL!\n");
+ return -WD_EINVAL;
+ }
+
+ if (msg->data_fmt == WD_SGL_BUF) {
+ WD_ERR("invalid: SM4 CE driver do not support sgl data format!\n");
+ return -WD_EINVAL;
+ }
+
+ if (msg->op_type == WD_CIPHER_ENCRYPTION || msg->mode == WD_CIPHER_CTR)
+ sm4_set_encrypt_key(msg->key, &rkey);
+ else
+ sm4_set_decrypt_key(msg->key, &rkey);
+
+ switch (msg->mode) {
+ case WD_CIPHER_CBC:
+ if (msg->op_type == WD_CIPHER_ENCRYPTION)
+ sm4_cbc_encrypt(msg, &rkey);
+ else
+ sm4_cbc_decrypt(msg, &rkey);
+ break;
+ case WD_CIPHER_CTR:
+ sm4_ctr_encrypt(msg, &rkey);
+ break;
+ default:
+ WD_ERR("The current block cipher mode is not supported!\n");
+ return -WD_EINVAL;
+ }
+
+ return 0;
+}
+
+static int isa_ce_cipher_recv(struct wd_alg_driver *drv, handle_t ctx, void *wd_msg)
+{
+ return 0;
+}
+
+static int cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *msg)
+{
+ return isa_ce_cipher_send(drv, ctx, msg);
+}
+
+static int cipher_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg)
+{
+ return isa_ce_cipher_recv(drv, ctx, msg);
+}
+
+#define GEN_CE_ALG_DRIVER(ce_alg_name, alg_type) \
+{\
+ .drv_name = "isa_ce_sm4",\
+ .alg_name = (ce_alg_name),\
+ .calc_type = UADK_ALG_CE_INSTR,\
+ .priority = 200,\
+ .op_type_num = 1,\
+ .fallback = 0,\
+ .init = isa_ce_init,\
+ .exit = isa_ce_exit,\
+ .send = alg_type##_send,\
+ .recv = alg_type##_recv,\
+}
+
+static struct wd_alg_driver cipher_alg_driver[] = {
+ GEN_CE_ALG_DRIVER("cbc(sm4)", cipher),
+ GEN_CE_ALG_DRIVER("ctr(sm4)", cipher),
+};
+
+static void __attribute__((constructor)) isa_ce_probe(void)
+{
+ __u32 alg_num, i;
+ int ret;
+
+ WD_INFO("Info: register SM4 CE alg drivers!\n");
+
+ alg_num = ARRAY_SIZE(cipher_alg_driver);
+ for (i = 0; i < alg_num; i++) {
+ ret = wd_alg_driver_register(&cipher_alg_driver[i]);
+ if (ret && ret != -WD_ENODEV)
+ WD_ERR("Error: register SM4 CE %s failed!\n",
+ cipher_alg_driver[i].alg_name);
+ }
+}
+
+static void __attribute__((destructor)) isa_ce_remove(void)
+{
+ __u32 alg_num, i;
+
+ WD_INFO("Info: unregister SM4 CE alg drivers!\n");
+ alg_num = ARRAY_SIZE(cipher_alg_driver);
+ for (i = 0; i < alg_num; i++)
+ wd_alg_driver_unregister(&cipher_alg_driver[i]);
+}
diff --git a/drv/isa_ce_sm4.h b/drv/isa_ce_sm4.h
new file mode 100644
index 0000000..0bc074d
--- /dev/null
+++ b/drv/isa_ce_sm4.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+/* Copyright 2024 Huawei Technologies Co.,Ltd. All rights reserved. */
+
+#ifndef __SM4_CE_DRV_H
+#define __SM4_CE_DRV_H
+
+#pragma once
+#include <stdint.h>
+#include "wd_alg_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SM4_KEY_SCHEDULE 32
+
+struct SM4_KEY {
+ __u32 rk[SM4_KEY_SCHEDULE];
+};
+
+struct sm4_ce_drv_ctx {
+ struct wd_ctx_config_internal config;
+};
+
+
+void sm4_v8_set_encrypt_key(const unsigned char *userKey, struct SM4_KEY *key);
+void sm4_v8_set_decrypt_key(const unsigned char *userKey, struct SM4_KEY *key);
+void sm4_v8_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const struct SM4_KEY *key,
+ unsigned char *ivec, const int enc);
+void sm4_v8_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key, const unsigned char ivec[16]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __SM4_CE_DRV_H */
diff --git a/drv/isa_ce_sm4_armv8.S b/drv/isa_ce_sm4_armv8.S
new file mode 100644
index 0000000..d7d172a
--- /dev/null
+++ b/drv/isa_ce_sm4_armv8.S
@@ -0,0 +1,774 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+/*
+ * Copyright 2011-2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+#include "../include/drv/arm_arch_ce.h"
+
+.arch armv8-a+crypto
+
+.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
+	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+	.set .Lv\b\().4s, \b
+.endr
+
+.macro sm4e, vd, vn
+ .inst 0xcec08400 | (.L\vn << 5) | .L\vd
+.endm
+
+.macro sm4ekey, vd, vn, vm
+ .inst 0xce60c800 | (.L\vm << 16) | (.L\vn << 5) | .L\vd
+.endm
+
+.text
+.align 6
+.Lck:
+.long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269
+.long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9
+.long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249
+.long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9
+.long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229
+.long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299
+.long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209
+.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
+.Lfk:
+.long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+.globl sm4_v8_set_encrypt_key
+.type sm4_v8_set_encrypt_key,%function
+.align 5
+sm4_v8_set_encrypt_key:
+ AARCH64_VALID_CALL_TARGET
+ ld1 {v0.4s},[x0]
+ adr x2,.Lfk
+ ld1 {v24.4s},[x2]
+ adr x2,.Lck
+ ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x2],64
+#ifndef __ARMEB__
+ rev32 v0.16b,v0.16b
+#endif
+ ld1 {v20.4s,v21.4s,v22.4s,v23.4s},[x2]
+ eor v0.16b,v0.16b,v24.16b;
+ sm4ekey v0.4s,v0.4s,v16.4s;
+ sm4ekey v1.4s,v0.4s,v17.4s;
+ sm4ekey v2.4s,v1.4s,v18.4s;
+ sm4ekey v3.4s,v2.4s,v19.4s;
+ sm4ekey v4.4s,v3.4s,v20.4s;
+ st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],64
+ sm4ekey v5.4s,v4.4s,v21.4s;
+ sm4ekey v6.4s,v5.4s,v22.4s;
+ sm4ekey v7.4s,v6.4s,v23.4s;
+ st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1]
+ ret
+.size sm4_v8_set_encrypt_key,.-sm4_v8_set_encrypt_key
+.globl sm4_v8_set_decrypt_key
+.type sm4_v8_set_decrypt_key,%function
+.align 5
+sm4_v8_set_decrypt_key:
+ AARCH64_VALID_CALL_TARGET
+ ld1 {v7.4s},[x0]
+ adr x2,.Lfk
+ ld1 {v24.4s},[x2]
+ adr x2, .Lck
+ ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x2],64
+#ifndef __ARMEB__
+ rev32 v7.16b,v7.16b
+#endif
+ ld1 {v20.4s,v21.4s,v22.4s,v23.4s},[x2]
+ eor v7.16b, v7.16b,v24.16b;
+ sm4ekey v7.4s,v7.4s,v16.4s;
+ sm4ekey v6.4s,v7.4s,v17.4s;
+ sm4ekey v5.4s,v6.4s,v18.4s;
+ rev64 v7.4s,v7.4s
+ rev64 v6.4s,v6.4s
+ ext v7.16b,v7.16b,v7.16b,#8
+ ext v6.16b,v6.16b,v6.16b,#8
+ sm4ekey v4.4s,v5.4s,v19.4s;
+ sm4ekey v3.4s,v4.4s,v20.4s;
+ rev64 v5.4s,v5.4s
+ rev64 v4.4s,v4.4s
+ ext v5.16b,v5.16b,v5.16b,#8
+ ext v4.16b,v4.16b,v4.16b,#8
+ sm4ekey v2.4s,v3.4s,v21.4s;
+ sm4ekey v1.4s,v2.4s,v22.4s;
+ rev64 v3.4s,v3.4s
+ rev64 v2.4s,v2.4s
+ ext v3.16b,v3.16b,v3.16b,#8
+ ext v2.16b,v2.16b,v2.16b,#8
+ sm4ekey v0.4s,v1.4s,v23.4s;
+ rev64 v1.4s, v1.4s
+ rev64 v0.4s, v0.4s
+ ext v1.16b,v1.16b,v1.16b,#8
+ ext v0.16b,v0.16b,v0.16b,#8
+ st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],64
+ st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1]
+ ret
+.size sm4_v8_set_decrypt_key,.-sm4_v8_set_decrypt_key
+.globl sm4_v8_cbc_encrypt
+.type sm4_v8_cbc_encrypt,%function
+.align 5
+sm4_v8_cbc_encrypt:
+ AARCH64_VALID_CALL_TARGET
+ stp d8,d9,[sp, #-16]!
+
+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x3],#64
+ ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x3]
+ ld1 {v8.4s},[x4]
+ cmp w5,#0
+ b.eq .Ldec
+1:
+ cmp x2, #64
+ b.lt 1f
+ ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x0],#64
+ eor v16.16b,v16.16b,v8.16b
+#ifndef __ARMEB__
+ rev32 v17.16b,v17.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v16.16b,v16.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v18.16b,v18.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v19.16b,v19.16b
+#endif
+ sm4e v16.4s,v0.4s;
+ sm4e v16.4s,v1.4s;
+ sm4e v16.4s,v2.4s;
+ sm4e v16.4s,v3.4s;
+ sm4e v16.4s,v4.4s;
+ sm4e v16.4s,v5.4s;
+ sm4e v16.4s,v6.4s;
+ sm4e v16.4s,v7.4s;
+ rev64 v16.4s,v16.4s
+ ext v16.16b,v16.16b,v16.16b,#8
+ eor v17.16b,v17.16b,v16.16b
+ sm4e v17.4s,v0.4s;
+ sm4e v17.4s,v1.4s;
+ sm4e v17.4s,v2.4s;
+ sm4e v17.4s,v3.4s;
+ sm4e v17.4s,v4.4s;
+ sm4e v17.4s,v5.4s;
+ sm4e v17.4s,v6.4s;
+ sm4e v17.4s,v7.4s;
+ rev64 v17.4s,v17.4s
+ ext v17.16b,v17.16b,v17.16b,#8
+#ifndef __ARMEB__
+ rev32 v16.16b,v16.16b
+#endif
+ eor v18.16b,v18.16b,v17.16b
+ sm4e v18.4s,v0.4s;
+ sm4e v18.4s,v1.4s;
+ sm4e v18.4s,v2.4s;
+ sm4e v18.4s,v3.4s;
+ sm4e v18.4s,v4.4s;
+ sm4e v18.4s,v5.4s;
+ sm4e v18.4s,v6.4s;
+ sm4e v18.4s,v7.4s;
+ rev64 v18.4s,v18.4s
+ ext v18.16b,v18.16b,v18.16b,#8
+#ifndef __ARMEB__
+ rev32 v17.16b,v17.16b
+#endif
+ eor v19.16b,v19.16b,v18.16b
+ sm4e v19.4s,v0.4s;
+ sm4e v19.4s,v1.4s;
+ sm4e v19.4s,v2.4s;
+ sm4e v19.4s,v3.4s;
+ sm4e v19.4s,v4.4s;
+ sm4e v19.4s,v5.4s;
+ sm4e v19.4s,v6.4s;
+ sm4e v19.4s,v7.4s;
+ rev64 v19.4s,v19.4s
+ ext v19.16b,v19.16b,v19.16b,#8
+#ifndef __ARMEB__
+ rev32 v18.16b,v18.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v19.16b,v19.16b
+#endif
+ mov v8.16b,v19.16b
+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64
+ subs x2,x2,#64
+ b.ne 1b
+1:
+ subs x2,x2,#16
+ b.lt 3f
+ ld1 {v16.4s},[x0],#16
+ eor v8.16b,v8.16b,v16.16b
+#ifndef __ARMEB__
+ rev32 v8.16b,v8.16b
+#endif
+ sm4e v8.4s,v0.4s;
+ sm4e v8.4s,v1.4s;
+ sm4e v8.4s,v2.4s;
+ sm4e v8.4s,v3.4s;
+ sm4e v8.4s,v4.4s;
+ sm4e v8.4s,v5.4s;
+ sm4e v8.4s,v6.4s;
+ sm4e v8.4s,v7.4s;
+ rev64 v8.4s,v8.4s
+ ext v8.16b,v8.16b,v8.16b,#8
+#ifndef __ARMEB__
+ rev32 v8.16b,v8.16b
+#endif
+ st1 {v8.16b},[x1],#16
+ b.ne 1b
+ b 3f
+.Ldec:
+1:
+ cmp x2, #64
+ b.lt 1f
+ ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x0]
+ ld1 {v24.4s,v25.4s,v26.4s,v27.4s},[x0],#64
+ cmp x2,#128
+ b.lt 2f
+ // 8 blocks mode
+ ld1 {v20.4s,v21.4s,v22.4s,v23.4s},[x0]
+ ld1 {v28.4s,v29.4s,v30.4s,v31.4s},[x0],#64
+#ifndef __ARMEB__
+ rev32 v16.16b,v16.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v17.16b,v17.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v18.16b,v18.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v19.16b,v19.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v20.16b,v20.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v21.16b,v21.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v22.16b,v22.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v23.16b,v23.16b
+#endif
+ sm4e v16.4s,v0.4s;
+ sm4e v17.4s,v0.4s;
+ sm4e v18.4s,v0.4s;
+ sm4e v19.4s,v0.4s;
+
+ sm4e v16.4s,v1.4s;
+ sm4e v17.4s,v1.4s;
+ sm4e v18.4s,v1.4s;
+ sm4e v19.4s,v1.4s;
+
+ sm4e v16.4s,v2.4s;
+ sm4e v17.4s,v2.4s;
+ sm4e v18.4s,v2.4s;
+ sm4e v19.4s,v2.4s;
+
+ sm4e v16.4s,v3.4s;
+ sm4e v17.4s,v3.4s;
+ sm4e v18.4s,v3.4s;
+ sm4e v19.4s,v3.4s;
+
+ sm4e v16.4s,v4.4s;
+ sm4e v17.4s,v4.4s;
+ sm4e v18.4s,v4.4s;
+ sm4e v19.4s,v4.4s;
+
+ sm4e v16.4s,v5.4s;
+ sm4e v17.4s,v5.4s;
+ sm4e v18.4s,v5.4s;
+ sm4e v19.4s,v5.4s;
+
+ sm4e v16.4s,v6.4s;
+ sm4e v17.4s,v6.4s;
+ sm4e v18.4s,v6.4s;
+ sm4e v19.4s,v6.4s;
+
+ sm4e v16.4s,v7.4s;
+ rev64 v16.4s,v16.4s
+ sm4e v17.4s,v7.4s;
+ ext v16.16b,v16.16b,v16.16b,#8
+ rev64 v17.4s,v17.4s
+ sm4e v18.4s,v7.4s;
+ ext v17.16b,v17.16b,v17.16b,#8
+ rev64 v18.4s,v18.4s
+ sm4e v19.4s,v7.4s;
+ ext v18.16b,v18.16b,v18.16b,#8
+ rev64 v19.4s,v19.4s
+ ext v19.16b,v19.16b,v19.16b,#8
+ sm4e v20.4s,v0.4s;
+ sm4e v21.4s,v0.4s;
+ sm4e v22.4s,v0.4s;
+ sm4e v23.4s,v0.4s;
+
+ sm4e v20.4s,v1.4s;
+ sm4e v21.4s,v1.4s;
+ sm4e v22.4s,v1.4s;
+ sm4e v23.4s,v1.4s;
+
+ sm4e v20.4s,v2.4s;
+ sm4e v21.4s,v2.4s;
+ sm4e v22.4s,v2.4s;
+ sm4e v23.4s,v2.4s;
+
+ sm4e v20.4s,v3.4s;
+ sm4e v21.4s,v3.4s;
+ sm4e v22.4s,v3.4s;
+ sm4e v23.4s,v3.4s;
+
+ sm4e v20.4s,v4.4s;
+ sm4e v21.4s,v4.4s;
+ sm4e v22.4s,v4.4s;
+ sm4e v23.4s,v4.4s;
+
+ sm4e v20.4s,v5.4s;
+ sm4e v21.4s,v5.4s;
+ sm4e v22.4s,v5.4s;
+ sm4e v23.4s,v5.4s;
+
+ sm4e v20.4s,v6.4s;
+ sm4e v21.4s,v6.4s;
+ sm4e v22.4s,v6.4s;
+ sm4e v23.4s,v6.4s;
+
+ sm4e v20.4s,v7.4s;
+ rev64 v20.4s,v20.4s
+ sm4e v21.4s,v7.4s;
+ ext v20.16b,v20.16b,v20.16b,#8
+ rev64 v21.4s,v21.4s
+ sm4e v22.4s,v7.4s;
+ ext v21.16b,v21.16b,v21.16b,#8
+ rev64 v22.4s,v22.4s
+ sm4e v23.4s,v7.4s;
+ ext v22.16b,v22.16b,v22.16b,#8
+ rev64 v23.4s,v23.4s
+ ext v23.16b,v23.16b,v23.16b,#8
+#ifndef __ARMEB__
+ rev32 v16.16b,v16.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v17.16b,v17.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v18.16b,v18.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v19.16b,v19.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v20.16b,v20.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v21.16b,v21.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v22.16b,v22.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v23.16b,v23.16b
+#endif
+ eor v16.16b,v16.16b,v8.16b
+ eor v17.16b,v17.16b,v24.16b
+ eor v18.16b,v18.16b,v25.16b
+ mov v8.16b,v31.16b
+ eor v19.16b,v19.16b,v26.16b
+ eor v20.16b,v20.16b,v27.16b
+ eor v21.16b,v21.16b,v28.16b
+ eor v22.16b,v22.16b,v29.16b
+ eor v23.16b,v23.16b,v30.16b
+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64
+ st1 {v20.4s,v21.4s,v22.4s,v23.4s},[x1],#64
+ subs x2,x2,128
+ b.gt 1b
+ b 3f
+ // 4 blocks mode
+2:
+#ifndef __ARMEB__
+ rev32 v16.16b,v16.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v17.16b,v17.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v18.16b,v18.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v19.16b,v19.16b
+#endif
+ sm4e v16.4s,v0.4s;
+ sm4e v17.4s,v0.4s;
+ sm4e v18.4s,v0.4s;
+ sm4e v19.4s,v0.4s;
+
+ sm4e v16.4s,v1.4s;
+ sm4e v17.4s,v1.4s;
+ sm4e v18.4s,v1.4s;
+ sm4e v19.4s,v1.4s;
+
+ sm4e v16.4s,v2.4s;
+ sm4e v17.4s,v2.4s;
+ sm4e v18.4s,v2.4s;
+ sm4e v19.4s,v2.4s;
+
+ sm4e v16.4s,v3.4s;
+ sm4e v17.4s,v3.4s;
+ sm4e v18.4s,v3.4s;
+ sm4e v19.4s,v3.4s;
+
+ sm4e v16.4s,v4.4s;
+ sm4e v17.4s,v4.4s;
+ sm4e v18.4s,v4.4s;
+ sm4e v19.4s,v4.4s;
+
+ sm4e v16.4s,v5.4s;
+ sm4e v17.4s,v5.4s;
+ sm4e v18.4s,v5.4s;
+ sm4e v19.4s,v5.4s;
+
+ sm4e v16.4s,v6.4s;
+ sm4e v17.4s,v6.4s;
+ sm4e v18.4s,v6.4s;
+ sm4e v19.4s,v6.4s;
+
+ sm4e v16.4s,v7.4s;
+ rev64 v16.4s,v16.4s
+ sm4e v17.4s,v7.4s;
+ ext v16.16b,v16.16b,v16.16b,#8
+ rev64 v17.4s,v17.4s
+ sm4e v18.4s,v7.4s;
+ ext v17.16b,v17.16b,v17.16b,#8
+ rev64 v18.4s,v18.4s
+ sm4e v19.4s,v7.4s;
+ ext v18.16b,v18.16b,v18.16b,#8
+ rev64 v19.4s,v19.4s
+ ext v19.16b,v19.16b,v19.16b,#8
+#ifndef __ARMEB__
+ rev32 v16.16b,v16.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v17.16b,v17.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v18.16b,v18.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v19.16b,v19.16b
+#endif
+ eor v16.16b,v16.16b,v8.16b
+ eor v17.16b,v17.16b,v24.16b
+ mov v8.16b,v27.16b
+ eor v18.16b,v18.16b,v25.16b
+ eor v19.16b,v19.16b,v26.16b
+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64
+ subs x2,x2,#64
+ b.gt 1b
+1:
+ subs x2,x2,#16
+ b.lt 3f
+ ld1 {v16.4s},[x0],#16
+ mov v24.16b,v16.16b
+#ifndef __ARMEB__
+ rev32 v16.16b,v16.16b
+#endif
+ sm4e v16.4s,v0.4s;
+ sm4e v16.4s,v1.4s;
+ sm4e v16.4s,v2.4s;
+ sm4e v16.4s,v3.4s;
+ sm4e v16.4s,v4.4s;
+ sm4e v16.4s,v5.4s;
+ sm4e v16.4s,v6.4s;
+ sm4e v16.4s,v7.4s;
+ rev64 v16.4s,v16.4s
+ ext v16.16b,v16.16b,v16.16b,#8
+#ifndef __ARMEB__
+ rev32 v16.16b,v16.16b
+#endif
+ eor v16.16b,v16.16b,v8.16b
+ mov v8.16b,v24.16b
+ st1 {v16.16b},[x1],#16
+ b.ne 1b
+3:
+ // save back IV
+ st1 {v8.16b},[x4]
+ ldp d8,d9,[sp],#16
+ ret
+.size sm4_v8_cbc_encrypt,.-sm4_v8_cbc_encrypt
+.globl sm4_v8_ctr32_encrypt_blocks
+.type sm4_v8_ctr32_encrypt_blocks,%function
+.align 5
+sm4_v8_ctr32_encrypt_blocks:
+ AARCH64_VALID_CALL_TARGET
+ stp d8,d9,[sp, #-16]!
+
+ ld1 {v8.4s},[x4]
+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x3],64
+ ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x3]
+#ifndef __ARMEB__
+ rev32 v8.16b,v8.16b
+#endif
+ mov w5,v8.s[3]
+1:
+ cmp x2,#4
+ b.lt 1f
+ ld1 {v24.4s,v25.4s,v26.4s,v27.4s},[x0],#64
+ mov v16.16b,v8.16b
+ mov v17.16b,v8.16b
+ mov v18.16b,v8.16b
+ mov v19.16b,v8.16b
+ add w5,w5,#1
+ mov v17.s[3],w5
+ add w5,w5,#1
+ mov v18.s[3],w5
+ add w5,w5,#1
+ mov v19.s[3],w5
+ cmp x2,#8
+ b.lt 2f
+ ld1 {v28.4s,v29.4s,v30.4s,v31.4s},[x0],#64
+ mov v20.16b,v8.16b
+ mov v21.16b,v8.16b
+ mov v22.16b,v8.16b
+ mov v23.16b,v8.16b
+ add w5,w5,#1
+ mov v20.s[3],w5
+ add w5,w5,#1
+ mov v21.s[3],w5
+ add w5,w5,#1
+ mov v22.s[3],w5
+ add w5,w5,#1
+ mov v23.s[3],w5
+ sm4e v16.4s,v0.4s;
+ sm4e v17.4s,v0.4s;
+ sm4e v18.4s,v0.4s;
+ sm4e v19.4s,v0.4s;
+
+ sm4e v16.4s,v1.4s;
+ sm4e v17.4s,v1.4s;
+ sm4e v18.4s,v1.4s;
+ sm4e v19.4s,v1.4s;
+
+ sm4e v16.4s,v2.4s;
+ sm4e v17.4s,v2.4s;
+ sm4e v18.4s,v2.4s;
+ sm4e v19.4s,v2.4s;
+
+ sm4e v16.4s,v3.4s;
+ sm4e v17.4s,v3.4s;
+ sm4e v18.4s,v3.4s;
+ sm4e v19.4s,v3.4s;
+
+ sm4e v16.4s,v4.4s;
+ sm4e v17.4s,v4.4s;
+ sm4e v18.4s,v4.4s;
+ sm4e v19.4s,v4.4s;
+
+ sm4e v16.4s,v5.4s;
+ sm4e v17.4s,v5.4s;
+ sm4e v18.4s,v5.4s;
+ sm4e v19.4s,v5.4s;
+
+ sm4e v16.4s,v6.4s;
+ sm4e v17.4s,v6.4s;
+ sm4e v18.4s,v6.4s;
+ sm4e v19.4s,v6.4s;
+
+ sm4e v16.4s,v7.4s;
+ rev64 v16.4s,v16.4s
+ sm4e v17.4s,v7.4s;
+ ext v16.16b,v16.16b,v16.16b,#8
+ rev64 v17.4s,v17.4s
+ sm4e v18.4s,v7.4s;
+ ext v17.16b,v17.16b,v17.16b,#8
+ rev64 v18.4s,v18.4s
+ sm4e v19.4s,v7.4s;
+ ext v18.16b,v18.16b,v18.16b,#8
+ rev64 v19.4s,v19.4s
+ ext v19.16b,v19.16b,v19.16b,#8
+ sm4e v20.4s,v0.4s;
+ sm4e v21.4s,v0.4s;
+ sm4e v22.4s,v0.4s;
+ sm4e v23.4s,v0.4s;
+
+ sm4e v20.4s,v1.4s;
+ sm4e v21.4s,v1.4s;
+ sm4e v22.4s,v1.4s;
+ sm4e v23.4s,v1.4s;
+
+ sm4e v20.4s,v2.4s;
+ sm4e v21.4s,v2.4s;
+ sm4e v22.4s,v2.4s;
+ sm4e v23.4s,v2.4s;
+
+ sm4e v20.4s,v3.4s;
+ sm4e v21.4s,v3.4s;
+ sm4e v22.4s,v3.4s;
+ sm4e v23.4s,v3.4s;
+
+ sm4e v20.4s,v4.4s;
+ sm4e v21.4s,v4.4s;
+ sm4e v22.4s,v4.4s;
+ sm4e v23.4s,v4.4s;
+
+ sm4e v20.4s,v5.4s;
+ sm4e v21.4s,v5.4s;
+ sm4e v22.4s,v5.4s;
+ sm4e v23.4s,v5.4s;
+
+ sm4e v20.4s,v6.4s;
+ sm4e v21.4s,v6.4s;
+ sm4e v22.4s,v6.4s;
+ sm4e v23.4s,v6.4s;
+
+ sm4e v20.4s,v7.4s;
+ rev64 v20.4s,v20.4s
+ sm4e v21.4s,v7.4s;
+ ext v20.16b,v20.16b,v20.16b,#8
+ rev64 v21.4s,v21.4s
+ sm4e v22.4s,v7.4s;
+ ext v21.16b,v21.16b,v21.16b,#8
+ rev64 v22.4s,v22.4s
+ sm4e v23.4s,v7.4s;
+ ext v22.16b,v22.16b,v22.16b,#8
+ rev64 v23.4s,v23.4s
+ ext v23.16b,v23.16b,v23.16b,#8
+#ifndef __ARMEB__
+ rev32 v16.16b,v16.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v17.16b,v17.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v18.16b,v18.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v19.16b,v19.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v20.16b,v20.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v21.16b,v21.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v22.16b,v22.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v23.16b,v23.16b
+#endif
+ eor v16.16b,v16.16b,v24.16b
+ eor v17.16b,v17.16b,v25.16b
+ eor v18.16b,v18.16b,v26.16b
+ eor v19.16b,v19.16b,v27.16b
+ eor v20.16b,v20.16b,v28.16b
+ eor v21.16b,v21.16b,v29.16b
+ eor v22.16b,v22.16b,v30.16b
+ eor v23.16b,v23.16b,v31.16b
+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64
+ st1 {v20.4s,v21.4s,v22.4s,v23.4s},[x1],#64
+ subs x2,x2,#8
+ b.eq 3f
+ add w5,w5,#1
+ mov v8.s[3],w5
+ b 1b
+2:
+ sm4e v16.4s,v0.4s;
+ sm4e v17.4s,v0.4s;
+ sm4e v18.4s,v0.4s;
+ sm4e v19.4s,v0.4s;
+
+ sm4e v16.4s,v1.4s;
+ sm4e v17.4s,v1.4s;
+ sm4e v18.4s,v1.4s;
+ sm4e v19.4s,v1.4s;
+
+ sm4e v16.4s,v2.4s;
+ sm4e v17.4s,v2.4s;
+ sm4e v18.4s,v2.4s;
+ sm4e v19.4s,v2.4s;
+
+ sm4e v16.4s,v3.4s;
+ sm4e v17.4s,v3.4s;
+ sm4e v18.4s,v3.4s;
+ sm4e v19.4s,v3.4s;
+
+ sm4e v16.4s,v4.4s;
+ sm4e v17.4s,v4.4s;
+ sm4e v18.4s,v4.4s;
+ sm4e v19.4s,v4.4s;
+
+ sm4e v16.4s,v5.4s;
+ sm4e v17.4s,v5.4s;
+ sm4e v18.4s,v5.4s;
+ sm4e v19.4s,v5.4s;
+
+ sm4e v16.4s,v6.4s;
+ sm4e v17.4s,v6.4s;
+ sm4e v18.4s,v6.4s;
+ sm4e v19.4s,v6.4s;
+
+ sm4e v16.4s,v7.4s;
+ rev64 v16.4s,v16.4s
+ sm4e v17.4s,v7.4s;
+ ext v16.16b,v16.16b,v16.16b,#8
+ rev64 v17.4s,v17.4s
+ sm4e v18.4s,v7.4s;
+ ext v17.16b,v17.16b,v17.16b,#8
+ rev64 v18.4s,v18.4s
+ sm4e v19.4s,v7.4s;
+ ext v18.16b,v18.16b,v18.16b,#8
+ rev64 v19.4s,v19.4s
+ ext v19.16b,v19.16b,v19.16b,#8
+#ifndef __ARMEB__
+ rev32 v16.16b,v16.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v17.16b,v17.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v18.16b,v18.16b
+#endif
+#ifndef __ARMEB__
+ rev32 v19.16b,v19.16b
+#endif
+ eor v16.16b,v16.16b,v24.16b
+ eor v17.16b,v17.16b,v25.16b
+ eor v18.16b,v18.16b,v26.16b
+ eor v19.16b,v19.16b,v27.16b
+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64
+ subs x2,x2,#4
+ b.eq 3f
+ add w5,w5,#1
+ mov v8.s[3],w5
+ b 1b
+1:
+ subs x2,x2,#1
+ b.lt 3f
+ mov v16.16b,v8.16b
+ ld1 {v24.4s},[x0],#16
+ sm4e v16.4s,v0.4s;
+ sm4e v16.4s,v1.4s;
+ sm4e v16.4s,v2.4s;
+ sm4e v16.4s,v3.4s;
+ sm4e v16.4s,v4.4s;
+ sm4e v16.4s,v5.4s;
+ sm4e v16.4s,v6.4s;
+ sm4e v16.4s,v7.4s;
+ rev64 v16.4s,v16.4s
+ ext v16.16b,v16.16b,v16.16b,#8
+#ifndef __ARMEB__
+ rev32 v16.16b,v16.16b
+#endif
+ eor v16.16b,v16.16b,v24.16b
+ st1 {v16.4s},[x1],#16
+ b.eq 3f
+ add w5,w5,#1
+ mov v8.s[3],w5
+ b 1b
+3:
+ ldp d8,d9,[sp],#16
+ ret
+.size sm4_v8_ctr32_encrypt_blocks,.-sm4_v8_ctr32_encrypt_blocks
diff --git a/v1/wd.c b/v1/wd.c
index 26e7af3..4286bbe 100644
--- a/v1/wd.c
+++ b/v1/wd.c
@@ -88,7 +88,8 @@ static int get_raw_attr(const char *dev_root, const char *attr,
if (ptrRet == NULL)
return -WD_ENODEV;
- /* The attr_file = "/sys/class/uacce/xxx"
+ /*
+ * The attr_file = "/sys/class/uacce/xxx"
* It's the Internal Definition File Node
*/
fd = open(attr_path, O_RDONLY, 0);
diff --git a/v1/wd_rng.c b/v1/wd_rng.c
index 24a4b7a..7a89cd1 100644
--- a/v1/wd_rng.c
+++ b/v1/wd_rng.c
@@ -57,7 +57,7 @@ static int wcrypto_setup_qinfo(struct wcrypto_rng_ctx_setup *setup,
WD_ERR("algorithm mismatch!\n");
return ret;
}
- qinfo = q->qinfo;
+ qinfo = q->qinfo;
/* lock at ctx creating */
wd_spinlock(&qinfo->qlock);
if (qinfo->ctx_num >= WD_MAX_CTX_NUM) {
@@ -120,7 +120,7 @@ void *wcrypto_create_rng_ctx(struct wd_queue *q,
return ctx;
free_ctx_id:
- qinfo = q->qinfo;
+ qinfo = q->qinfo;
wd_spinlock(&qinfo->qlock);
qinfo->ctx_num--;
wd_free_id(qinfo->ctx_id, WD_MAX_CTX_NUM, ctx_id, WD_MAX_CTX_NUM);
diff --git a/wd_cipher.c b/wd_cipher.c
index f35ce6f..63ec362 100644
--- a/wd_cipher.c
+++ b/wd_cipher.c
@@ -622,10 +622,10 @@ static int send_recv_sync(struct wd_ctx_internal *ctx,
msg_handle.send = wd_cipher_setting.driver->send;
msg_handle.recv = wd_cipher_setting.driver->recv;
- pthread_spin_lock(&ctx->lock);
+ wd_ctx_spin_lock(ctx, wd_cipher_setting.driver->calc_type);
ret = wd_handle_msg_sync(wd_cipher_setting.driver, &msg_handle, ctx->ctx,
msg, NULL, wd_cipher_setting.config.epoll_en);
- pthread_spin_unlock(&ctx->lock);
+ wd_ctx_spin_unlock(ctx, wd_cipher_setting.driver->calc_type);
return ret;
}
--
2.25.1
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。