diff --git a/Performance.testing.md b/Performance.testing.md new file mode 100644 index 0000000000000000000000000000000000000000..3744d7c68f56ba0aebb209f8472f7a1755d983e2 --- /dev/null +++ b/Performance.testing.md @@ -0,0 +1,211 @@ +# Performance Testing + +性能测试方法。 + +- [Performance Testing](#performance-testing) + - [HiBench](#hibench) + - [系统能力检测](#系统能力检测) + - [UADK 及 硬件加速器](#uadk-及-硬件加速器) + - [I/O 设备,磁盘](#io-设备磁盘) + - [TeraSort Benchmark](#terasort-benchmark) + - [准备工作:设定 HDFS Transparent Encryption 模式](#准备工作设定-hdfs-transparent-encryption-模式) + - [`teragen` `terasort` 命令行使用举例](#teragen-terasort-命令行使用举例) + - [Terasort 2.4GB 数据 (24M rows)](#terasort-24gb-数据-24m-rows) + - [Terasort 25GB 数据 (256M rows)](#terasort-25gb-数据-256m-rows) + - [Terasort 100GB 数据 (1G rows)](#terasort-100gb-数据-1g-rows) + +## HiBench + +编译命令: + + git clone https://github.com/Intel-bigdata/HiBench.git HiBench.git + cd HiBench.git + mvn -Phadoopbench -Dspark=2.4 -Dscala=2.11 clean package + ... ... + [INFO] ------------------------------------------------------------------------ + [INFO] Reactor Summary: + [INFO] + [INFO] hibench 8.0-SNAPSHOT ............................... SUCCESS [ 0.135 s] + [INFO] hibench-common 8.0-SNAPSHOT ........................ SUCCESS [ 49.875 s] + [INFO] HiBench data generation tools 8.0-SNAPSHOT ......... SUCCESS [01:58 min] + [INFO] hadoopbench 8.0-SNAPSHOT ........................... SUCCESS [ 0.003 s] + [INFO] hadoopbench-sql 8.0-SNAPSHOT ....................... SUCCESS [ 05:15 h] + [INFO] mahout 8.0-SNAPSHOT ................................ SUCCESS [ 02:03 h] + [INFO] PEGASUS: A Peta-Scale Graph Mining System 2.0-SNAPSHOT SUCCESS [ 13.317 s] + [INFO] nutchindexing 8.0-SNAPSHOT ......................... SUCCESS [ 04:14 h] + [INFO] sparkbench 8.0-SNAPSHOT ............................ SUCCESS [ 0.009 s] + [INFO] sparkbench-common 8.0-SNAPSHOT ..................... SUCCESS [ 15.235 s] + [INFO] sparkbench micro benchmark 8.0-SNAPSHOT ............ 
SUCCESS [ 8.194 s] + [INFO] sparkbench machine learning benchmark 8.0-SNAPSHOT . SUCCESS [ 32.744 s] + [INFO] sparkbench-websearch 8.0-SNAPSHOT .................. SUCCESS [ 4.131 s] + [INFO] sparkbench-graph 8.0-SNAPSHOT ...................... SUCCESS [ 7.727 s] + [INFO] sparkbench-sql 8.0-SNAPSHOT ........................ SUCCESS [ 9.951 s] + [INFO] sparkbench project assembly 8.0-SNAPSHOT ........... SUCCESS [ 16.059 s] + [INFO] flinkbench 8.0-SNAPSHOT ............................ SUCCESS [ 0.003 s] + [INFO] flinkbench-streaming 8.0-SNAPSHOT .................. SUCCESS [ 33.173 s] + [INFO] gearpumpbench 8.0-SNAPSHOT ......................... SUCCESS [ 0.003 s] + [INFO] gearpumpbench-streaming 8.0-SNAPSHOT ............... SUCCESS [ 13.274 s] + [INFO] stormbench 8.0-SNAPSHOT ............................ SUCCESS [ 0.003 s] + [INFO] stormbench-streaming 8.0-SNAPSHOT .................. SUCCESS [ 11.749 s] + [INFO] ------------------------------------------------------------------------ + [INFO] BUILD SUCCESS + [INFO] ------------------------------------------------------------------------ + [INFO] Total time: 11:38 h + [INFO] Finished at: 2024-04-03T00:38:49+08:00 + [INFO] ------------------------------------------------------------------------ + +## 系统能力检测 + +### UADK 及 硬件加速器 + + while true; do echo "$(date +'%y/%m/%d %H:%M:%S ') Run:" && sleep 5 && cat /sys/kernel/debug/hisi_sec2/*/qm/regs | grep QM_DFX_DB_CNT && echo ""; done + +### I/O 设备,磁盘 + + sudo yum install -yq sysstat iotop + iostat -m -p nvme0n1 nvme1n1 nvme2n1 nvme3n1 10 + iotop -d 10 + +## TeraSort Benchmark + +### 准备工作:设定 HDFS Transparent Encryption 模式 + +TeraSort把测试数据存放在统一的目录中。为了比较使用硬件加速后的效果,需要首先将测试目录指定为加密目录(i.e. 
encryption zone)。命令参考如下,此处以 `/zone2` 举例,`gd0325.1619`是加密密钥名称。 + + # hadoop key create gd0325.1619 + # hadoop fs -mkdir /zone2 + # hdfs crypto -createZone -keyName gd0325.1619 -path /zone2 + +关于如何使用TeraSort,可以参考这里的简化脚本: + + git clone https://github.com/sunileman/MapReduce-Performance_Testing TeraSort.testing.git + +### `teragen` `terasort` 命令行使用举例 + +举例,生成 100GB 数据 (1G rows) + + hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-*examples*.jar \ + teragen \ + -Dmapred.map.tasks=95 \ + `expr 1024 \* 1024 \* 1024` \ + /zone2/terasort-input + +建议的 TeraGen task 数量为核数减一。`-Dmapred.map.tasks=(vcpu numbers - 1)` + + hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-*examples*.jar \ + terasort \ + -Dmapred.reduce.tasks=48 \ + /zone2/terasort-input \ + /zone2/terasort-output + + hadoop fs -rm -f -r /zone2/terasort-output + +建议的 TeraSort task 数量为核数的一半。`-Dmapred.reduce.tasks=(vcpu numbers divided by 2)` + +### Terasort 2.4GB 数据 (24M rows) + +KAEProvider + UADK: + + sed -i 's/BC/KAEProvider/g' $HADOOP_HOME/etc/hadoop/core-site.xml + hadoop fs -rm -f -r /zone2/terasort-output + time hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-*examples*.jar terasort -Dmapred.reduce.tasks=48 /zone2/terasort-input /zone2/terasort-output + + real 1m26.101s + user 1m55.372s + sys 0m15.341s + + real 1m28.176s + user 2m0.581s + sys 0m16.241s + + real 1m25.257s + user 1m42.370s + sys 0m15.169s + + real 1m26.243s + user 1m47.133s + sys 0m15.143s + +BC: + + sed -i 's/KAEProvider/BC/g' $HADOOP_HOME/etc/hadoop/core-site.xml + + real 1m54.501s + user 2m43.461s + sys 0m14.087s + + real 1m56.346s + user 2m51.848s + sys 0m14.831s + + real 1m55.491s + user 2m53.159s + sys 0m15.617s + +查看 hisi_sec 加速器设备所在的 NUMA 节点:`cat /sys/class/uacce/hisi_sec*/device/numa_node` + +### Terasort 25GB 数据 (256M rows) + + hadoop fs -rm -f -r /zone2/terasort-input + hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-*examples*.jar teragen -Dmapred.map.tasks=95 `expr 256 \* 1024 \* 1024` /zone2/terasort-input + +KAEProvider + UADK: + + sed -i 's/BC/KAEProvider/g' 
$HADOOP_HOME/etc/hadoop/core-site.xml + hadoop fs -rm -f -r /zone2/terasort-output + time hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-*examples*.jar terasort -Dmapred.reduce.tasks=48 /zone2/terasort-input /zone2/terasort-output + + real 14m0.594s + user 14m0.104s + sys 2m6.624s + + real 13m51.222s + user 13m50.860s + sys 2m12.218s + +BC: + + sed -i 's/KAEProvider/BC/g' $HADOOP_HOME/etc/hadoop/core-site.xml + hadoop fs -rm -f -r /zone2/terasort-output + time hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-*examples*.jar terasort -Dmapred.reduce.tasks=48 /zone2/terasort-input /zone2/terasort-output + + real 18m59.101s + user 22m36.986s + sys 1m52.592s + +### Terasort 100GB 数据 (1G rows) + + hadoop fs -rm -f -r /zone2/terasort-input + hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-*examples*.jar teragen -Dmapred.map.tasks=95 `expr 1024 \* 1024 \* 1024` /zone2/terasort-input + +KAEProvider + UADK: + + sed -i 's/BC/KAEProvider/g' $HADOOP_HOME/etc/hadoop/core-site.xml + + hadoop fs -rm -f -r /zone2/terasort-output + time \ + hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-*examples*.jar \ + terasort \ + -Dmapred.reduce.tasks=48 \ + /zone2/terasort-input \ + /zone2/terasort-output + + real 91m26.321s + user 82m31.546s + +BC: + + sed -i 's/KAEProvider/BC/g' $HADOOP_HOME/etc/hadoop/core-site.xml + hadoop fs -rm -f -r /zone2/terasort-output + time \ + hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-*examples*.jar \ + terasort \ + -Dmapred.reduce.tasks=48 \ + /zone2/terasort-input \ + /zone2/terasort-output + + real 135m47.011s + user 143m39.925s + sys 6m34.882s + +typical CPU load average (in 5 minutes): 1.32+ \ No newline at end of file diff --git a/Quick.Start.Guide.md b/Quick.Start.Guide.md index 48c8329a3f1537cbe0a0ccab1d600c6cb2563766..fcefa0b4aac9df9f2604c0a81d03ce317ac3318f 100644 --- a/Quick.Start.Guide.md +++ b/Quick.Start.Guide.md @@ -130,6 +130,14 @@ Tag:[v1.0-tag2.6-sm4-ctr](https://github.com/docularxu/uadk/releases/tag/v1.0- 
在主机环境下用普通用户身份编译。 +在 openEuler OS上,需要安装依赖包`numactl-devel`。 + + sudo yum install -y -q numactl-devel + +在 Ubuntu OS上,运行: + + sudo apt install libnuma-dev + ### 编译命令 $ cd uadk.git @@ -148,12 +156,17 @@ Tag:[v1.0-tag2.6-sm4-ctr](https://github.com/docularxu/uadk/releases/tag/v1.0- 在能够使用 UADK 功能之前,需要设置相关的环境变量。为了方便使用,环境变量(包括此处为 UADK 准备的,也包括下面 UADK Provider 以及 OpenSSL 3.0+ 相关)集中放在这个脚本文件里:[uadk-set-env.sh](https://github.com/docularxu/build-containers/blob/main/metadata/uadk-set-env.sh)。使用时,在当前环境下 `. ` 运行(相当于`source`命令)将文件中的内容导入当前shell环境。 - $ . ./uadk-set-env.sh + $ . ./build-containers.git/metadata/uadk-set-env.sh UADK 测试命令,推荐使用 `uadk_tool`。前述步骤正常的话,它已经被安装到了 `/usr/local/bin` 之下。可直接运行。 $ uadk_tool benchmark --alg sm4-128-ctr --mode sva --opt 0 --sync --pktlen 1024 --seconds 5 --multi 1 --thread 2 --ctxnum 6 + algname: length: perf: iops: CPU_rate: + sm4-128-ctr 1024Bytes 195300.4KB/s 195.3Kops 53.20% + $ uadk_tool benchmark --alg sm4-128-ecb --mode sva --opt 0 --sync --pktlen 1024 --seconds 5 --multi 1 --thread 2 --ctxnum 6 + algname: length: perf: iops: CPU_rate: + sm4-128-ecb 1024Bytes 168505.8KB/s 168.5Kops 47.60% ## OpenSSL 3.0 @@ -229,7 +242,7 @@ Tag:[v1.0-tag1.3-sm4-ctr](https://github.com/docularxu/uadk_engine/releases/ta 这一步有可能出现安装路径是`/usr/local/lib`的情况。为了让 OpenSSL 3.0 能正确找到 UADK Proivder (uadk_provider.so),需要手动把 `uadk_provider.*` 移动到 `/usr/local/lib/ossl-modules` 目录下。 - $ sudo mv /usr/local/lib/uadk_procider.* /usr/local/lib/ossl-modules + $ sudo mv /usr/local/lib/uadk_provider.* /usr/local/lib/ossl-modules ### 测试验证 @@ -262,4 +275,5 @@ Tag:[v1.0-tag1.3-sm4-ctr](https://github.com/docularxu/uadk_engine/releases/ta 在执行这些操作之前和之后,查看加速器硬件寄存器 `QM_DFX_DB_CNT` 值的是否有变化,来判断 UADK 确实被使用。 - sudo cat /sys/kernel/debug/hisi_sec2/*/qm/regs | grep QM_DFX_DB_CNT \ No newline at end of file + sudo su + cat /sys/kernel/debug/hisi_sec2/*/qm/regs | grep QM_DFX_DB_CNT \ No newline at end of file diff --git a/README.en.md b/README.en.md index 
36f21e102140bc36bcb5c778a509083135da3efc..4871211b8acaad0ea897ff0f91c2accf012bb263 100644 --- a/README.en.md +++ b/README.en.md @@ -1,36 +1,64 @@ -# uadk-bigdata +# BigData + UADK: A full-stack solution to accelerate big data processing -#### Description -UADK is a general-purpose user space accelerator framework that uses the SVA technology to provide a unified programming interface for hardware acceleration computing cryptography and compression algorithms. Uadk-bigdata provides uadk solution in bigdata scenario. +#### Background +Modern computing environments offer a variety of hardware acceleration features, such as encryption and compression, to improve performance and efficiency. UADK is a unified user-mode programming interface suite, built on Shared Virtual Addressing (SVA) technology and designed to make these hardware acceleration functions easy to use. However, in a big data environment, it is not easy to take full advantage of these hardware acceleration features, because doing so requires close collaboration between hardware and software. -#### Software Architecture -Software architecture description +### Project Introduction -#### Installation +This project aims to build a full-stack acceleration solution from hardware to application layer, especially in the field of big data. Our goal is to integrate the capabilities of the UADK with OpenSSL 3.0 and efficiently export these capabilities to a big data software stack that typically relies on the Java Development Kit (JDK) to ensure high performance, stability, and cross-platform compatibility. The core of this project is to establish an effective path to connect the JDK with OpenSSL 3.0 to provide a complete hardware acceleration solution for big data applications. -1. xxxx -2. xxxx -3. xxxx +### Software architecture -#### Instructions +We built a full-stack solution, as shown in the diagram below. 
From top to bottom, HBase (for big data storage and processing), JDK (for cross-platform support and performance optimization), OpenSSL (for security and encryption), UADK (for hardware acceleration), Linux kernel drivers (for hardware communication), and hardware accelerators at the bottom. ![hbase+uadk+software-stack-information](./pictures/sw.stack.png) -1. xxxx -2. xxxx -3. xxxx +### The main innovations -#### Contribution +1. Dynamic scheduling mechanism: We have implemented a dynamic scheduling mechanism in OpenSSL 3.0 called load-balancing. It allows multiple implementations of the same algorithm and the ability to dynamically deploy computational tasks to the most appropriate hardware unit based on the load state of the system. +2. Efficient Symmetric Encryption Algorithm: We chose SM4 as the symmetric encryption algorithm in the project and integrated it into HDFS Transparent Encryption. The purpose of this is to verify the performance of SM4 in big data scenarios, especially after acceleration by multiple computing power units. -1. Fork the repository -2. Create Feat_xxx branch -3. Commit your code -4. Create Pull Request +Through these innovations, we expect to dramatically improve the efficiency and performance of big data processing while ensuring data security. +#### The installation tutorial is explained in detail -#### Gitee Feature +This tutorial describes how to build a BigData + UADK solution in an openEuler environment. Follow these steps: -1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md -2. Gitee blog [blog.gitee.com](https://blog.gitee.com) -3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore) -4. The most valuable open source project [GVP](https://gitee.com/gvp) -5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help) -6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) +1. 
**Prepare the openEuler environment** + - Make sure your system is openEuler 23.09, which provides good underlying support for hardware acceleration. + - Update the system packages and install the necessary dependencies to ensure that the system runs stably and is compatible with subsequent installation steps. +2. **UADK project code pulling, compiling, installing** + - Pull the UADK project code from the official repository. + - Set the necessary compilation environment and parameters according to the project documentation. + - Compile the UADK source code and install the compiled product according to the guidance document. +3. **OpenSSL 3.0 and OpenSSL provider (uadk_provider) installation** + - Download and install OpenSSL 3.0 and make sure you have a UADK-compatible version installed. + - Install uadk_provider, which is an OpenSSL hardware acceleration provider that is tightly integrated with UADK. + - Configure OpenSSL to make sure it recognizes and uses uadk_provider correctly. +4. **Bisheng JDK 8.0 installation and configuration** + - Download and install Bisheng JDK 8.0, a version of the JDK tailored for performance optimization. + - Configure environment variables to ensure that systems and applications can find and use the Bisheng JDK. +5. **HDFS/Hadoop installation, HBase installation** + - Install Hadoop and ensure that HDFS is running properly, which is the foundation for big data storage and processing. + - Build HBase on the basis of Hadoop, which is a highly reliable and high-performance distributed database suitable for big data scenarios. +6. **Joint debugging, testing** + - After the entire software stack is installed, joint debugging is performed to ensure that the components work together correctly. + - Execute test cases to ensure that hardware acceleration is being utilized correctly and that the overall performance of the system is as expected. 
+ - For any problems encountered, refer to the documentation for each component for debugging, and record the resolution process and results. + +By following the preceding steps, you will be able to build a high-performance big data processing platform in the openEuler environment, take full advantage of the hardware acceleration function, and improve data processing efficiency and system performance. + +#### Description of the code structure + +This is an integration project that brings existing components together. To help users understand it, the configuration code is provided as a reference. + +1. ./uadk +2. ./openssl +3. ./bisheng_jdk +4. ./hbase +5. ./pictures + +#### How to contribute + +1. Fork this repository +2. Create a new Feat_xxx branch +3. Submit the code +4. Create a new pull request diff --git a/hbase.hadoop.kms.md b/hbase.hadoop.kms.md index 7f89fc955e08c9463402a72466ec62aaef6e1909..47f530ae4364d207b7a8e58e3325cde13a96017c 100644 --- a/hbase.hadoop.kms.md +++ b/hbase.hadoop.kms.md @@ -15,6 +15,7 @@ - [Hadoop, HBase 服务启动和停止](#hadoop-hbase-服务启动和停止) - [启动](#启动) - [停止](#停止) + - [多节点集群部署的 start/stop 脚本](#多节点集群部署的-startstop-脚本) - [Hadoop / HDFS 验证测试](#hadoop--hdfs-验证测试) - [测试用例:a oneliner](#测试用例a-oneliner) - [`hbase pe` 命令提示](#hbase-pe-命令提示) @@ -183,6 +184,63 @@ The buffer size used by CryptoInputStream and CryptoOutputStream. 
hadoop --daemon stop kms ${HADOOP_HOME}/sbin/stop-all.sh +### 多节点集群部署的 start/stop 脚本 + + # ls + mount.sh start_hadoop.sh stop_hadoop.sh + # cat mount.sh + #!/bin/sh + + ip_arr="agent1 agent2 agent3" + function mount_disk(){ + for ip in $ip_arr + do + ssh $ip "sh /root/init.sh" + done + } + + mount_disk + + # cat start_hadoop.sh + #!/bin/sh + + $HADOOP_HOME/sbin/start-all.sh + $HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver + $HADOOP_HOME/bin/hadoop --daemon start kms + + # cat stop_hadoop.sh + #!/bin/sh + + $HADOOP_HOME/sbin/stop-all.sh + $HADOOP_HOME/sbin/mr-jobhistory-daemon.sh stop historyserver + $HADOOP_HOME/bin/hadoop --daemon stop kms + + # cat init_ssd.sh + #!/bin/sh + + j=1 + for i in 0 2 3;do + umount /dev/nvme${i}n1 + mkdir -p /srv/BigData/hadoop/data${j} + #rm /home/hadoop -rf + #mkfs.ext4 -F /dev/nvme${i}n1 + mount /dev/nvme${i}n1 /srv/BigData/hadoop/data${j} + j=$(($j+1)) + done + + # cat init.sh + #!/bin/sh + + j=1 + for i in a b c d e f g h i j k l;do + umount /dev/sd$i + mkdir -p /srv/BigData/hadoop/data${j} + #rm /home/hadoop -rf + # mkfs.ext4 -F /dev/sd$i + mount /dev/sd$i /srv/BigData/hadoop/data${j} + j=$(($j+1)) + done + ## Hadoop / HDFS 验证测试 各种命令的输出: @@ -225,7 +283,7 @@ The buffer size used by CryptoInputStream and CryptoOutputStream. 
## 测试用例:a oneliner - # for i in {1..100}; do echo "Run $i:" && hadoop fs -rm /zone5/jre-bisheng-jdk8u402-kaeprovider-ossl3.0-0325.tar.gz || true && hadoop fs -put jre-bisheng-jdk8u402-kaeprovider-ossl3.0-0325.tar.gz /zone5 && cat /sys/kernel/debug/hisi_sec2/*/qm/regs | grep QM_DFX_DB_CNT && echo ""; done + # for i in {1..1}; do echo "Run $i:" && hadoop fs -rm /zone2/jre-bisheng-jdk8u402-kaeprovider-ossl3.0-0325.tar.gz || true && hadoop fs -put /usr/lib/jvm/jre-bisheng-jdk8u402-kaeprovider-ossl3.0-0325.tar.gz /zone2 && cat /sys/kernel/debug/hisi_sec2/*/qm/regs | grep QM_DFX_DB_CNT && echo ""; done 在执行这些操作之前和之后,查看加速器硬件寄存器 `QM_DFX_DB_CNT` 值的是否有变化,来判断 UADK 确实被使用。 diff --git a/openEuler 2403 Installation Guide.md b/openEuler 2403 Installation Guide.md new file mode 100644 index 0000000000000000000000000000000000000000..94b515e09bbd9db4eb7986726ed27b63f26ca140 --- /dev/null +++ b/openEuler 2403 Installation Guide.md @@ -0,0 +1,100 @@ +# 通过MobaXterm远程安装openEuler 24.03 指南 + +## 目录 + +- [环境](#环境) +- [准备工作](#准备工作) +- [远程连接到服务器](#远程连接到服务器) +- [下载 openEuler 24.03 ISO 镜像](#下载-openeuler-2403-iso-镜像) +- [上传 ISO 镜像到服务器](#上传-iso-镜像到服务器) +- [挂载 openEuler 24.03 ISO 镜像](#挂载-openeuler-2403-iso-镜像) +- [安装 openEuler 24.03](#安装-openeuler-2403) +- [配置系统](#配置系统) +- [完成安装](#完成安装) + +--- + +## 环境 + +- **目标操作系统**:openEuler 24.03 LTS版本 +- **硬件配置**:TaiShan 200 (Model 2280) (VD)服务器 +- **工具**:MobaXterm + +## 准备工作 + +确保已完成以下准备工作: + +- 登录uniVPN连接IP +- 下载并安装 MobaXterm 客户端:[MobaXterm官网](https://mobaxterm.mobatek.net/download.html)。 +- 获得远程服务器的IP地址(管理IP)、用户名和密码。 + +## 远程连接到服务器 + +打开 MobaXterm,并按照以下步骤连接到服务器: + +- 点击上方的 "Session"(会话)按钮。 +- 在弹出的窗口中选择 "SSH" 会话类型。 +- 在 "Remote host"(远程主机)栏中输入服务器的IP地址(管理IP)。 +- 在 "Specify username"(指定用户名)栏中输入您的用户名。(默认管理员账户为“root”) +- 点击 "OK" 连接到服务器,然后输入密码以完成连接。 + +## 下载 openEuler 24.03 ISO 镜像 + +- 在本地计算机上,打开浏览器并下载 openEuler 24.03 的 ISO 镜像文件:[下载地址](https://www.openeuler.org/zh/download/archive/detail/?version=openEuler%2024.03%20LTS)。 + +- 
架构选择“AArch64”,场景选择“服务器”,在磁盘空间允许的情况下可选择Offline Everything ISO(离线完整版)。 + +- 解压下载文件 + +## 上传 ISO 镜像到服务器 + + 使用 MobaXterm 将下载好的 ISO 镜像上传到远程服务器: + +- 点击左侧状态栏的“地球”图标(SFTP)。 +- 点击绿色箭头(Upload to current folder)。 +- 在本地计算机中找到已下载的 openEuler 24.03 的 ISO 镜像文件。 + +## 挂载 openEuler 24.03 ISO 镜像 + +在服务器上,执行以下命令来挂载 ISO 镜像到系统: + +```bash +# 创建用于挂载ISO的目录 +sudo mkdir /mnt/iso + +# 挂载ISO文件到创建的目录 +sudo mount -o loop /path/to/openEuler-24.03.iso /mnt/iso +``` + +其中,请将“/path/to/openEuler-24.03.iso”替换为ISO的实际文件路径。 + +## 安装 openEuler 24.03 + +ISO镜像挂载完成后,进入挂载目录并执行安装脚本: + +```bash +# 进入挂载目录 +cd /mnt/iso + +# 运行安装脚本 +sudo ./install.sh +``` + +安装过程可能会要求进行一些配置,如分区、用户设置等。 + +## 配置系统 + +安装完成后,根据系统提示进行必要的配置,包括但不限于: + +```bash +# 设置系统语言和时区 +sudo localectl set-locale LANG=zh_CN.UTF-8 && sudo timedatectl set-timezone Asia/Shanghai + +# 创建用户账号和密码 +sudo adduser username +sudo passwd username +``` + +## 完成安装 + +安装和配置完成后,根据安装程序的指示重启服务器。安装过程中可能需要在服务器的控制台(MobaXterm 的会话窗口)上进行一些额外的配置和确认操作。 diff --git a/openssl.md b/openssl.md index 7a4368c46f675e358620eb54333e3d6f60194ccc..15144ab2a98d960bdf4878deb704eadac22daa26 100644 --- a/openssl.md +++ b/openssl.md @@ -7,7 +7,15 @@ - [配置环境变量](#配置环境变量) - [验证 uadk\_provider](#验证-uadk_provider) - [uadk\_provider 能支持的算法有哪些?](#uadk_provider-能支持的算法有哪些) - - [场景一:使用 `openssl speed` 验证 SM4-CTR 用 default 还是 uadk\_provier 更快?](#场景一使用-openssl-speed-验证-sm4-ctr-用-default-还是-uadk_provier-更快) + - [场景一:使用 `openssl speed` 验证 SM4-CTR 用 default 还是 uadk\_provider 更快?](#场景一使用-openssl-speed-验证-sm4-ctr-用-default-还是-uadk_provider-更快) + - [async\_jobs 1](#async_jobs-1) + - [async\_jobs 10](#async_jobs-10) + - [multi 10](#multi-10) + - [multi 10 with two hisi\_sec devices working in parallel](#multi-10-with-two-hisi_sec-devices-working-in-parallel) + - [multi 20 with two hisi\_sec devices working in parallel](#multi-20-with-two-hisi_sec-devices-working-in-parallel) + - [multi 40 with two hisi\_sec devices working in parallel](#multi-40-with-two-hisi_sec-devices-working-in-parallel) + - [multi 60 with two hisi\_sec devices working in 
parallel](#multi-60-with-two-hisi_sec-devices-working-in-parallel) + - [multi 80 with two hisi\_sec devices working in parallel](#multi-80-with-two-hisi_sec-devices-working-in-parallel) - [场景二:SM4](#场景二sm4) 本文介绍为 OpenSSL 3.0 开发的多算力平台场景动态负载平衡的测试场景。 @@ -26,7 +34,6 @@ ldprov 提供一个通用的动态负载均衡能力。本文以两个算法在 sudo make -j 100 install - ### 使用`pkg-config`验证 OpenSSL 3.0 和 UADK 的安装 $ export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig/ @@ -104,15 +111,135 @@ ldprov 提供一个通用的动态负载均衡能力。本文以两个算法在 IDs: { 1.2.840.113549.1.1.1, 2.5.8.1.1, RSA, rsaEncryption } @ uadk_provider IDs: { 1.2.840.113549.1.3.1, DH, dhKeyAgreement } @ uadk_provider -## 场景一:使用 `openssl speed` 验证 SM4-CTR 用 default 还是 uadk_provier 更快? +## 场景一:使用 `openssl speed` 验证 SM4-CTR 用 default 还是 uadk_provider 更快? -综合测试对比 uadk_provider vs. default: +综合测试对比 SM4-CTR 算法, uadk_provider vs. default 哪个更快: - openssl speed -provider uadk_provider -provider default -async_jobs 1 -evp sm4-cbc - openssl speed -provider default -async_jobs 1 -evp sm4-cbc openssl speed -provider uadk_provider -provider default -async_jobs 1 -evp sm4-ctr + cat /sys/kernel/debug/hisi_sec2/*/qm/regs | grep QM_DFX_DB_CNT openssl speed -provider default -async_jobs 1 -evp sm4-ctr + cat /sys/kernel/debug/hisi_sec2/*/qm/regs | grep QM_DFX_DB_CNT + +Note: `cat /sys/kernel/debug/hisi_sec2/*/qm/regs | grep QM_DFX_DB_CNT` + +### async_jobs 1 + + # openssl speed -provider default -async_jobs 1 -evp sm4-ctr + Doing SM4-CTR for 3s on 16 size blocks: 6002423 SM4-CTR's in 2.99s + Doing SM4-CTR for 3s on 64 size blocks: 5337232 SM4-CTR's in 3.00s + Doing SM4-CTR for 3s on 256 size blocks: 2348146 SM4-CTR's in 3.00s + Doing SM4-CTR for 3s on 1024 size blocks: 604520 SM4-CTR's in 3.00s + Doing SM4-CTR for 3s on 8192 size blocks: 75727 SM4-CTR's in 2.99s + Doing SM4-CTR for 3s on 16384 size blocks: 37875 SM4-CTR's in 3.00s + version: 3.2.0-dev + built on: Sun Mar 24 08:45:29 2024 UTC + options: bn(64,64) + compiler: gcc -fPIC -pthread -Wa,--noexecstack -Wall -O3 
-DOPENSSL_USE_NODELETE -DOPENSSL_PIC -DOPENSSL_BUILDING_OPENSSL -DNDEBUG + CPUINFO: OPENSSL_armcap=0xbd + The 'numbers' are in 1000s of bytes per second processed. + type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes + SM4-CTR 32119.99k 113860.95k 200375.13k 206342.83k 207476.78k 206848.00k + + # openssl speed -provider uadk_provider -provider default -async_jobs 1 -evp sm4-ctr + type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes + SM4-CTR 3866.05k 15495.25k 62183.06k 237321.23k 456612.35k 471474.08k + +### async_jobs 10 + + # openssl speed -provider default -async_jobs 10 -evp sm4-ctr + type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes + SM4-CTR 32012.54k 114239.17k 200504.06k 206341.80k 206779.73k 206787.93k + + # openssl speed -provider uadk_provider -provider default -async_jobs 10 -evp sm4-ctr + type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes + SM4-CTR 5579.10k 17339.43k 52649.66k 196011.89k 1228592.55k 2281140.00k + +### multi 10 + + # openssl speed -provider default -multi 10 -seconds 120 -elapsed -evp sm4-ctr + type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes + SM4-CTR 319949.11k 1138384.13k 2005259.69k 2062083.75k 2066989.06k 2067649.88k + SM4-CTR 319931.07k 1137867.03k 2003471.89k 2061820.07k 2065891.87k 2066369.74k + + # openssl speed -provider uadk_provider -provider default -async_jobs 10 -multi 10 -evp sm4-ctr + type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes + SM4-CTR 40662.10k 143187.75k 529901.48k 2818294.78k 3856102.74k 3849431.72k + +### multi 10 with two hisi_sec devices working in parallel + + # openssl speed -provider uadk_provider -provider default -multi 10 -seconds 120 -evp sm4-ctr + type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes + SM4-CTR 13500.34k 63651.29k 235971.03k 840059.72k 3034632.94k 3739119.34k + SM4-CTR 13509.76k 57346.87k 222764.13k 788640.73k 2961521.60k 3670497.28k + + # openssl speed -provider 
uadk_provider -provider default -multi 10 -seconds 120 -elapsed -evp sm4-ctr + type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes + SM4-CTR 17683.47k 63330.66k 241356.35k 839610.06k 3044970.84k 3711369.90k + SM4-CTR 4231.23k 47430.67k 76532.02k 599274.04k 2591518.99k 3370097.60k + SM4-CTR 4681.11k 42687.71k 78961.14k 578202.55k 2549159.66k 3327796.70k + +### multi 20 with two hisi_sec devices working in parallel + + # openssl speed -provider default -multi 20 -seconds 20 -elapsed -evp sm4-ctr + SM4-CTR 640019.85k 2276538.37k 4006128.86k 4124605.85k 4133664.36k 4134647.40k + SM4-CTR 640069.68k 2274927.57k 4015306.61k 4122030.80k 4132623.56k 4134277.12k + + # openssl speed -provider uadk_provider -provider default -multi 20 -seconds 20 -elapsed -evp sm4-ctr + SM4-CTR 7282.07k 60850.00k 149914.38k 1123897.45k 5156714.09k 6561099.68k + SM4-CTR 7654.95k 50485.59k 143187.19k 1168020.43k 5131083.37k 6708602.47k + +### multi 40 with two hisi_sec devices working in parallel + + # openssl speed -provider default -multi 40 -seconds 20 -elapsed -evp sm4-ctr + SM4-CTR 1279971.26k 4552531.43k 8015539.14k 8248040.60k 8265528.52k 8268136.45k + SM4-CTR 1279780.41k 4552059.69k 8048823.58k 8005700.04k 7924151.91k 7953707.83k + + # openssl speed -provider uadk_provider -provider default -multi 40 -seconds 20 -elapsed -evp sm4-ctr + SM4-CTR 21306.24k 64037.45k 326836.94k 1621123.33k 7629483.40k 7756404.33k + SM4-CTR 16398.13k 54053.68k 296889.87k 1859460.35k 7728587.57k 7756309.76k + SM4-CTR 19884.44k 64849.51k 339523.41k 1564984.06k 7749677.47k 7756678.76k + SM4-CTR 18531.02k 68479.91k 296649.13k 1631692.60k 7745306.62k 7756664.01k + +### multi 60 with two hisi_sec devices working in parallel + + # openssl speed -provider default -multi 60 -seconds 20 -elapsed -evp sm4-ctr + SM4-CTR 1919873.06k 6828072.19k 11419966.16k 11500206.03k 11561990.55k 11437108.19k + SM4-CTR 1919545.31k 6686775.22k 10699080.42k 11070904.01k 11076703.03k 10980688.69k + + # openssl speed 
-provider uadk_provider -provider default -multi 60 -seconds 20 -elapsed -evp sm4-ctr + SM4-CTR 25518.95k 79736.29k 363007.69k 1669525.25k 7752430.39k 7758521.96k + SM4-CTR 22312.36k 66975.32k 323648.76k 1253113.96k 7753025.13k 7758765.62k + SM4-CTR 23749.19k 73341.22k 364654.96k 1450117.68k 7752751.51k 7758544.69k + SM4-CTR 26894.66k 84114.65k 344945.08k 1599865.31k 7753107.87k 7758792.98k + +### multi 80 with two hisi_sec devices working in parallel + + # openssl speed -provider default -multi 80 -seconds 20 -elapsed -evp sm4-ctr + SM4-CTR 2476187.60k 8013620.59k 12982942.26k 13367193.80k 13421463.70k 13401955.44k + SM4-CTR 2440658.92k 7843174.65k 13100002.51k 13239890.89k 13205257.83k 13346893.00k + +cpu load average 77% + + # openssl speed -provider uadk_provider -provider default -multi 80 -seconds 20 -elapsed -evp sm4-ctr + ... uadk_err: "do hw ciphers failed. " + SM4-CTR 25791.72k 70921.82k 339090.87k 1352893.18k 7751837.70k 7759396.15k + SM4-CTR 24297.16k 71918.61k 362315.88k 1366651.08k 7752460.29k 7759238.51k + +Note: openssl speed usage, + + -multi num + Run multiple operations in parallel. + + -async_jobs num + Enable async mode and start specified number of jobs. + + -elapsed + When calculating operations- or bytes‐per‐second, use wall‐clock time instead of CPU user time as divisor. It can be useful when testing speed of hardware engines. + + -seconds num + Run benchmarks for num seconds. - TODO:【2023/09】能看出uadk_provider测得的数据, sm4-cbc / sm4-ctr 提高0.6~1倍,在起作用。 + -bytes num + Run benchmarks on num-byte buffers. Affects ciphers, digests and the CSPRNG. The limit on the size of the buffer is INT_MAX - 64 bytes, which for a 32-bit int would be 2147483583 bytes. 
## 场景二:SM4 diff --git a/uadk.md b/uadk.md index 285794ffaa706c02b60edcdc3019e1c7f4c2413d..c34112e963e35bb4b406f4824a153034b4815fdc 100644 --- a/uadk.md +++ b/uadk.md @@ -35,8 +35,25 @@ 重启服务器。检查如下设备存在,则证明加速器硬件启动以及内核模块加载成功。 - # ls /dev/hisi* - hisi_hpre-4 hisi_hpre-5 hisi_sec2-1 hisi_sec2-3 hisi_zip-0 hisi_zip-2 + [root@agent3 ~]# lspci -tv + ... + +-[0000:74]-+-00.0-[75]----00.0 Huawei Technologies Co., Ltd. HiSilicon ZIP Engine + | +-01.0-[76]----00.0 Huawei Technologies Co., Ltd. HiSilicon SEC Engine + ... + +-[0000:b4]-+-00.0-[b5]----00.0 Huawei Technologies Co., Ltd. HiSilicon ZIP Engine + | +-01.0-[b6]----00.0 Huawei Technologies Co., Ltd. HiSilicon SEC Engine + + + depmod -a + modprobe uacce + modprobe hisi_qm + modprobe hisi_zip uacce_mode=1 + modprobe hisi_sec2 uacce_mode=1 + modprobe hisi_hpre uacce_mode=1 + cat /sys/bus/pci/drivers/hisi_sec2/module/parameters/uacce_mode + 1 + ls /dev/hisi* + hisi_hpre-4 hisi_hpre-5 hisi_sec2-1 hisi_sec2-3 hisi_zip-0 hisi_zip-2 需要将设备属性修改为 `777`,重点是开放设备的读写权限。改变权限的原因是为了确保用户可以有效地访问和使用硬件加速器。硬件加速器通过UADK注册后,在/dev目录中会创建相应的字符设备。为了让用户能够与这些字符设备交互,进行数据读写操作,从而充分利用硬件加速器的功能,需要为用户开放相应的写权限。这样,用户就能向硬件加速器发送命令或数据,实现对硬件加速器资源的有效利用。