1 Star 0 Fork 82

MYX/openjdk-1.8.0

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
0034-8250902-Implement-MD5-Intrinsics-on-x64.patch 17.22 KB
一键复制 编辑 原始数据 按行查看 历史
Date: Fri, 9 Jun 2023 09:23:12 +0800
Subject: 8250902:Implement MD5 Intrinsics on x64
Bug url: https://bugs.openjdk.org/browse/JDK-8250902
---
hotspot/src/cpu/x86/vm/assembler_x86.cpp | 10 ++
hotspot/src/cpu/x86/vm/assembler_x86.hpp | 2 +
hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp | 153 ++++++++++++++++++
hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp | 3 +
.../src/cpu/x86/vm/stubGenerator_x86_32.cpp | 44 +++++
.../src/cpu/x86/vm/stubGenerator_x86_64.cpp | 42 +++++
hotspot/src/cpu/x86/vm/vm_version_x86.cpp | 5 -
hotspot/src/share/vm/asm/assembler.hpp | 3 +
hotspot/src/share/vm/asm/codeBuffer.hpp | 13 ++
.../intrinsics/IntrinsicAvailableTest.java | 2 +-
.../GenericTestCaseForUnsupportedX86CPU.java | 6 +-
11 files changed, 276 insertions(+), 7 deletions(-)
diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.cpp b/hotspot/src/cpu/x86/vm/assembler_x86.cpp
index ddc1acfd8..c0ae3d32a 100644
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp
@@ -2808,6 +2808,16 @@ void Assembler::ret(int imm16) {
}
}
+void Assembler::roll(Register dst, int imm8) { // ROL r32, imm8: rotate the 32-bit register dst left by imm8 bits
+ assert(isShiftCount(imm8), "illegal shift count"); // 32-bit operand: validate the count itself (the ">> 1" form belongs to 64-bit shifts)
+ int encode = prefix_and_encode(dst->encoding());
+ if (imm8 == 1) {
+ emit_int16((unsigned char)0xD1, (0xC0 | encode)); // rotate-by-one form: opcode 0xD1, no immediate byte
+ } else {
+ emit_int24((unsigned char)0xC1, (0xc0 | encode), imm8); // general form: opcode 0xC1 followed by the immediate count
+ }
+}
+
void Assembler::sahf() {
#ifdef _LP64
// Not supported in 64bit mode
diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp
index c2e70bc2a..1695d7969 100644
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp
@@ -1574,6 +1574,8 @@ private:
void ret(int imm16);
+ void roll(Register dst, int imm8);
+
#ifdef _LP64
void rorq(Register dst, int imm8);
void rorxq(Register dst, Register src, int imm8);
diff --git a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
index 1b09514c9..3aca9a30d 100644
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
@@ -8577,3 +8577,156 @@ SkipIfEqual::SkipIfEqual(
SkipIfEqual::~SkipIfEqual() {
_masm->bind(_label);
}
+
+void MacroAssembler::fast_md5(Register buf, Address state, Address ofs, Address limit, bool multi_block) {
+ // Emits the 64-step MD5 transform of the 64-byte block at buf, updating the four 32-bit words that state points to.
+ Label start, done_hash, loop0; // only loop0 is a branch target; start and done_hash are bound but never jumped to
+ // Clobbers rax, rbx, rcx, rdx, rsi and rdi; with multi_block the updated ofs is left in rax.
+ bind(start);
+
+ bind(loop0);
+
+ // Load the current hash words into rax, rbx, rcx, rdx; the round results are added back into memory afterwards
+ movptr(rdi, state);
+ movl(rax, Address(rdi, 0));
+ movl(rbx, Address(rdi, 4));
+ movl(rcx, Address(rdi, 8));
+ movl(rdx, Address(rdi, 12));
+ // Each step macro computes r1 = r2 + rotl(r1 + word k of buf + f(r2, r3, r4) + t, s), rsi is scratch. FF: f = ((r3 ^ r4) & r2) ^ r4
+#define FF(r1, r2, r3, r4, k, s, t) \
+ movl(rsi, r3); \
+ addl(r1, Address(buf, k*4)); \
+ xorl(rsi, r4); \
+ andl(rsi, r2); \
+ xorl(rsi, r4); \
+ leal(r1, Address(r1, rsi, Address::times_1, t)); \
+ roll(r1, s); \
+ addl(r1, r2);
+ // GG: f = (r4 & r2) | (~r4 & r3); uses rdi as a second scratch register
+#define GG(r1, r2, r3, r4, k, s, t) \
+ movl(rsi, r4); \
+ movl(rdi, r4); \
+ addl(r1, Address(buf, k*4)); \
+ notl(rsi); \
+ andl(rdi, r2); \
+ andl(rsi, r3); \
+ orl(rsi, rdi); \
+ leal(r1, Address(r1, rsi, Address::times_1, t)); \
+ roll(r1, s); \
+ addl(r1, r2);
+ // HH: f = r3 ^ r4 ^ r2
+#define HH(r1, r2, r3, r4, k, s, t) \
+ movl(rsi, r3); \
+ addl(r1, Address(buf, k*4)); \
+ xorl(rsi, r4); \
+ xorl(rsi, r2); \
+ leal(r1, Address(r1, rsi, Address::times_1, t)); \
+ roll(r1, s); \
+ addl(r1, r2);
+ // II: f = (~r4 | r2) ^ r3
+#define II(r1, r2, r3, r4, k, s, t) \
+ movl(rsi, r4); \
+ notl(rsi); \
+ addl(r1, Address(buf, k*4)); \
+ orl(rsi, r2); \
+ xorl(rsi, r3); \
+ leal(r1, Address(r1, rsi, Address::times_1, t)); \
+ roll(r1, s); \
+ addl(r1, r2);
+
+ // Round 1
+ FF(rax, rbx, rcx, rdx, 0, 7, 0xd76aa478)
+ FF(rdx, rax, rbx, rcx, 1, 12, 0xe8c7b756)
+ FF(rcx, rdx, rax, rbx, 2, 17, 0x242070db)
+ FF(rbx, rcx, rdx, rax, 3, 22, 0xc1bdceee)
+ FF(rax, rbx, rcx, rdx, 4, 7, 0xf57c0faf)
+ FF(rdx, rax, rbx, rcx, 5, 12, 0x4787c62a)
+ FF(rcx, rdx, rax, rbx, 6, 17, 0xa8304613)
+ FF(rbx, rcx, rdx, rax, 7, 22, 0xfd469501)
+ FF(rax, rbx, rcx, rdx, 8, 7, 0x698098d8)
+ FF(rdx, rax, rbx, rcx, 9, 12, 0x8b44f7af)
+ FF(rcx, rdx, rax, rbx, 10, 17, 0xffff5bb1)
+ FF(rbx, rcx, rdx, rax, 11, 22, 0x895cd7be)
+ FF(rax, rbx, rcx, rdx, 12, 7, 0x6b901122)
+ FF(rdx, rax, rbx, rcx, 13, 12, 0xfd987193)
+ FF(rcx, rdx, rax, rbx, 14, 17, 0xa679438e)
+ FF(rbx, rcx, rdx, rax, 15, 22, 0x49b40821)
+
+ // Round 2
+ GG(rax, rbx, rcx, rdx, 1, 5, 0xf61e2562)
+ GG(rdx, rax, rbx, rcx, 6, 9, 0xc040b340)
+ GG(rcx, rdx, rax, rbx, 11, 14, 0x265e5a51)
+ GG(rbx, rcx, rdx, rax, 0, 20, 0xe9b6c7aa)
+ GG(rax, rbx, rcx, rdx, 5, 5, 0xd62f105d)
+ GG(rdx, rax, rbx, rcx, 10, 9, 0x02441453)
+ GG(rcx, rdx, rax, rbx, 15, 14, 0xd8a1e681)
+ GG(rbx, rcx, rdx, rax, 4, 20, 0xe7d3fbc8)
+ GG(rax, rbx, rcx, rdx, 9, 5, 0x21e1cde6)
+ GG(rdx, rax, rbx, rcx, 14, 9, 0xc33707d6)
+ GG(rcx, rdx, rax, rbx, 3, 14, 0xf4d50d87)
+ GG(rbx, rcx, rdx, rax, 8, 20, 0x455a14ed)
+ GG(rax, rbx, rcx, rdx, 13, 5, 0xa9e3e905)
+ GG(rdx, rax, rbx, rcx, 2, 9, 0xfcefa3f8)
+ GG(rcx, rdx, rax, rbx, 7, 14, 0x676f02d9)
+ GG(rbx, rcx, rdx, rax, 12, 20, 0x8d2a4c8a)
+
+ // Round 3
+ HH(rax, rbx, rcx, rdx, 5, 4, 0xfffa3942)
+ HH(rdx, rax, rbx, rcx, 8, 11, 0x8771f681)
+ HH(rcx, rdx, rax, rbx, 11, 16, 0x6d9d6122)
+ HH(rbx, rcx, rdx, rax, 14, 23, 0xfde5380c)
+ HH(rax, rbx, rcx, rdx, 1, 4, 0xa4beea44)
+ HH(rdx, rax, rbx, rcx, 4, 11, 0x4bdecfa9)
+ HH(rcx, rdx, rax, rbx, 7, 16, 0xf6bb4b60)
+ HH(rbx, rcx, rdx, rax, 10, 23, 0xbebfbc70)
+ HH(rax, rbx, rcx, rdx, 13, 4, 0x289b7ec6)
+ HH(rdx, rax, rbx, rcx, 0, 11, 0xeaa127fa)
+ HH(rcx, rdx, rax, rbx, 3, 16, 0xd4ef3085)
+ HH(rbx, rcx, rdx, rax, 6, 23, 0x04881d05)
+ HH(rax, rbx, rcx, rdx, 9, 4, 0xd9d4d039)
+ HH(rdx, rax, rbx, rcx, 12, 11, 0xe6db99e5)
+ HH(rcx, rdx, rax, rbx, 15, 16, 0x1fa27cf8)
+ HH(rbx, rcx, rdx, rax, 2, 23, 0xc4ac5665)
+
+ // Round 4
+ II(rax, rbx, rcx, rdx, 0, 6, 0xf4292244)
+ II(rdx, rax, rbx, rcx, 7, 10, 0x432aff97)
+ II(rcx, rdx, rax, rbx, 14, 15, 0xab9423a7)
+ II(rbx, rcx, rdx, rax, 5, 21, 0xfc93a039)
+ II(rax, rbx, rcx, rdx, 12, 6, 0x655b59c3)
+ II(rdx, rax, rbx, rcx, 3, 10, 0x8f0ccc92)
+ II(rcx, rdx, rax, rbx, 10, 15, 0xffeff47d)
+ II(rbx, rcx, rdx, rax, 1, 21, 0x85845dd1)
+ II(rax, rbx, rcx, rdx, 8, 6, 0x6fa87e4f)
+ II(rdx, rax, rbx, rcx, 15, 10, 0xfe2ce6e0)
+ II(rcx, rdx, rax, rbx, 6, 15, 0xa3014314)
+ II(rbx, rcx, rdx, rax, 13, 21, 0x4e0811a1)
+ II(rax, rbx, rcx, rdx, 4, 6, 0xf7537e82)
+ II(rdx, rax, rbx, rcx, 11, 10, 0xbd3af235)
+ II(rcx, rdx, rax, rbx, 2, 15, 0x2ad7d2bb)
+ II(rbx, rcx, rdx, rax, 9, 21, 0xeb86d391)
+
+#undef FF
+#undef GG
+#undef HH
+#undef II
+
+ // Reload the state pointer (GG used rdi as scratch) and add the round results into the stored hash words
+ movptr(rdi, state);
+ addl(Address(rdi, 0), rax);
+ addl(Address(rdi, 4), rbx);
+ addl(Address(rdi, 8), rcx);
+ addl(Address(rdi, 12), rdx);
+
+ if (multi_block) {
+ // advance the data pointer and the stored ofs by one 64-byte block, then loop if more to process
+ addptr(buf, 64);
+ addl(ofs, 64);
+ movl(rsi, ofs);
+ cmpl(rsi, limit);
+ jcc(Assembler::belowEqual, loop0); // unsigned compare: keep going while ofs <= limit
+ movptr(rax, rsi); //return ofs
+ }
+
+ bind(done_hash);
+}
diff --git a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
index e94fdd7d7..c18645f18 100644
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
@@ -907,6 +907,9 @@ class MacroAssembler: public Assembler {
// computes pow(x,y). Fallback to runtime call included.
void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(false, num_fpu_regs_in_use); }
+ void fast_md5(Register buf, Address state, Address ofs, Address limit,
+ bool multi_block);
+
private:
// call runtime as a fallback for trig functions and pow/exp.
diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
index f555f3326..b4e3f2914 100644
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -3057,6 +3057,45 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // ofs and limit are used only by the multi-block variant.
+ // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs)
+ address generate_md5_implCompress(bool multi_block, const char *name) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+ // buf is kept in rbp; state, ofs and limit are copied into three scratch stack slots addressed off rsp
+ const Register buf_param = rbp;
+ const Address state_param(rsp, 0 * wordSize);
+ const Address ofs_param (rsp, 1 * wordSize);
+ const Address limit_param(rsp, 2 * wordSize);
+ // Save rbx, rdi, rsi and the frame's rbp, then reserve the three scratch slots
+ __ enter();
+ __ push(rbx);
+ __ push(rdi);
+ __ push(rsi);
+ __ push(rbp);
+ __ subptr(rsp, 3 * wordSize);
+ // Copy the caller-supplied values found at rbp + 8 and above into the scratch slots, staging each through rsi
+ __ movptr(rsi, Address(rbp, 8 + 4));
+ __ movptr(state_param, rsi);
+ if (multi_block) {
+ __ movptr(rsi, Address(rbp, 8 + 8));
+ __ movptr(ofs_param, rsi);
+ __ movptr(rsi, Address(rbp, 8 + 12));
+ __ movptr(limit_param, rsi);
+ }
+ __ movptr(buf_param, Address(rbp, 8 + 0)); // do it last because it overwrites rbp, which the loads above still need
+ __ fast_md5(buf_param, state_param, ofs_param, limit_param, multi_block);
+ // Release the scratch slots and restore the saved registers in reverse push order
+ __ addptr(rsp, 3 * wordSize);
+ __ pop(rbp);
+ __ pop(rsi);
+ __ pop(rdi);
+ __ pop(rbx);
+ __ leave();
+ __ ret(0);
+ return start;
+ }
// byte swap x86 long
address generate_ghash_long_swap_mask() {
@@ -3525,6 +3564,11 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
}
+ if (UseMD5Intrinsics) {
+ StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress");
+ StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB");
+ }
+
// Generate GHASH intrinsics code
if (UseGHASHIntrinsics) {
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
index 254f63392..f6511b273 100644
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -3660,6 +3660,43 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // ofs and limit are used only by the multi-block variant.
+ // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs)
+ address generate_md5_implCompress(bool multi_block, const char *name) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+ // buf is kept in r15; state takes the first scratch slot, ofs and limit are 32-bit values sharing the second one
+ const Register buf_param = r15;
+ const Address state_param(rsp, 0 * wordSize);
+ const Address ofs_param (rsp, 1 * wordSize );
+ const Address limit_param(rsp, 1 * wordSize + 4);
+ // Save rbx, rdi, rsi and r15, then reserve the two scratch slots
+ __ enter();
+ __ push(rbx);
+ __ push(rdi);
+ __ push(rsi);
+ __ push(r15);
+ __ subptr(rsp, 2 * wordSize);
+ // Move the argument registers out of the way before fast_md5 runs, since it overwrites rax, rbx, rcx, rdx, rsi and rdi
+ __ movptr(buf_param, c_rarg0);
+ __ movptr(state_param, c_rarg1);
+ if (multi_block) {
+ __ movl(ofs_param, c_rarg2);
+ __ movl(limit_param, c_rarg3);
+ }
+ __ fast_md5(buf_param, state_param, ofs_param, limit_param, multi_block);
+ // Release the scratch slots and restore the saved registers in reverse push order
+ __ addptr(rsp, 2 * wordSize);
+ __ pop(r15);
+ __ pop(rsi);
+ __ pop(rdi);
+ __ pop(rbx);
+ __ leave();
+ __ ret(0);
+ return start;
+ }
+
// This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
// to hide instruction latency
//
@@ -4584,6 +4621,11 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
}
+ if (UseMD5Intrinsics) {
+ StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress");
+ StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB");
+ }
+
// Generate GHASH intrinsics code
if (UseGHASHIntrinsics) {
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
index 41f827364..ce3037d76 100644
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
@@ -669,11 +669,6 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseF2jBLASIntrinsics, false);
}
- if (UseMD5Intrinsics) {
- warning("MD5 intrinsics are not available on this CPU");
- FLAG_SET_DEFAULT(UseMD5Intrinsics, false);
- }
-
// Adjust RTM (Restricted Transactional Memory) flags
if (!supports_rtm() && UseRTMLocking) {
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
diff --git a/hotspot/src/share/vm/asm/assembler.hpp b/hotspot/src/share/vm/asm/assembler.hpp
index d4d31d47e..823653d55 100644
--- a/hotspot/src/share/vm/asm/assembler.hpp
+++ b/hotspot/src/share/vm/asm/assembler.hpp
@@ -287,6 +287,9 @@ class AbstractAssembler : public ResourceObj {
void emit_int8( int8_t x) { code_section()->emit_int8( x); }
void emit_int16( int16_t x) { code_section()->emit_int16( x); }
+ void emit_int16( uint8_t x1, uint8_t x2) { code_section()->emit_int16(x1, x2); } // two-byte overload: forwards x1, x2 to the current code section
+
+ void emit_int24( uint8_t x1, uint8_t x2, uint8_t x3) { code_section()->emit_int24(x1, x2, x3); } // forwards the three bytes to the current code section
void emit_int32( int32_t x) { code_section()->emit_int32( x); }
void emit_int64( int64_t x) { code_section()->emit_int64( x); }
diff --git a/hotspot/src/share/vm/asm/codeBuffer.hpp b/hotspot/src/share/vm/asm/codeBuffer.hpp
index a89f2c18b..32c2f717a 100644
--- a/hotspot/src/share/vm/asm/codeBuffer.hpp
+++ b/hotspot/src/share/vm/asm/codeBuffer.hpp
@@ -195,6 +195,19 @@ class CodeSection VALUE_OBJ_CLASS_SPEC {
// Code emission
void emit_int8 ( int8_t x) { *((int8_t*) end()) = x; set_end(end() + sizeof(int8_t)); }
void emit_int16( int16_t x) { *((int16_t*) end()) = x; set_end(end() + sizeof(int16_t)); }
+ void emit_int16(uint8_t x1, uint8_t x2) {
+ uint8_t* dst = (uint8_t*) end(); // write position; bytes are stored in argument order
+ dst[0] = x1;
+ dst[1] = x2;
+ set_end((address) (dst + 2)); // advance the section end past the two bytes just written
+ }
+ void emit_int24(uint8_t x1, uint8_t x2, uint8_t x3) {
+ uint8_t* dst = (uint8_t*) end(); // write position; bytes are stored in argument order
+ dst[0] = x1;
+ dst[1] = x2;
+ dst[2] = x3;
+ set_end((address) (dst + 3)); // advance the section end past the three bytes just written
+ }
void emit_int32( int32_t x) { *((int32_t*) end()) = x; set_end(end() + sizeof(int32_t)); }
void emit_int64( int64_t x) { *((int64_t*) end()) = x; set_end(end() + sizeof(int64_t)); }
diff --git a/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java b/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java
index 1a5475403..b52c6f523 100644
--- a/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java
+++ b/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java
@@ -26,7 +26,7 @@ import java.util.Objects;
/*
* @test
* @bug 8130832
- * @library /testlibrary /../../test/lib /compiler/whitebox /compiler/testlibrary
+ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox /compiler/testlibrary
* @build IntrinsicAvailableTest
* @run main ClassFileInstaller sun.hotspot.WhiteBox
* sun.hotspot.WhiteBox$WhiteBoxPermission
diff --git a/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedX86CPU.java b/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedX86CPU.java
index 8a8dde4ad..a916ac746 100644
--- a/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedX86CPU.java
+++ b/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedX86CPU.java
@@ -24,7 +24,9 @@
import com.oracle.java.testlibrary.ExitCode;
import com.oracle.java.testlibrary.Platform;
import com.oracle.java.testlibrary.cli.CommandLineOptionTest;
+import com.oracle.java.testlibrary.cli.predicate.AndPredicate;
import com.oracle.java.testlibrary.cli.predicate.OrPredicate;
+import com.oracle.java.testlibrary.cli.predicate.NotPredicate;
/**
* Generic test case for SHA-related options targeted to X86 CPUs that don't
@@ -33,7 +35,9 @@ import com.oracle.java.testlibrary.cli.predicate.OrPredicate;
public class GenericTestCaseForUnsupportedX86CPU
extends DigestOptionsBase.TestCase {
public GenericTestCaseForUnsupportedX86CPU(String optionName) {
- super(optionName, new OrPredicate(Platform::isX64, Platform::isX86));
+ super(optionName, new AndPredicate(new OrPredicate(Platform::isX64, Platform::isX86),
+ new NotPredicate(DigestOptionsBase.getPredicateForOption(
+ optionName))));
}
@Override
--
2.22.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/myx_076/openjdk-1.8.0.git
git@gitee.com:myx_076/openjdk-1.8.0.git
myx_076
openjdk-1.8.0
openjdk-1.8.0
master

搜索帮助