diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk
index 2af2f9a..f23b972 100644
--- a/make/hotspot/gensrc/GensrcAdlc.gmk
+++ b/make/hotspot/gensrc/GensrcAdlc.gmk
@@ -156,6 +156,12 @@ ifeq ($(call check-jvm-feature, compiler2), true)
)))
endif
+ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \
+ )))
+ endif
+
ifeq ($(call check-jvm-feature, shenandoahgc), true)
AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
$d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \
diff --git a/src/hotspot/cpu/aarch64/aarch64-asmtest.py b/src/hotspot/cpu/aarch64/aarch64-asmtest.py
index 31c6965..e621402 100644
--- a/src/hotspot/cpu/aarch64/aarch64-asmtest.py
+++ b/src/hotspot/cpu/aarch64/aarch64-asmtest.py
@@ -73,6 +73,48 @@ class GeneralRegisterOrSp(Register):
return self.astr()
else:
return self.astr("r")
+class SVEVectorRegister(FloatRegister):
+ def __str__(self):
+ return self.astr("z")
+
+class SVEPRegister(Register):
+ def __str__(self):
+ return self.astr("p")
+
+ def generate(self):
+ self.number = random.randint(0, 15)
+ return self
+
+class SVEGoverningPRegister(Register):
+ def __str__(self):
+ return self.astr("p")
+ def generate(self):
+ self.number = random.randint(0, 7)
+ return self
+
+class RegVariant(object):
+ def __init__(self, low, high):
+ self.number = random.randint(low, high)
+
+ def astr(self):
+ nameMap = {
+ 0: ".b",
+ 1: ".h",
+ 2: ".s",
+ 3: ".d",
+ 4: ".q"
+ }
+ return nameMap.get(self.number)
+
+ def cstr(self):
+ nameMap = {
+ 0: "__ B",
+ 1: "__ H",
+ 2: "__ S",
+ 3: "__ D",
+ 4: "__ Q"
+ }
+ return nameMap.get(self.number)
class FloatZero(Operand):
@@ -88,7 +130,10 @@ class OperandFactory:
'w' : GeneralRegister,
's' : FloatRegister,
'd' : FloatRegister,
- 'z' : FloatZero}
+ 'z' : FloatZero,
+ 'p' : SVEPRegister,
+ 'P' : SVEGoverningPRegister,
+ 'Z' : SVEVectorRegister}
@classmethod
def create(cls, mode):
@@ -834,6 +879,100 @@ class FloatInstruction(Instruction):
% tuple([Instruction.astr(self)] +
[(self.reg[i].astr(self.modes[i])) for i in range(self.numRegs)]))
+class SVEVectorOp(Instruction):
+ def __init__(self, args):
+ name = args[0]
+ regTypes = args[1]
+ regs = []
+ for c in regTypes:
+ regs.append(OperandFactory.create(c).generate())
+ self.reg = regs
+ self.numRegs = len(regs)
+ if regTypes[0] != "p" and regTypes[1] == 'P':
+ self._isPredicated = True
+ self._merge = "/m"
+ else:
+ self._isPredicated = False
+ self._merge =""
+
+ self._bitwiseop = False
+ if name[0] == 'f':
+ self._width = RegVariant(2, 3)
+ elif not self._isPredicated and (name in ["and", "eor", "orr", "bic"]):
+ self._width = RegVariant(3, 3)
+ self._bitwiseop = True
+ else:
+ self._width = RegVariant(0, 3)
+ if len(args) > 2:
+ self._dnm = args[2]
+ else:
+ self._dnm = None
+ Instruction.__init__(self, name)
+
+ def cstr(self):
+ formatStr = "%s%s" + ''.join([", %s" for i in range(0, self.numRegs)] + [");"])
+ if self._bitwiseop:
+ width = []
+ formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)] + [");"])
+ else:
+ width = [self._width.cstr()]
+ return (formatStr
+ % tuple(["__ sve_" + self._name + "("] +
+ [str(self.reg[0])] +
+ width +
+ [str(self.reg[i]) for i in range(1, self.numRegs)]))
+ def astr(self):
+ formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)])
+ if self._dnm == 'dn':
+ formatStr += ", %s"
+ dnReg = [str(self.reg[0]) + self._width.astr()]
+ else:
+ dnReg = []
+
+ if self._isPredicated:
+ restRegs = [str(self.reg[1]) + self._merge] + dnReg + [str(self.reg[i]) + self._width.astr() for i in range(2, self.numRegs)]
+ else:
+ restRegs = dnReg + [str(self.reg[i]) + self._width.astr() for i in range(1, self.numRegs)]
+ return (formatStr
+ % tuple([Instruction.astr(self)] +
+ [str(self.reg[0]) + self._width.astr()] +
+ restRegs))
+ def generate(self):
+ return self
+
+class SVEReductionOp(Instruction):
+ def __init__(self, args):
+ name = args[0]
+ lowRegType = args[1]
+ self.reg = []
+ Instruction.__init__(self, name)
+ self.reg.append(OperandFactory.create('s').generate())
+ self.reg.append(OperandFactory.create('P').generate())
+ self.reg.append(OperandFactory.create('Z').generate())
+ self._width = RegVariant(lowRegType, 3)
+ def cstr(self):
+ return "__ sve_%s(%s, %s, %s, %s);" % (self.name(),
+ str(self.reg[0]),
+ self._width.cstr(),
+ str(self.reg[1]),
+ str(self.reg[2]))
+ def astr(self):
+ if self.name() == "uaddv":
+ dstRegName = "d" + str(self.reg[0].number)
+ else:
+ dstRegName = self._width.astr()[1] + str(self.reg[0].number)
+ formatStr = "%s %s, %s, %s"
+ if self.name() == "fadda":
+ formatStr += ", %s"
+ moreReg = [dstRegName]
+ else:
+ moreReg = []
+ return formatStr % tuple([self.name()] +
+ [dstRegName] +
+ [str(self.reg[1])] +
+ moreReg +
+ [str(self.reg[2]) + self._width.astr()])
+
class LdStSIMDOp(Instruction):
def __init__(self, args):
self._name, self.regnum, self.arrangement, self.addresskind = args
@@ -1120,7 +1259,42 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
["mov", "__ mov(v1, __ T2S, 1, zr);", "mov\tv1.s[1], wzr"],
["mov", "__ mov(v1, __ T4H, 2, zr);", "mov\tv1.h[2], wzr"],
["mov", "__ mov(v1, __ T8B, 3, zr);", "mov\tv1.b[3], wzr"],
- ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"]])
+ ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"],
+ # SVE instructions
+ ["cpy", "__ sve_cpy(z0, __ S, p0, v1);", "mov\tz0.s, p0/m, s1"],
+ ["inc", "__ sve_inc(r0, __ S);", "incw\tx0"],
+ ["dec", "__ sve_dec(r1, __ H);", "dech\tx1"],
+ ["lsl", "__ sve_lsl(z0, __ B, z1, 7);", "lsl\tz0.b, z1.b, #7"],
+ ["lsl", "__ sve_lsl(z21, __ H, z1, 15);", "lsl\tz21.h, z1.h, #15"],
+ ["lsl", "__ sve_lsl(z0, __ S, z1, 31);", "lsl\tz0.s, z1.s, #31"],
+ ["lsl", "__ sve_lsl(z0, __ D, z1, 63);", "lsl\tz0.d, z1.d, #63"],
+ ["lsr", "__ sve_lsr(z0, __ B, z1, 7);", "lsr\tz0.b, z1.b, #7"],
+ ["asr", "__ sve_asr(z0, __ H, z11, 15);", "asr\tz0.h, z11.h, #15"],
+ ["lsr", "__ sve_lsr(z30, __ S, z1, 31);", "lsr\tz30.s, z1.s, #31"],
+ ["asr", "__ sve_asr(z0, __ D, z1, 63);", "asr\tz0.d, z1.d, #63"],
+ ["addvl", "__ sve_addvl(sp, r0, 31);", "addvl\tsp, x0, #31"],
+ ["addpl", "__ sve_addpl(r1, sp, -32);", "addpl\tx1, sp, -32"],
+ ["cntp", "__ sve_cntp(r8, __ B, p0, p1);", "cntp\tx8, p0, p1.b"],
+ ["dup", "__ sve_dup(z0, __ B, 127);", "dup\tz0.b, 127"],
+ ["dup", "__ sve_dup(z1, __ H, -128);", "dup\tz1.h, -128"],
+ ["dup", "__ sve_dup(z2, __ S, 32512);", "dup\tz2.s, 32512"],
+ ["dup", "__ sve_dup(z7, __ D, -32768);", "dup\tz7.d, -32768"],
+ ["ld1b", "__ sve_ld1b(z0, __ B, p0, Address(sp));", "ld1b\t{z0.b}, p0/z, [sp]"],
+ ["ld1h", "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));", "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"],
+ ["ld1w", "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));", "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"],
+ ["ld1b", "__ sve_ld1b(z30, __ B, p3, Address(sp, r8));", "ld1b\t{z30.b}, p3/z, [sp, x8]"],
+ ["ld1w", "__ sve_ld1w(z0, __ S, p4, Address(sp, r28));", "ld1w\t{z0.s}, p4/z, [sp, x28, LSL #2]"],
+ ["ld1d", "__ sve_ld1d(z11, __ D, p5, Address(r0, r1));", "ld1d\t{z11.d}, p5/z, [x0, x1, LSL #3]"],
+ ["st1b", "__ sve_st1b(z22, __ B, p6, Address(sp));", "st1b\t{z22.b}, p6, [sp]"],
+ ["st1b", "__ sve_st1b(z31, __ B, p7, Address(sp, -8));", "st1b\t{z31.b}, p7, [sp, #-8, MUL VL]"],
+ ["st1w", "__ sve_st1w(z0, __ S, p1, Address(r0, 7));", "st1w\t{z0.s}, p1, [x0, #7, MUL VL]"],
+ ["st1b", "__ sve_st1b(z0, __ B, p2, Address(sp, r1));", "st1b\t{z0.b}, p2, [sp, x1]"],
+ ["st1h", "__ sve_st1h(z0, __ H, p3, Address(sp, r8));", "st1h\t{z0.h}, p3, [sp, x8, LSL #1]"],
+ ["st1d", "__ sve_st1d(z0, __ D, p4, Address(r0, r8));", "st1d\t{z0.d}, p4, [x0, x8, LSL #3]"],
+ ["ldr", "__ sve_ldr(z0, Address(sp));", "ldr\tz0, [sp]"],
+ ["ldr", "__ sve_ldr(z31, Address(sp, -256));", "ldr\tz31, [sp, #-256, MUL VL]"],
+ ["str", "__ sve_str(z8, Address(r8, 255));", "str\tz8, [x8, #255, MUL VL]"],
+])
print "\n// FloatImmediateOp"
for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
@@ -1145,6 +1319,50 @@ for size in ("x", "w"):
["ldumin", "ldumin", size, suffix],
["ldumax", "ldumax", size, suffix]]);
+
+generate(SVEVectorOp, [["add", "ZZZ"],
+ ["sub", "ZZZ"],
+ ["fadd", "ZZZ"],
+ ["fmul", "ZZZ"],
+ ["fsub", "ZZZ"],
+ ["abs", "ZPZ"],
+ ["add", "ZPZ", "dn"],
+ ["asr", "ZPZ", "dn"],
+ ["cnt", "ZPZ"],
+ ["lsl", "ZPZ", "dn"],
+ ["lsr", "ZPZ", "dn"],
+ ["mul", "ZPZ", "dn"],
+ ["neg", "ZPZ"],
+ ["not", "ZPZ"],
+ ["smax", "ZPZ", "dn"],
+ ["smin", "ZPZ", "dn"],
+ ["sub", "ZPZ", "dn"],
+ ["fabs", "ZPZ"],
+ ["fadd", "ZPZ", "dn"],
+ ["fdiv", "ZPZ", "dn"],
+ ["fmax", "ZPZ", "dn"],
+ ["fmin", "ZPZ", "dn"],
+ ["fmul", "ZPZ", "dn"],
+ ["fneg", "ZPZ"],
+ ["frintm", "ZPZ"],
+ ["frintn", "ZPZ"],
+ ["frintp", "ZPZ"],
+ ["fsqrt", "ZPZ"],
+ ["fsub", "ZPZ", "dn"],
+ ["fmla", "ZPZZ"],
+ ["fmls", "ZPZZ"],
+ ["fnmla", "ZPZZ"],
+ ["fnmls", "ZPZZ"],
+ ["mla", "ZPZZ"],
+ ["mls", "ZPZZ"],
+ ["and", "ZZZ"],
+ ["eor", "ZZZ"],
+ ["orr", "ZZZ"],
+ ])
+
+generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
+ ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]])
+
print "\n __ bind(forth);"
outfile.write("forth:\n")
@@ -1153,8 +1371,8 @@ outfile.close()
import subprocess
import sys
-# compile for 8.1 because of lse atomics
-subprocess.check_call([AARCH64_AS, "-march=armv8.1-a", "aarch64ops.s", "-o", "aarch64ops.o"])
+# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension.
+subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"])
print
print "/*",
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index f126488..8a92ff2 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -2006,6 +2006,10 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
// branch if we need to invalidate the method later
__ nop();
+ if (UseSVE > 0 && C->max_vector_size() >= 16) {
+ __ reinitialize_ptrue();
+ }
+
int bangsize = C->bang_size_in_bytes();
if (C->need_stack_bang(bangsize) && UseStackBanging)
__ generate_stack_overflow_check(bangsize);
@@ -2172,8 +2176,28 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
if (bottom_type()->isa_vect() != NULL) {
uint ireg = ideal_reg();
- assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
- if (cbuf) {
+ if (ireg == Op_VecA && cbuf) {
+ MacroAssembler _masm(cbuf);
+ int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
+ // stack->stack
+ __ spill_copy_sve_vector_stack_to_stack(src_offset, dst_offset,
+ sve_vector_reg_size_in_bytes);
+ } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
+ __ spill_sve_vector(as_FloatRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo),
+ sve_vector_reg_size_in_bytes);
+ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
+ __ unspill_sve_vector(as_FloatRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo),
+ sve_vector_reg_size_in_bytes);
+ } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
+ __ sve_orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+ as_FloatRegister(Matcher::_regEncode[src_lo]),
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
+ } else {
+ ShouldNotReachHere();
+ }
+ } else if (cbuf) {
+ assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
MacroAssembler _masm(cbuf);
assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
@@ -2452,15 +2476,28 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
-
- // TODO
- // identify extra cases that we might want to provide match rules for
- // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
- bool ret_value = match_rule_supported(opcode);
- // Add rules here.
-
- return ret_value; // Per default match rules are supported.
+ // Identify extra cases that we might want to provide match rules for vector nodes and
+ // other intrinsics guarded with vector length (vlen) and element type (bt).
+ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
+ return false;
+ }
+ int bit_size = vlen * type2aelembytes(bt) * 8;
+ if (UseSVE == 0 && bit_size > 128) {
+ return false;
+ }
+ if (UseSVE > 0) {
+ return op_sve_supported(opcode);
+ } else { // NEON
+ // Special cases
+ switch (opcode) {
+ case Op_MulVL:
+ return false;
+ default:
+ break;
+ }
+ }
+ return true; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
@@ -3812,6 +3849,12 @@ encode %{
return;
}
}
+ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+ // Only non uncommon_trap calls need to reinitialize ptrue.
+ if (uncommon_trap_request() == 0) {
+ __ reinitialize_ptrue();
+ }
+ }
%}
enc_class aarch64_enc_java_dynamic_call(method meth) %{
@@ -3821,6 +3864,8 @@ encode %{
if (call == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
return;
+ } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+ __ reinitialize_ptrue();
}
%}
@@ -3857,6 +3902,9 @@ encode %{
__ bind(retaddr);
__ add(sp, sp, 2 * wordSize);
}
+ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+ __ reinitialize_ptrue();
+ }
%}
enc_class aarch64_enc_rethrow() %{
@@ -3866,6 +3914,11 @@ encode %{
enc_class aarch64_enc_ret() %{
MacroAssembler _masm(&cbuf);
+#ifdef ASSERT
+ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+ __ verify_ptrue();
+ }
+#endif
__ ret(lr);
%}
@@ -4607,6 +4660,41 @@ operand immLoffset16()
interface(CONST_INTER);
%}
+// 8 bit signed value.
+operand immI8()
+%{
+ predicate(n->get_int() <= 127 && n->get_int() >= -128);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 8 bit signed value (simm8), or #simm8 LSL 8.
+operand immI8_shift8()
+%{
+ predicate((n->get_int() <= 127 && n->get_int() >= -128) ||
+ (n->get_int() <= 32512 && n->get_int() >= -32768 && (n->get_int() & 0xff) == 0));
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+// 8 bit signed value (simm8), or #simm8 LSL 8.
+operand immL8_shift8()
+%{
+ predicate((n->get_long() <= 127 && n->get_long() >= -128) ||
+ (n->get_long() <= 32512 && n->get_long() >= -32768 && (n->get_long() & 0xff) == 0));
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
@@ -16433,7 +16521,7 @@ instruct loadV8(vecD dst, vmem8 mem)
// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
- predicate(n->as_LoadVector()->memory_size() == 16);
+ predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16);
match(Set dst (LoadVector mem));
ins_cost(4 * INSN_COST);
format %{ "ldrq $dst,$mem\t# vector (128 bits)" %}
@@ -16489,7 +16577,7 @@ instruct replicate8B(vecD dst, iRegIorL2I src)
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
- predicate(n->as_Vector()->length() == 16);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
match(Set dst (ReplicateB src));
ins_cost(INSN_COST);
format %{ "dup $dst, $src\t# vector (16B)" %}
@@ -16514,7 +16602,7 @@ instruct replicate8B_imm(vecD dst, immI con)
instruct replicate16B_imm(vecX dst, immI con)
%{
- predicate(n->as_Vector()->length() == 16);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
match(Set dst (ReplicateB con));
ins_cost(INSN_COST);
format %{ "movi $dst, $con\t# vector(16B)" %}
@@ -16539,7 +16627,7 @@ instruct replicate4S(vecD dst, iRegIorL2I src)
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
- predicate(n->as_Vector()->length() == 8);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
match(Set dst (ReplicateS src));
ins_cost(INSN_COST);
format %{ "dup $dst, $src\t# vector (8S)" %}
@@ -16564,7 +16652,7 @@ instruct replicate4S_imm(vecD dst, immI con)
instruct replicate8S_imm(vecX dst, immI con)
%{
- predicate(n->as_Vector()->length() == 8);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
match(Set dst (ReplicateS con));
ins_cost(INSN_COST);
format %{ "movi $dst, $con\t# vector(8H)" %}
@@ -16588,7 +16676,7 @@ instruct replicate2I(vecD dst, iRegIorL2I src)
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
match(Set dst (ReplicateI src));
ins_cost(INSN_COST);
format %{ "dup $dst, $src\t# vector (4I)" %}
@@ -16612,7 +16700,7 @@ instruct replicate2I_imm(vecD dst, immI con)
instruct replicate4I_imm(vecX dst, immI con)
%{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
match(Set dst (ReplicateI con));
ins_cost(INSN_COST);
format %{ "movi $dst, $con\t# vector(4I)" %}
@@ -16624,7 +16712,7 @@ instruct replicate4I_imm(vecX dst, immI con)
instruct replicate2L(vecX dst, iRegL src)
%{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
match(Set dst (ReplicateL src));
ins_cost(INSN_COST);
format %{ "dup $dst, $src\t# vector (2L)" %}
@@ -16636,7 +16724,7 @@ instruct replicate2L(vecX dst, iRegL src)
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
match(Set dst (ReplicateI zero));
ins_cost(INSN_COST);
format %{ "movi $dst, $zero\t# vector(4I)" %}
@@ -16663,7 +16751,7 @@ instruct replicate2F(vecD dst, vRegF src)
instruct replicate4F(vecX dst, vRegF src)
%{
- predicate(n->as_Vector()->length() == 4);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
match(Set dst (ReplicateF src));
ins_cost(INSN_COST);
format %{ "dup $dst, $src\t# vector (4F)" %}
@@ -16676,7 +16764,7 @@ instruct replicate4F(vecX dst, vRegF src)
instruct replicate2D(vecX dst, vRegD src)
%{
- predicate(n->as_Vector()->length() == 2);
+ predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
match(Set dst (ReplicateD src));
ins_cost(INSN_COST);
format %{ "dup $dst, $src\t# vector (2D)" %}
diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad
new file mode 100644
index 0000000..8d80cb3
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad
@@ -0,0 +1,1366 @@
+//
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, Arm Limited. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
+
+// AArch64 SVE Architecture Description File
+
+
+// 4 bit signed offset -- for predicated load/store
+
+operand vmemA_immIOffset4()
+%{
+ predicate(Address::offset_ok_for_sve_immed(n->get_int(), 4,
+ Matcher::scalable_vector_reg_size(T_BYTE)));
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand vmemA_immLOffset4()
+%{
+ predicate(Address::offset_ok_for_sve_immed(n->get_long(), 4,
+ Matcher::scalable_vector_reg_size(T_BYTE)));
+ match(ConL);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+
+operand vmemA_indOffI4(iRegP reg, vmemA_immIOffset4 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off, MUL VL]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+operand vmemA_indOffL4(iRegP reg, vmemA_immLOffset4 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off, MUL VL]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ index(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}
+
+opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
+
+source_hpp %{
+ bool op_sve_supported(int opcode);
+%}
+
+source %{
+
+ static inline BasicType vector_element_basic_type(const MachNode* n) {
+ const TypeVect* vt = n->bottom_type()->is_vect();
+ return vt->element_basic_type();
+ }
+
+ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
+ int def_idx = use->operand_index(opnd);
+ Node* def = use->in(def_idx);
+ const TypeVect* vt = def->bottom_type()->is_vect();
+ return vt->element_basic_type();
+ }
+
+ typedef void (MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
+ PRegister Pg, const Address &adr);
+
+ // Predicated load/store, with optional ptrue to all elements of given predicate register.
+ static void loadStoreA_predicate(MacroAssembler masm, bool is_store,
+ FloatRegister reg, PRegister pg, BasicType bt,
+ int opcode, Register base, int index, int size, int disp) {
+ sve_mem_insn_predicate insn = NULL;
+ Assembler::SIMD_RegVariant type = Assembler::B;
+ int esize = type2aelembytes(bt);
+ if (index == -1) {
+ assert(size == 0, "unsupported address mode: scale size = %d", size);
+ switch(esize) {
+ case 1:
+ insn = is_store ? &MacroAssembler::sve_st1b : &MacroAssembler::sve_ld1b;
+ type = Assembler::B;
+ break;
+ case 2:
+ insn = is_store ? &MacroAssembler::sve_st1h : &MacroAssembler::sve_ld1h;
+ type = Assembler::H;
+ break;
+ case 4:
+ insn = is_store ? &MacroAssembler::sve_st1w : &MacroAssembler::sve_ld1w;
+ type = Assembler::S;
+ break;
+ case 8:
+ insn = is_store ? &MacroAssembler::sve_st1d : &MacroAssembler::sve_ld1d;
+ type = Assembler::D;
+ break;
+ default:
+ assert(false, "unsupported");
+ ShouldNotReachHere();
+ }
+ (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE)));
+ } else {
+ assert(false, "unimplemented");
+ ShouldNotReachHere();
+ }
+ }
+
+ bool op_sve_supported(int opcode) {
+ switch (opcode) {
+ // No multiply reduction instructions
+ case Op_MulReductionVD:
+ case Op_MulReductionVF:
+ case Op_MulReductionVI:
+ case Op_MulReductionVL:
+ // Others
+ case Op_Extract:
+ case Op_ExtractB:
+ case Op_ExtractC:
+ case Op_ExtractD:
+ case Op_ExtractF:
+ case Op_ExtractI:
+ case Op_ExtractL:
+ case Op_ExtractS:
+ case Op_ExtractUB:
+ return false;
+ default:
+ return true;
+ }
+ }
+
+%}
+
+definitions %{
+ int_def SVE_COST (200, 200);
+%}
+
+
+
+
+// All SVE instructions
+
+// vector load/store
+
+// Use predicated vector load/store
+instruct loadV(vReg dst, vmemA mem) %{
+ predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16);
+ match(Set dst (LoadVector mem));
+ ins_cost(SVE_COST);
+ format %{ "sve_ldr $dst, $mem\t # vector (sve)" %}
+ ins_encode %{
+ FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+ loadStoreA_predicate(MacroAssembler(&cbuf), false, dst_reg, ptrue,
+ vector_element_basic_type(this), $mem->opcode(),
+ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct storeV(vReg src, vmemA mem) %{
+ predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16);
+ match(Set mem (StoreVector mem src));
+ ins_cost(SVE_COST);
+ format %{ "sve_str $mem, $src\t # vector (sve)" %}
+ ins_encode %{
+ FloatRegister src_reg = as_FloatRegister($src$$reg);
+ loadStoreA_predicate(MacroAssembler(&cbuf), true, src_reg, ptrue,
+ vector_element_basic_type(this, $src), $mem->opcode(),
+ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector add
+
+instruct vaddB(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (AddVB src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_add(as_FloatRegister($dst$$reg), __ B,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddS(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (AddVS src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_add(as_FloatRegister($dst$$reg), __ H,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddI(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (AddVI src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_add(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddL(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (AddVL src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_add(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddF(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (AddVF src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fadd(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vaddD(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (AddVD src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fadd(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector and
+
+instruct vand(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (AndV src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_and $dst, $src1, $src2\t# vector (sve)" %}
+ ins_encode %{
+ __ sve_and(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector or
+
+instruct vor(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (OrV src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_orr $dst, $src1, $src2\t# vector (sve)" %}
+ ins_encode %{
+ __ sve_orr(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector xor
+
+instruct vxor(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (XorV src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_eor $dst, $src1, $src2\t# vector (sve)" %}
+ ins_encode %{
+ __ sve_eor(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector float div
+
+instruct vdivF(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (DivVF dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vdivD(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (DivVD dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fmla
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fmls
+
+// dst_src1 = dst_src1 + -src2 * src3
+// dst_src1 = dst_src1 + src2 * -src3
+instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3)));
+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3))));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + -src2 * src3
+// dst_src1 = dst_src1 + src2 * -src3
+instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3)));
+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3))));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fnmla
+
+// dst_src1 = -dst_src1 + -src2 * src3
+// dst_src1 = -dst_src1 + src2 * -src3
+instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3)));
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3))));
+ ins_cost(SVE_COST);
+ format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = -dst_src1 + -src2 * src3
+// dst_src1 = -dst_src1 + src2 * -src3
+instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3)));
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3))));
+ ins_cost(SVE_COST);
+ format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fnmls
+
+// dst_src1 = -dst_src1 + src2 * src3
+instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = -dst_src1 + src2 * src3
+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector mla
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_mla(as_FloatRegister($dst_src1$$reg), __ H,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_mla(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_mla(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector mls
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_mls(as_FloatRegister($dst_src1$$reg), __ H,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_mls(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_mls(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+
+// vector mul
+
+instruct vmulS(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst_src1 (MulVS dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_mul(as_FloatRegister($dst_src1$$reg), __ H,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulI(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst_src1 (MulVI dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_mul(as_FloatRegister($dst_src1$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulL(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst_src1 (MulVL dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_mul(as_FloatRegister($dst_src1$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulF(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (MulVF src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fmul(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vmulD(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (MulVD src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fmul(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector fneg
+
+instruct vnegF(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (NegVF src));
+ ins_cost(SVE_COST);
+ format %{ "sve_fneg $dst, $src\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fneg(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vnegD(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (NegVD src));
+ ins_cost(SVE_COST);
+ format %{ "sve_fneg $dst, $src\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fneg(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// popcount vector
+
+instruct vpopcountI(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (PopCountVI src));
+ format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
+ ins_encode %{
+ __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector add reduction
+
+instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+ (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP_DEF dst, TEMP tmp);
+ ins_cost(SVE_COST);
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (S)\n\t"
+ "umov $dst, $tmp, S, 0\n\t"
+ "addw $dst, $dst, $src1\t # add reduction S" %}
+ ins_encode %{
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
+ __ addw($dst$$Register, $dst$$Register, $src1$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+ (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG));
+ match(Set dst (AddReductionVL src1 src2));
+ effect(TEMP_DEF dst, TEMP tmp);
+ ins_cost(SVE_COST);
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (D)\n\t"
+ "umov $dst, $tmp, D, 0\n\t"
+ "add $dst, $dst, $src1\t # add reduction D" %}
+ ins_encode %{
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0);
+ __ add($dst$$Register, $dst$$Register, $src1$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addF(vRegF src1_dst, vReg src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set src1_dst (AddReductionVF src1_dst src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addD(vRegD src1_dst, vReg src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set src1_dst (AddReductionVD src1_dst src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector replicate
+
+instruct replicateB(vReg dst, iRegIorL2I src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (ReplicateB src));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $src\t# vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateS(vReg dst, iRegIorL2I src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (ReplicateS src));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $src\t# vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateI(vReg dst, iRegIorL2I src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (ReplicateI src));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $src\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateL(vReg dst, iRegL src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (ReplicateL src));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $src\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+
+instruct replicateB_imm8(vReg dst, immI8 con) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (ReplicateB con));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $con\t# vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ B, $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateS_imm8(vReg dst, immI8_shift8 con) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (ReplicateS con));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $con\t# vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ H, $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateI_imm8(vReg dst, immI8_shift8 con) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (ReplicateI con));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $con\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ S, $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateL_imm8(vReg dst, immL8_shift8 con) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (ReplicateL con));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $con\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ D, $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+
+instruct replicateF(vReg dst, vRegF src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (ReplicateF src));
+ ins_cost(SVE_COST);
+ format %{ "sve_cpy $dst, $src\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_cpy(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct replicateD(vReg dst, vRegD src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (ReplicateD src));
+ ins_cost(SVE_COST);
+ format %{ "sve_cpy $dst, $src\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_cpy(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector shift
+
+instruct vasrB(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (RShiftVB dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_asr(as_FloatRegister($dst$$reg), __ B,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrS(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (RShiftVS dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_asr(as_FloatRegister($dst$$reg), __ H,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrI(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (RShiftVI dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_asr(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrL(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (RShiftVL dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_asr(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslB(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (LShiftVB dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslS(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (LShiftVS dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslI(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (LShiftVI dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslL(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (LShiftVL dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrB(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (URShiftVB dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ B,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrS(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (URShiftVS dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ H,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrI(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (URShiftVI dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrL(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (URShiftVL dst shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (RShiftVB src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ if (con >= 8) con = 7;
+ __ sve_asr(as_FloatRegister($dst$$reg), __ B,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (RShiftVS src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ if (con >= 16) con = 15;
+ __ sve_asr(as_FloatRegister($dst$$reg), __ H,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (RShiftVI src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_asr(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vasrL_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (RShiftVL src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_asr(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (URShiftVB src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ if (con >= 8) {
+ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ B,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (URShiftVS src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ if (con >= 8) {
+ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ H,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (URShiftVI src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (URShiftVL src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_lsr(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (LShiftVB src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con >= 8) {
+ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ B,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (LShiftVS src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ if (con >= 8) {
+ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ H,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (LShiftVI src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vlslL_imm(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (LShiftVL src shift));
+ ins_cost(SVE_COST);
+ format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;
+ __ sve_lsl(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE));
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (B)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+ (n->bottom_type()->is_vect()->element_basic_type() == T_CHAR)));
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT));
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_LONG));
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector sqrt
+
+instruct vsqrtF(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (SqrtVF src));
+ ins_cost(SVE_COST);
+ format %{ "sve_fsqrt $dst, $src\t# vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fsqrt(as_FloatRegister($dst$$reg), __ S,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsqrtD(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16);
+ match(Set dst (SqrtVD src));
+ ins_cost(SVE_COST);
+ format %{ "sve_fsqrt $dst, $src\t# vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fsqrt(as_FloatRegister($dst$$reg), __ D,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// vector sub
+
+instruct vsubB(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
+ match(Set dst (SubVB src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (B)" %}
+ ins_encode %{
+ __ sve_sub(as_FloatRegister($dst$$reg), __ B,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubS(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
+ match(Set dst (SubVS src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (H)" %}
+ ins_encode %{
+ __ sve_sub(as_FloatRegister($dst$$reg), __ H,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubI(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (SubVI src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_sub(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubL(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (SubVL src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_sub(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubF(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (SubVF src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (S)" %}
+ ins_encode %{
+ __ sve_fsub(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vsubD(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
+ match(Set dst (SubVD src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (D)" %}
+ ins_encode %{
+ __ sve_fsub(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
diff --git a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
new file mode 100644
index 0000000..0323f2f
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
@@ -0,0 +1,727 @@
+//
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, Arm Limited. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+dnl Generate the warning
+// This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
+dnl
+
+// AArch64 SVE Architecture Description File
+
+dnl
+dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET($1, $2, $3 )
+dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET(imm_type_abbr, imm_type, imm_len)
+define(`OPERAND_VMEMORYA_IMMEDIATE_OFFSET', `
+operand vmemA_imm$1Offset$3()
+%{
+ predicate(Address::offset_ok_for_sve_immed(n->get_$2(), $3,
+ Matcher::scalable_vector_reg_size(T_BYTE)));
+ match(Con$1);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}')
+dnl
+// 4 bit signed offset -- for predicated load/store
+OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int, 4)
+OPERAND_VMEMORYA_IMMEDIATE_OFFSET(L, long, 4)
+dnl
+dnl OPERAND_VMEMORYA_INDIRECT_OFFSET($1, $2 )
+dnl OPERAND_VMEMORYA_INDIRECT_OFFSET(imm_type_abbr, imm_len)
+define(`OPERAND_VMEMORYA_INDIRECT_OFFSET', `
+operand vmemA_indOff$1$2(iRegP reg, vmemA_imm$1Offset$2 off)
+%{
+ constraint(ALLOC_IN_RC(ptr_reg));
+ match(AddP reg off);
+ op_cost(0);
+ format %{ "[$reg, $off, MUL VL]" %}
+ interface(MEMORY_INTER) %{
+ base($reg);
+ `index'(0xffffffff);
+ scale(0x0);
+ disp($off);
+ %}
+%}')
+dnl
+OPERAND_VMEMORYA_INDIRECT_OFFSET(I, 4)
+OPERAND_VMEMORYA_INDIRECT_OFFSET(L, 4)
+
+opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
+
+source_hpp %{
+ bool op_sve_supported(int opcode);
+%}
+
+source %{
+
+ static inline BasicType vector_element_basic_type(const MachNode* n) {
+ const TypeVect* vt = n->bottom_type()->is_vect();
+ return vt->element_basic_type();
+ }
+
+ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
+ int def_idx = use->operand_index(opnd);
+ Node* def = use->in(def_idx);
+ const TypeVect* vt = def->bottom_type()->is_vect();
+ return vt->element_basic_type();
+ }
+
+ typedef void (MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
+ PRegister Pg, const Address &adr);
+
+  // Predicated load/store, where ptrue may be passed as the governing predicate to cover all elements.
+ static void loadStoreA_predicate(MacroAssembler masm, bool is_store,
+ FloatRegister reg, PRegister pg, BasicType bt,
+ int opcode, Register base, int index, int size, int disp) {
+ sve_mem_insn_predicate insn;
+ Assembler::SIMD_RegVariant type;
+ int esize = type2aelembytes(bt);
+ if (index == -1) {
+ assert(size == 0, "unsupported address mode: scale size = %d", size);
+ switch(esize) {
+ case 1:
+ insn = is_store ? &MacroAssembler::sve_st1b : &MacroAssembler::sve_ld1b;
+ type = Assembler::B;
+ break;
+ case 2:
+ insn = is_store ? &MacroAssembler::sve_st1h : &MacroAssembler::sve_ld1h;
+ type = Assembler::H;
+ break;
+ case 4:
+ insn = is_store ? &MacroAssembler::sve_st1w : &MacroAssembler::sve_ld1w;
+ type = Assembler::S;
+ break;
+ case 8:
+ insn = is_store ? &MacroAssembler::sve_st1d : &MacroAssembler::sve_ld1d;
+ type = Assembler::D;
+ break;
+ default:
+ assert(false, "unsupported");
+ ShouldNotReachHere();
+ }
+ (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE)));
+ } else {
+ assert(false, "unimplemented");
+ ShouldNotReachHere();
+ }
+ }
+
+ bool op_sve_supported(int opcode) {
+ switch (opcode) {
+ // No multiply reduction instructions
+ case Op_MulReductionVD:
+ case Op_MulReductionVF:
+ case Op_MulReductionVI:
+ case Op_MulReductionVL:
+ // Others
+ case Op_Extract:
+ case Op_ExtractB:
+ case Op_ExtractC:
+ case Op_ExtractD:
+ case Op_ExtractF:
+ case Op_ExtractI:
+ case Op_ExtractL:
+ case Op_ExtractS:
+ case Op_ExtractUB:
+ return false;
+ default:
+ return true;
+ }
+ }
+
+%}
+
+definitions %{
+ int_def SVE_COST (200, 200);
+%}
+
+
+dnl
+dnl ELEMENT_SHORT_CHAR($1, $2)
+dnl ELEMENT_SHORT_CHAR(etype, node)
+define(`ELEMENT_SHORT_CHAR',`ifelse(`$1', `T_SHORT',
+ `($2->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
+ ($2->bottom_type()->is_vect()->element_basic_type() == T_CHAR))',
+ `($2->bottom_type()->is_vect()->element_basic_type() == $1)')')
+dnl
+
+// All SVE instructions
+
+// vector load/store
+
+// Use predicated vector load/store
+instruct loadV(vReg dst, vmemA mem) %{
+ predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16);
+ match(Set dst (LoadVector mem));
+ ins_cost(SVE_COST);
+ format %{ "sve_ldr $dst, $mem\t # vector (sve)" %}
+ ins_encode %{
+ FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+ loadStoreA_predicate(MacroAssembler(&cbuf), false, dst_reg, ptrue,
+ vector_element_basic_type(this), $mem->opcode(),
+ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct storeV(vReg src, vmemA mem) %{
+ predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16);
+ match(Set mem (StoreVector mem src));
+ ins_cost(SVE_COST);
+ format %{ "sve_str $mem, $src\t # vector (sve)" %}
+ ins_encode %{
+ FloatRegister src_reg = as_FloatRegister($src$$reg);
+ loadStoreA_predicate(MacroAssembler(&cbuf), true, src_reg, ptrue,
+ vector_element_basic_type(this, $src), $mem->opcode(),
+ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+dnl
+dnl UNARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 )
+dnl UNARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
+define(`UNARY_OP_TRUE_PREDICATE_ETYPE', `
+instruct $1(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
+ n->bottom_type()->is_vect()->element_basic_type() == $3);
+ match(Set dst ($2 src));
+ ins_cost(SVE_COST);
+ format %{ "$6 $dst, $src\t# vector (sve) ($4)" %}
+ ins_encode %{
+ __ $6(as_FloatRegister($dst$$reg), __ $4,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+dnl
+dnl BINARY_OP_UNPREDICATED($1, $2, $3, $4, $5 )
+dnl BINARY_OP_UNPREDICATED(insn_name, op_name, size, min_vec_len, insn)
+define(`BINARY_OP_UNPREDICATED', `
+instruct $1(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+ match(Set dst ($2 src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "$5 $dst, $src1, $src2\t # vector (sve) ($3)" %}
+ ins_encode %{
+ __ $5(as_FloatRegister($dst$$reg), __ $3,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+// vector add
+BINARY_OP_UNPREDICATED(vaddB, AddVB, B, 16, sve_add)
+BINARY_OP_UNPREDICATED(vaddS, AddVS, H, 8, sve_add)
+BINARY_OP_UNPREDICATED(vaddI, AddVI, S, 4, sve_add)
+BINARY_OP_UNPREDICATED(vaddL, AddVL, D, 2, sve_add)
+BINARY_OP_UNPREDICATED(vaddF, AddVF, S, 4, sve_fadd)
+BINARY_OP_UNPREDICATED(vaddD, AddVD, D, 2, sve_fadd)
+dnl
+dnl BINARY_OP_UNSIZED($1, $2, $3, $4 )
+dnl BINARY_OP_UNSIZED(insn_name, op_name, min_vec_len, insn)
+define(`BINARY_OP_UNSIZED', `
+instruct $1(vReg dst, vReg src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $3);
+ match(Set dst ($2 src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "$4 $dst, $src1, $src2\t# vector (sve)" %}
+ ins_encode %{
+ __ $4(as_FloatRegister($dst$$reg),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+// vector and
+BINARY_OP_UNSIZED(vand, AndV, 16, sve_and)
+
+// vector or
+BINARY_OP_UNSIZED(vor, OrV, 16, sve_orr)
+
+// vector xor
+BINARY_OP_UNSIZED(vxor, XorV, 16, sve_eor)
+dnl
+dnl VDIVF($1, $2 , $3 )
+dnl VDIVF(name_suffix, size, min_vec_len)
+define(`VDIVF', `
+instruct vdiv$1(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (DivV$1 dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+// vector float div
+VDIVF(F, S, 4)
+VDIVF(D, D, 2)
+
+dnl
+dnl BINARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 )
+dnl BINARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
+define(`BINARY_OP_TRUE_PREDICATE_ETYPE', `
+instruct $1(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
+ n->bottom_type()->is_vect()->element_basic_type() == $3);
+ match(Set dst_src1 ($2 dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "$6 $dst_src1, $dst_src1, $src2\t # vector (sve) ($4)" %}
+ ins_encode %{
+ __ $6(as_FloatRegister($dst_src1$$reg), __ $4,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+
+dnl
+dnl VFMLA($1, $2, $3 )
+dnl VFMLA(name_suffix, size, min_vec_len)
+define(`VFMLA', `
+// dst_src1 = dst_src1 + src2 * src3
+instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fmla
+VFMLA(F, S, 4)
+VFMLA(D, D, 2)
+
+dnl
+dnl VFMLS($1, $2, $3 )
+dnl VFMLS(name_suffix, size, min_vec_len)
+define(`VFMLS', `
+// dst_src1 = dst_src1 + -src2 * src3
+// dst_src1 = dst_src1 + src2 * -src3
+instruct vfmls$1(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (FmaV$1 dst_src1 (Binary (NegV$1 src2) src3)));
+ match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 (NegV$1 src3))));
+ ins_cost(SVE_COST);
+ format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fmls
+VFMLS(F, S, 4)
+VFMLS(D, D, 2)
+
+dnl
+dnl VFNMLA($1, $2, $3 )
+dnl VFNMLA(name_suffix, size, min_vec_len)
+define(`VFNMLA', `
+// dst_src1 = -dst_src1 + -src2 * src3
+// dst_src1 = -dst_src1 + src2 * -src3
+instruct vfnmla$1(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary (NegV$1 src2) src3)));
+ match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 (NegV$1 src3))));
+ ins_cost(SVE_COST);
+ format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fnmla
+VFNMLA(F, S, 4)
+VFNMLA(D, D, 2)
+
+dnl
+dnl VFNMLS($1, $2, $3 )
+dnl VFNMLS(name_suffix, size, min_vec_len)
+define(`VFNMLS', `
+// dst_src1 = -dst_src1 + src2 * src3
+instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{
+ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fnmls
+VFNMLS(F, S, 4)
+VFNMLS(D, D, 2)
+
+dnl
+dnl VMLA($1, $2, $3 )
+dnl VMLA(name_suffix, size, min_vec_len)
+define(`VMLA', `
+// dst_src1 = dst_src1 + src2 * src3
+instruct vmla$1(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (AddV$1 dst_src1 (MulV$1 src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_mla(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector mla
+VMLA(B, B, 16)
+VMLA(S, H, 8)
+VMLA(I, S, 4)
+VMLA(L, D, 2)
+
+dnl
+dnl VMLS($1, $2, $3 )
+dnl VMLS(name_suffix, size, min_vec_len)
+define(`VMLS', `
+// dst_src1 = dst_src1 - src2 * src3
+instruct vmls$1(vReg dst_src1, vReg src2, vReg src3)
+%{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
+ match(Set dst_src1 (SubV$1 dst_src1 (MulV$1 src2 src3)));
+ ins_cost(SVE_COST);
+ format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) ($2)" %}
+ ins_encode %{
+ __ sve_mls(as_FloatRegister($dst_src1$$reg), __ $2,
+ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector mls
+VMLS(B, B, 16)
+VMLS(S, H, 8)
+VMLS(I, S, 4)
+VMLS(L, D, 2)
+
+dnl
+dnl BINARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
+dnl BINARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
+define(`BINARY_OP_TRUE_PREDICATE', `
+instruct $1(vReg dst_src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+ match(Set dst_src1 ($2 dst_src1 src2));
+ ins_cost(SVE_COST);
+ format %{ "$5 $dst_src1, $dst_src1, $src2\t # vector (sve) ($3)" %}
+ ins_encode %{
+ __ $5(as_FloatRegister($dst_src1$$reg), __ $3,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+// vector mul
+BINARY_OP_TRUE_PREDICATE(vmulS, MulVS, H, 8, sve_mul)
+BINARY_OP_TRUE_PREDICATE(vmulI, MulVI, S, 4, sve_mul)
+BINARY_OP_TRUE_PREDICATE(vmulL, MulVL, D, 2, sve_mul)
+BINARY_OP_UNPREDICATED(vmulF, MulVF, S, 4, sve_fmul)
+BINARY_OP_UNPREDICATED(vmulD, MulVD, D, 2, sve_fmul)
+
+dnl
+dnl UNARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
+dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_bytes, insn)
+define(`UNARY_OP_TRUE_PREDICATE', `
+instruct $1(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $4);
+ match(Set dst ($2 src));
+ ins_cost(SVE_COST);
+ format %{ "$5 $dst, $src\t# vector (sve) ($3)" %}
+ ins_encode %{
+ __ $5(as_FloatRegister($dst$$reg), __ $3,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector fneg
+UNARY_OP_TRUE_PREDICATE(vnegF, NegVF, S, 16, sve_fneg)
+UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, 16, sve_fneg)
+
+// popcount vector
+
+instruct vpopcountI(vReg dst, vReg src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
+ match(Set dst (PopCountVI src));
+ format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
+ ins_encode %{
+ __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+dnl
+dnl REDUCE_ADD($1, $2, $3, $4, $5, $6, $7 )
+dnl REDUCE_ADD(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1)
+define(`REDUCE_ADD', `
+instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+ ELEMENT_SHORT_CHAR($6, n->in(2)));
+ match(Set dst ($2 src1 src2));
+ effect(TEMP_DEF dst, TEMP tmp);
+ ins_cost(SVE_COST);
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) ($5)\n\t"
+ "umov $dst, $tmp, $5, 0\n\t"
+ "$7 $dst, $dst, $src1\t # add reduction $5" %}
+ ins_encode %{
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ $5,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ $5, 0);
+ __ $7($dst$$Register, $dst$$Register, $src1$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl REDUCE_ADDF($1, $2, $3, $4 )
+dnl REDUCE_ADDF(insn_name, op_name, reg_dst, size)
+define(`REDUCE_ADDF', `
+instruct $1($3 src1_dst, vReg src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set src1_dst ($2 src1_dst src2));
+ ins_cost(SVE_COST);
+ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %}
+ ins_encode %{
+ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4,
+ ptrue, as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// vector add reduction
+REDUCE_ADD(reduce_addI, AddReductionVI, iRegINoSp, iRegIorL2I, S, T_INT, addw)
+REDUCE_ADD(reduce_addL, AddReductionVL, iRegLNoSp, iRegL, D, T_LONG, add)
+REDUCE_ADDF(reduce_addF, AddReductionVF, vRegF, S)
+REDUCE_ADDF(reduce_addD, AddReductionVD, vRegD, D)
+
+dnl
+dnl REDUCE_FMINMAX($1, $2, $3, $4, $5 )
+dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst)
+define(`REDUCE_FMINMAX', `
+instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
+ n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
+ match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "sve_f$1v $dst, $src2 # vector (sve) (S)\n\t"
+ "f$1s $dst, $dst, $src1\t # $1 reduction $2" %}
+ ins_encode %{
+ __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+// vector max reduction
+REDUCE_FMINMAX(max, F, T_FLOAT, S, vRegF)
+REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD)
+
+// vector min reduction
+REDUCE_FMINMAX(min, F, T_FLOAT, S, vRegF)
+REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD)
+
+dnl
+dnl REPLICATE($1, $2, $3, $4, $5 )
+dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
+define(`REPLICATE', `
+instruct $1(vReg dst, $3 src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
+ match(Set dst ($2 src));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $src\t# vector (sve) ($4)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ $4, as_Register($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl REPLICATE_IMM8($1, $2, $3, $4, $5 )
+dnl REPLICATE_IMM8(insn_name, op_name, imm_type, size, min_vec_len)
+define(`REPLICATE_IMM8', `
+instruct $1(vReg dst, $3 con) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
+ match(Set dst ($2 con));
+ ins_cost(SVE_COST);
+ format %{ "sve_dup $dst, $con\t# vector (sve) ($4)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ $4, $con$$constant);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl FREPLICATE($1, $2, $3, $4, $5 )
+dnl FREPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
+define(`FREPLICATE', `
+instruct $1(vReg dst, $3 src) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
+ match(Set dst ($2 src));
+ ins_cost(SVE_COST);
+ format %{ "sve_cpy $dst, $src\t# vector (sve) ($4)" %}
+ ins_encode %{
+ __ sve_cpy(as_FloatRegister($dst$$reg), __ $4,
+ ptrue, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+// vector replicate
+REPLICATE(replicateB, ReplicateB, iRegIorL2I, B, 16)
+REPLICATE(replicateS, ReplicateS, iRegIorL2I, H, 8)
+REPLICATE(replicateI, ReplicateI, iRegIorL2I, S, 4)
+REPLICATE(replicateL, ReplicateL, iRegL, D, 2)
+
+REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8, B, 16)
+REPLICATE_IMM8(replicateS_imm8, ReplicateS, immI8_shift8, H, 8)
+REPLICATE_IMM8(replicateI_imm8, ReplicateI, immI8_shift8, S, 4)
+REPLICATE_IMM8(replicateL_imm8, ReplicateL, immL8_shift8, D, 2)
+
+FREPLICATE(replicateF, ReplicateF, vRegF, S, 4)
+FREPLICATE(replicateD, ReplicateD, vRegD, D, 2)
+dnl
+dnl VSHIFT_TRUE_PREDICATE($1, $2, $3, $4, $5 )
+dnl VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
+define(`VSHIFT_TRUE_PREDICATE', `
+instruct $1(vReg dst, vReg shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+ match(Set dst ($2 dst shift));
+ ins_cost(SVE_COST);
+ format %{ "$5 $dst, $dst, $shift\t# vector (sve) ($3)" %}
+ ins_encode %{
+ __ $5(as_FloatRegister($dst$$reg), __ $3,
+ ptrue, as_FloatRegister($shift$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5 )
+dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn)
+define(`VSHIFT_IMM_UNPREDICATE', `
+instruct $1(vReg dst, vReg src, immI shift) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
+ match(Set dst ($2 src shift));
+ ins_cost(SVE_COST);
+ format %{ "$5 $dst, $src, $shift\t# vector (sve) ($3)" %}
+ ins_encode %{
+ int con = (int)$shift$$constant;dnl
+ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
+ if (con == 0) {
+ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }')dnl
+ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$3', `B') == 0), 1, `
+ if (con >= 8) con = 7;')ifelse(eval(index(`$3', `H') == 0), 1, `
+ if (con >= 16) con = 15;')')dnl
+ifelse(eval((index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0) && (index(`$3', `B') == 0 || index(`$3', `H') == 0)), 1, `
+ if (con >= 8) {
+ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }')
+ __ $5(as_FloatRegister($dst$$reg), __ $3,
+ as_FloatRegister($src$$reg), con);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl VSHIFT_COUNT($1, $2, $3, $4 )
+dnl VSHIFT_COUNT(insn_name, size, min_vec_len, type)
+define(`VSHIFT_COUNT', `
+instruct $1(vReg dst, iRegIorL2I cnt) %{
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3 &&
+ ELEMENT_SHORT_CHAR($4, n));
+ match(Set dst (LShiftCntV cnt));
+ match(Set dst (RShiftCntV cnt));
+ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) ($2)" %}
+ ins_encode %{
+ __ sve_dup(as_FloatRegister($dst$$reg), __ $2, as_Register($cnt$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+
+// vector shift
+VSHIFT_TRUE_PREDICATE(vasrB, RShiftVB, B, 16, sve_asr)
+VSHIFT_TRUE_PREDICATE(vasrS, RShiftVS, H, 8, sve_asr)
+VSHIFT_TRUE_PREDICATE(vasrI, RShiftVI, S, 4, sve_asr)
+VSHIFT_TRUE_PREDICATE(vasrL, RShiftVL, D, 2, sve_asr)
+VSHIFT_TRUE_PREDICATE(vlslB, LShiftVB, B, 16, sve_lsl)
+VSHIFT_TRUE_PREDICATE(vlslS, LShiftVS, H, 8, sve_lsl)
+VSHIFT_TRUE_PREDICATE(vlslI, LShiftVI, S, 4, sve_lsl)
+VSHIFT_TRUE_PREDICATE(vlslL, LShiftVL, D, 2, sve_lsl)
+VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr)
+VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H, 8, sve_lsr)
+VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S, 4, sve_lsr)
+VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D, 2, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, B, 16, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, H, 8, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, S, 4, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, D, 2, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, B, 16, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, H, 8, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, S, 4, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, D, 2, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, B, 16, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, H, 8, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, S, 4, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, D, 2, sve_lsl)
+VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE)
+VSHIFT_COUNT(vshiftcntS, H, 8, T_SHORT)
+VSHIFT_COUNT(vshiftcntI, S, 4, T_INT)
+VSHIFT_COUNT(vshiftcntL, D, 2, T_LONG)
+
+// vector sqrt
+UNARY_OP_TRUE_PREDICATE(vsqrtF, SqrtVF, S, 16, sve_fsqrt)
+UNARY_OP_TRUE_PREDICATE(vsqrtD, SqrtVD, D, 16, sve_fsqrt)
+
+// vector sub
+BINARY_OP_UNPREDICATED(vsubB, SubVB, B, 16, sve_sub)
+BINARY_OP_UNPREDICATED(vsubS, SubVS, H, 8, sve_sub)
+BINARY_OP_UNPREDICATED(vsubI, SubVI, S, 4, sve_sub)
+BINARY_OP_UNPREDICATED(vsubL, SubVL, D, 2, sve_sub)
+BINARY_OP_UNPREDICATED(vsubF, SubVF, S, 4, sve_fsub)
+BINARY_OP_UNPREDICATED(vsubD, SubVD, D, 2, sve_fsub)
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
index 8047ed8..32e5333 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
@@ -96,662 +96,746 @@ void entry(CodeBuffer *cb) {
__ bind(back);
// ArithOp
- __ add(r15, r0, r24, Assembler::LSL, 59); // add x15, x0, x24, LSL #59
- __ sub(r17, r22, r22, Assembler::ASR, 13); // sub x17, x22, x22, ASR #13
- __ adds(r10, r26, r28, Assembler::LSL, 57); // adds x10, x26, x28, LSL #57
- __ subs(r25, r16, r24, Assembler::LSL, 18); // subs x25, x16, x24, LSL #18
- __ addw(r8, r5, r28, Assembler::LSL, 7); // add w8, w5, w28, LSL #7
- __ subw(r8, r28, r1, Assembler::ASR, 28); // sub w8, w28, w1, ASR #28
- __ addsw(r12, r2, r1, Assembler::LSL, 0); // adds w12, w2, w1, LSL #0
- __ subsw(r23, r5, r17, Assembler::LSR, 25); // subs w23, w5, w17, LSR #25
- __ andr(r21, r12, r13, Assembler::LSL, 21); // and x21, x12, x13, LSL #21
- __ orr(r21, r15, r23, Assembler::ASR, 36); // orr x21, x15, x23, ASR #36
- __ eor(r22, r24, r27, Assembler::ASR, 48); // eor x22, x24, x27, ASR #48
- __ ands(r22, r15, r2, Assembler::ASR, 52); // ands x22, x15, x2, ASR #52
- __ andw(r1, r17, r24, Assembler::ASR, 3); // and w1, w17, w24, ASR #3
- __ orrw(r5, r2, r6, Assembler::ASR, 11); // orr w5, w2, w6, ASR #11
- __ eorw(r23, r1, r5, Assembler::LSR, 12); // eor w23, w1, w5, LSR #12
- __ andsw(r0, r12, r14, Assembler::ASR, 20); // ands w0, w12, w14, ASR #20
- __ bic(r1, r6, r2, Assembler::LSR, 7); // bic x1, x6, x2, LSR #7
- __ orn(r30, r8, r4, Assembler::LSL, 47); // orn x30, x8, x4, LSL #47
- __ eon(r17, r22, r20, Assembler::ASR, 53); // eon x17, x22, x20, ASR #53
- __ bics(r29, r15, r5, Assembler::ASR, 36); // bics x29, x15, x5, ASR #36
- __ bicw(r30, r23, r29, Assembler::LSR, 27); // bic w30, w23, w29, LSR #27
- __ ornw(r12, r29, r2, Assembler::LSL, 20); // orn w12, w29, w2, LSL #20
- __ eonw(r7, r12, r6, Assembler::ASR, 4); // eon w7, w12, w6, ASR #4
- __ bicsw(r16, r13, r7, Assembler::ASR, 21); // bics w16, w13, w7, ASR #21
+ __ add(r27, r27, r14, Assembler::ASR, 25); // add x27, x27, x14, ASR #25
+ __ sub(r4, r11, r17, Assembler::LSR, 10); // sub x4, x11, x17, LSR #10
+ __ adds(r7, r17, r25, Assembler::ASR, 33); // adds x7, x17, x25, ASR #33
+ __ subs(r13, r22, r20, Assembler::ASR, 5); // subs x13, x22, x20, ASR #5
+ __ addw(r10, r28, r3, Assembler::ASR, 16); // add w10, w28, w3, ASR #16
+ __ subw(r21, r2, r6, Assembler::LSR, 15); // sub w21, w2, w6, LSR #15
+ __ addsw(r6, r0, r27, Assembler::ASR, 9); // adds w6, w0, w27, ASR #9
+ __ subsw(r5, r27, r8, Assembler::ASR, 10); // subs w5, w27, w8, ASR #10
+ __ andr(r12, r4, r7, Assembler::ASR, 39); // and x12, x4, x7, ASR #39
+ __ orr(r21, r27, r22, Assembler::LSL, 50); // orr x21, x27, x22, LSL #50
+ __ eor(r3, r21, r0, Assembler::ASR, 46); // eor x3, x21, x0, ASR #46
+ __ ands(r21, r10, r5, Assembler::ASR, 22); // ands x21, x10, x5, ASR #22
+ __ andw(r13, r21, r29, Assembler::LSL, 22); // and w13, w21, w29, LSL #22
+ __ orrw(r17, r10, r16, Assembler::LSR, 18); // orr w17, w10, w16, LSR #18
+ __ eorw(r16, r7, r23, Assembler::ASR, 27); // eor w16, w7, w23, ASR #27
+ __ andsw(r10, r6, r12, Assembler::ASR, 12); // ands w10, w6, w12, ASR #12
+ __ bic(r19, r25, r7, Assembler::LSL, 22); // bic x19, x25, x7, LSL #22
+ __ orn(r25, r2, r7, Assembler::LSL, 53); // orn x25, x2, x7, LSL #53
+ __ eon(r9, r23, r23, Assembler::ASR, 3); // eon x9, x23, x23, ASR #3
+ __ bics(r5, r6, r13, Assembler::ASR, 50); // bics x5, x6, x13, ASR #50
+ __ bicw(r15, r21, r10, Assembler::LSL, 9); // bic w15, w21, w10, LSL #9
+ __ ornw(r17, r21, r30, Assembler::ASR, 1); // orn w17, w21, w30, ASR #1
+ __ eonw(r7, r28, r29, Assembler::LSL, 19); // eon w7, w28, w29, LSL #19
+ __ bicsw(r25, r22, r22, Assembler::ASR, 12); // bics w25, w22, w22, ASR #12
// AddSubImmOp
- __ addw(r5, r17, 726u); // add w5, w17, #726
- __ addsw(r10, r16, 347u); // adds w10, w16, #347
- __ subw(r26, r5, 978u); // sub w26, w5, #978
- __ subsw(r21, r24, 689u); // subs w21, w24, #689
- __ add(r10, r16, 987u); // add x10, x16, #987
- __ adds(r15, r15, 665u); // adds x15, x15, #665
- __ sub(r24, r20, 39u); // sub x24, x20, #39
- __ subs(r10, r13, 76u); // subs x10, x13, #76
+ __ addw(r6, r26, 788u); // add w6, w26, #788
+ __ addsw(r3, r17, 490u); // adds w3, w17, #490
+ __ subw(r5, r21, 507u); // sub w5, w21, #507
+ __ subsw(r22, r27, 883u); // subs w22, w27, #883
+ __ add(r12, r8, 244u); // add x12, x8, #244
+ __ adds(r29, r8, 928u); // adds x29, x8, #928
+ __ sub(r26, r3, 642u); // sub x26, x3, #642
+ __ subs(r29, r15, 628u); // subs x29, x15, #628
// LogicalImmOp
- __ andw(r7, r19, 8388600ull); // and w7, w19, #0x7ffff8
- __ orrw(r5, r17, 4026535935ull); // orr w5, w17, #0xf0000fff
- __ eorw(r16, r28, 4186112ull); // eor w16, w28, #0x3fe000
- __ andsw(r14, r24, 7168ull); // ands w14, w24, #0x1c00
- __ andr(r14, r27, 18446744073709543551ull); // and x14, x27, #0xffffffffffffe07f
- __ orr(r12, r11, 576456354256912384ull); // orr x12, x11, #0x7fffc0000000000
- __ eor(r2, r0, 18437736874454811647ull); // eor x2, x0, #0xffe00000000003ff
- __ ands(r13, r20, 18446744073642573823ull); // ands x13, x20, #0xfffffffffc01ffff
+ __ andw(r21, r30, 4287102855ull); // and w21, w30, #0xff87ff87
+ __ orrw(r21, r12, 2139127680ull); // orr w21, w12, #0x7f807f80
+ __ eorw(r11, r17, 3233857728ull); // eor w11, w17, #0xc0c0c0c0
+ __ andsw(r26, r30, 1056980736ull); // ands w26, w30, #0x3f003f00
+ __ andr(r25, r23, 18445618178097414144ull); // and x25, x23, #0xfffc0000fffc0000
+ __ orr(r30, r14, 16429131440647569407ull); // orr x30, x14, #0xe3ffffffffffffff
+ __ eor(r26, r4, 18446744073172942847ull); // eor x26, x4, #0xffffffffe003ffff
+ __ ands(r26, r0, 18446181398634037247ull); // ands x26, x0, #0xfffe003fffffffff
// AbsOp
- __ b(__ pc()); // b .
- __ b(back); // b back
- __ b(forth); // b forth
- __ bl(__ pc()); // bl .
- __ bl(back); // bl back
- __ bl(forth); // bl forth
+ __ b(__ pc()); // b .
+ __ b(back); // b back
+ __ b(forth); // b forth
+ __ bl(__ pc()); // bl .
+ __ bl(back); // bl back
+ __ bl(forth); // bl forth
// RegAndAbsOp
- __ cbzw(r15, __ pc()); // cbz w15, .
- __ cbzw(r15, back); // cbz w15, back
- __ cbzw(r15, forth); // cbz w15, forth
- __ cbnzw(r28, __ pc()); // cbnz w28, .
- __ cbnzw(r28, back); // cbnz w28, back
- __ cbnzw(r28, forth); // cbnz w28, forth
- __ cbz(r27, __ pc()); // cbz x27, .
- __ cbz(r27, back); // cbz x27, back
- __ cbz(r27, forth); // cbz x27, forth
- __ cbnz(r0, __ pc()); // cbnz x0, .
- __ cbnz(r0, back); // cbnz x0, back
- __ cbnz(r0, forth); // cbnz x0, forth
- __ adr(r13, __ pc()); // adr x13, .
- __ adr(r13, back); // adr x13, back
- __ adr(r13, forth); // adr x13, forth
- __ _adrp(r3, __ pc()); // adrp x3, .
+ __ cbzw(r28, __ pc()); // cbz w28, .
+ __ cbzw(r28, back); // cbz w28, back
+ __ cbzw(r28, forth); // cbz w28, forth
+ __ cbnzw(r17, __ pc()); // cbnz w17, .
+ __ cbnzw(r17, back); // cbnz w17, back
+ __ cbnzw(r17, forth); // cbnz w17, forth
+ __ cbz(r25, __ pc()); // cbz x25, .
+ __ cbz(r25, back); // cbz x25, back
+ __ cbz(r25, forth); // cbz x25, forth
+ __ cbnz(r2, __ pc()); // cbnz x2, .
+ __ cbnz(r2, back); // cbnz x2, back
+ __ cbnz(r2, forth); // cbnz x2, forth
+ __ adr(r29, __ pc()); // adr x29, .
+ __ adr(r29, back); // adr x29, back
+ __ adr(r29, forth); // adr x29, forth
+ __ _adrp(r29, __ pc()); // adrp x29, .
// RegImmAbsOp
- __ tbz(r21, 7, __ pc()); // tbz x21, #7, .
- __ tbz(r21, 7, back); // tbz x21, #7, back
- __ tbz(r21, 7, forth); // tbz x21, #7, forth
- __ tbnz(r15, 9, __ pc()); // tbnz x15, #9, .
- __ tbnz(r15, 9, back); // tbnz x15, #9, back
- __ tbnz(r15, 9, forth); // tbnz x15, #9, forth
+ __ tbz(r6, 6, __ pc()); // tbz x6, #6, .
+ __ tbz(r6, 6, back); // tbz x6, #6, back
+ __ tbz(r6, 6, forth); // tbz x6, #6, forth
+ __ tbnz(r21, 2, __ pc()); // tbnz x21, #2, .
+ __ tbnz(r21, 2, back); // tbnz x21, #2, back
+ __ tbnz(r21, 2, forth); // tbnz x21, #2, forth
// MoveWideImmOp
- __ movnw(r14, 2655, 16); // movn w14, #2655, lsl 16
- __ movzw(r17, 7642, 0); // movz w17, #7642, lsl 0
- __ movkw(r27, 11381, 0); // movk w27, #11381, lsl 0
- __ movn(r1, 19524, 32); // movn x1, #19524, lsl 32
- __ movz(r20, 21126, 16); // movz x20, #21126, lsl 16
- __ movk(r20, 32462, 16); // movk x20, #32462, lsl 16
+ __ movnw(r8, 2735, 0); // movn w8, #2735, lsl 0
+ __ movzw(r11, 11185, 16); // movz w11, #11185, lsl 16
+ __ movkw(r26, 26028, 16); // movk w26, #26028, lsl 16
+ __ movn(r13, 13140, 0); // movn x13, #13140, lsl 0
+ __ movz(r6, 5829, 48); // movz x6, #5829, lsl 48
+ __ movk(r16, 10786, 32); // movk x16, #10786, lsl 32
// BitfieldOp
- __ sbfm(r13, r2, 28, 20); // sbfm x13, x2, #28, #20
- __ bfmw(r16, r20, 19, 15); // bfm w16, w20, #19, #15
- __ ubfmw(r11, r11, 9, 6); // ubfm w11, w11, #9, #6
- __ sbfm(r2, r4, 25, 21); // sbfm x2, x4, #25, #21
- __ bfm(r13, r16, 2, 19); // bfm x13, x16, #2, #19
- __ ubfm(r8, r25, 8, 5); // ubfm x8, x25, #8, #5
+ __ sbfm(r30, r30, 17, 26); // sbfm x30, x30, #17, #26
+ __ bfmw(r4, r9, 15, 12); // bfm w4, w9, #15, #12
+ __ ubfmw(r15, r20, 1, 5); // ubfm w15, w20, #1, #5
+ __ sbfm(r27, r8, 19, 14); // sbfm x27, x8, #19, #14
+ __ bfm(r30, r0, 21, 29); // bfm x30, x0, #21, #29
+ __ ubfm(r27, r26, 22, 11); // ubfm x27, x26, #22, #11
// ExtractOp
- __ extrw(r29, r27, r10, 14); // extr w29, w27, w10, #14
- __ extr(r6, r20, r6, 24); // extr x6, x20, x6, #24
+ __ extrw(r12, r12, r6, 27); // extr w12, w12, w6, #27
+ __ extr(r19, r13, r22, 45); // extr x19, x13, x22, #45
// CondBranchOp
- __ br(Assembler::EQ, __ pc()); // b.EQ .
- __ br(Assembler::EQ, back); // b.EQ back
- __ br(Assembler::EQ, forth); // b.EQ forth
- __ br(Assembler::NE, __ pc()); // b.NE .
- __ br(Assembler::NE, back); // b.NE back
- __ br(Assembler::NE, forth); // b.NE forth
- __ br(Assembler::HS, __ pc()); // b.HS .
- __ br(Assembler::HS, back); // b.HS back
- __ br(Assembler::HS, forth); // b.HS forth
- __ br(Assembler::CS, __ pc()); // b.CS .
- __ br(Assembler::CS, back); // b.CS back
- __ br(Assembler::CS, forth); // b.CS forth
- __ br(Assembler::LO, __ pc()); // b.LO .
- __ br(Assembler::LO, back); // b.LO back
- __ br(Assembler::LO, forth); // b.LO forth
- __ br(Assembler::CC, __ pc()); // b.CC .
- __ br(Assembler::CC, back); // b.CC back
- __ br(Assembler::CC, forth); // b.CC forth
- __ br(Assembler::MI, __ pc()); // b.MI .
- __ br(Assembler::MI, back); // b.MI back
- __ br(Assembler::MI, forth); // b.MI forth
- __ br(Assembler::PL, __ pc()); // b.PL .
- __ br(Assembler::PL, back); // b.PL back
- __ br(Assembler::PL, forth); // b.PL forth
- __ br(Assembler::VS, __ pc()); // b.VS .
- __ br(Assembler::VS, back); // b.VS back
- __ br(Assembler::VS, forth); // b.VS forth
- __ br(Assembler::VC, __ pc()); // b.VC .
- __ br(Assembler::VC, back); // b.VC back
- __ br(Assembler::VC, forth); // b.VC forth
- __ br(Assembler::HI, __ pc()); // b.HI .
- __ br(Assembler::HI, back); // b.HI back
- __ br(Assembler::HI, forth); // b.HI forth
- __ br(Assembler::LS, __ pc()); // b.LS .
- __ br(Assembler::LS, back); // b.LS back
- __ br(Assembler::LS, forth); // b.LS forth
- __ br(Assembler::GE, __ pc()); // b.GE .
- __ br(Assembler::GE, back); // b.GE back
- __ br(Assembler::GE, forth); // b.GE forth
- __ br(Assembler::LT, __ pc()); // b.LT .
- __ br(Assembler::LT, back); // b.LT back
- __ br(Assembler::LT, forth); // b.LT forth
- __ br(Assembler::GT, __ pc()); // b.GT .
- __ br(Assembler::GT, back); // b.GT back
- __ br(Assembler::GT, forth); // b.GT forth
- __ br(Assembler::LE, __ pc()); // b.LE .
- __ br(Assembler::LE, back); // b.LE back
- __ br(Assembler::LE, forth); // b.LE forth
- __ br(Assembler::AL, __ pc()); // b.AL .
- __ br(Assembler::AL, back); // b.AL back
- __ br(Assembler::AL, forth); // b.AL forth
- __ br(Assembler::NV, __ pc()); // b.NV .
- __ br(Assembler::NV, back); // b.NV back
- __ br(Assembler::NV, forth); // b.NV forth
+ __ br(Assembler::EQ, __ pc()); // b.EQ .
+ __ br(Assembler::EQ, back); // b.EQ back
+ __ br(Assembler::EQ, forth); // b.EQ forth
+ __ br(Assembler::NE, __ pc()); // b.NE .
+ __ br(Assembler::NE, back); // b.NE back
+ __ br(Assembler::NE, forth); // b.NE forth
+ __ br(Assembler::HS, __ pc()); // b.HS .
+ __ br(Assembler::HS, back); // b.HS back
+ __ br(Assembler::HS, forth); // b.HS forth
+ __ br(Assembler::CS, __ pc()); // b.CS .
+ __ br(Assembler::CS, back); // b.CS back
+ __ br(Assembler::CS, forth); // b.CS forth
+ __ br(Assembler::LO, __ pc()); // b.LO .
+ __ br(Assembler::LO, back); // b.LO back
+ __ br(Assembler::LO, forth); // b.LO forth
+ __ br(Assembler::CC, __ pc()); // b.CC .
+ __ br(Assembler::CC, back); // b.CC back
+ __ br(Assembler::CC, forth); // b.CC forth
+ __ br(Assembler::MI, __ pc()); // b.MI .
+ __ br(Assembler::MI, back); // b.MI back
+ __ br(Assembler::MI, forth); // b.MI forth
+ __ br(Assembler::PL, __ pc()); // b.PL .
+ __ br(Assembler::PL, back); // b.PL back
+ __ br(Assembler::PL, forth); // b.PL forth
+ __ br(Assembler::VS, __ pc()); // b.VS .
+ __ br(Assembler::VS, back); // b.VS back
+ __ br(Assembler::VS, forth); // b.VS forth
+ __ br(Assembler::VC, __ pc()); // b.VC .
+ __ br(Assembler::VC, back); // b.VC back
+ __ br(Assembler::VC, forth); // b.VC forth
+ __ br(Assembler::HI, __ pc()); // b.HI .
+ __ br(Assembler::HI, back); // b.HI back
+ __ br(Assembler::HI, forth); // b.HI forth
+ __ br(Assembler::LS, __ pc()); // b.LS .
+ __ br(Assembler::LS, back); // b.LS back
+ __ br(Assembler::LS, forth); // b.LS forth
+ __ br(Assembler::GE, __ pc()); // b.GE .
+ __ br(Assembler::GE, back); // b.GE back
+ __ br(Assembler::GE, forth); // b.GE forth
+ __ br(Assembler::LT, __ pc()); // b.LT .
+ __ br(Assembler::LT, back); // b.LT back
+ __ br(Assembler::LT, forth); // b.LT forth
+ __ br(Assembler::GT, __ pc()); // b.GT .
+ __ br(Assembler::GT, back); // b.GT back
+ __ br(Assembler::GT, forth); // b.GT forth
+ __ br(Assembler::LE, __ pc()); // b.LE .
+ __ br(Assembler::LE, back); // b.LE back
+ __ br(Assembler::LE, forth); // b.LE forth
+ __ br(Assembler::AL, __ pc()); // b.AL .
+ __ br(Assembler::AL, back); // b.AL back
+ __ br(Assembler::AL, forth); // b.AL forth
+ __ br(Assembler::NV, __ pc()); // b.NV .
+ __ br(Assembler::NV, back); // b.NV back
+ __ br(Assembler::NV, forth); // b.NV forth
// ImmOp
- __ svc(26948); // svc #26948
- __ hvc(29998); // hvc #29998
- __ smc(10437); // smc #10437
- __ brk(30290); // brk #30290
- __ hlt(20851); // hlt #20851
+ __ svc(16084); // svc #16084
+ __ hvc(5802); // hvc #5802
+ __ smc(14039); // smc #14039
+ __ brk(11389); // brk #11389
+ __ hlt(27339); // hlt #27339
// Op
- __ nop(); // nop
- __ eret(); // eret
- __ drps(); // drps
- __ isb(); // isb
+ __ nop(); // nop
+ __ eret(); // eret
+ __ drps(); // drps
+ __ isb(); // isb
// SystemOp
- __ dsb(Assembler::LD); // dsb LD
- __ dmb(Assembler::ISH); // dmb ISH
+ __ dsb(Assembler::OSH); // dsb OSH
+ __ dmb(Assembler::NSHST); // dmb NSHST
// OneRegOp
- __ br(r9); // br x9
- __ blr(r9); // blr x9
+ __ br(r11); // br x11
+ __ blr(r25); // blr x25
// LoadStoreExclusiveOp
- __ stxr(r2, r29, r11); // stxr w2, x29, [x11]
- __ stlxr(r22, r5, r28); // stlxr w22, x5, [x28]
- __ ldxr(r14, r20); // ldxr x14, [x20]
- __ ldaxr(r29, r19); // ldaxr x29, [x19]
- __ stlr(r6, r21); // stlr x6, [x21]
- __ ldar(r19, r3); // ldar x19, [x3]
+ __ stxr(r14, r15, r13); // stxr w14, x15, [x13]
+ __ stlxr(r30, r25, r1); // stlxr w30, x25, [x1]
+ __ ldxr(r13, r3); // ldxr x13, [x3]
+ __ ldaxr(r8, r21); // ldaxr x8, [x21]
+ __ stlr(r13, r28); // stlr x13, [x28]
+ __ ldar(r8, r30); // ldar x8, [x30]
// LoadStoreExclusiveOp
- __ stxrw(r12, r3, r27); // stxr w12, w3, [x27]
- __ stlxrw(r17, r26, r15); // stlxr w17, w26, [x15]
- __ ldxrw(r13, r14); // ldxr w13, [x14]
- __ ldaxrw(r12, r26); // ldaxr w12, [x26]
- __ stlrw(r8, r17); // stlr w8, [x17]
- __ ldarw(r21, r30); // ldar w21, [x30]
+ __ stxrw(r13, r17, r28); // stxr w13, w17, [x28]
+ __ stlxrw(r21, r17, r19); // stlxr w21, w17, [x19]
+ __ ldxrw(r3, r8); // ldxr w3, [x8]
+ __ ldaxrw(r29, r21); // ldaxr w29, [x21]
+ __ stlrw(r9, r24); // stlr w9, [x24]
+ __ ldarw(r2, r6); // ldar w2, [x6]
// LoadStoreExclusiveOp
- __ stxrh(r0, r15, r11); // stxrh w0, w15, [x11]
- __ stlxrh(r17, r20, r1); // stlxrh w17, w20, [x1]
- __ ldxrh(r29, r8); // ldxrh w29, [x8]
- __ ldaxrh(r17, r12); // ldaxrh w17, [x12]
- __ stlrh(r11, r4); // stlrh w11, [x4]
- __ ldarh(r16, r4); // ldarh w16, [x4]
+ __ stxrh(r12, r20, r16); // stxrh w12, w20, [x16]
+ __ stlxrh(r2, r28, r5); // stlxrh w2, w28, [x5]
+ __ ldxrh(r1, r3); // ldxrh w1, [x3]
+ __ ldaxrh(r24, r13); // ldaxrh w24, [x13]
+ __ stlrh(r15, r25); // stlrh w15, [x25]
+ __ ldarh(r10, r20); // ldarh w10, [x20]
// LoadStoreExclusiveOp
- __ stxrb(r14, r5, r4); // stxrb w14, w5, [x4]
- __ stlxrb(r27, r17, r16); // stlxrb w27, w17, [x16]
- __ ldxrb(r6, r27); // ldxrb w6, [x27]
- __ ldaxrb(r27, r24); // ldaxrb w27, [x24]
- __ stlrb(r10, r20); // stlrb w10, [x20]
- __ ldarb(r9, r26); // ldarb w9, [x26]
+ __ stxrb(r5, r16, r13); // stxrb w5, w16, [x13]
+ __ stlxrb(r10, r15, r17); // stlxrb w10, w15, [x17]
+ __ ldxrb(r17, r19); // ldxrb w17, [x19]
+ __ ldaxrb(r30, r9); // ldaxrb w30, [x9]
+ __ stlrb(r20, r24); // stlrb w20, [x24]
+ __ ldarb(r10, r4); // ldarb w10, [x4]
// LoadStoreExclusiveOp
- __ ldxp(r5, r30, r28); // ldxp x5, x30, [x28]
- __ ldaxp(r10, r9, r19); // ldaxp x10, x9, [x19]
- __ stxp(r11, r16, r21, r12); // stxp w11, x16, x21, [x12]
- __ stlxp(r10, r20, r23, r4); // stlxp w10, x20, x23, [x4]
+ __ ldxp(r25, r8, r9); // ldxp x25, x8, [x9]
+ __ ldaxp(r7, r10, r16); // ldaxp x7, x10, [x16]
+ __ stxp(r25, r16, r11, r9); // stxp w25, x16, x11, [x9]
+ __ stlxp(r7, r5, r9, r15); // stlxp w7, x5, x9, [x15]
// LoadStoreExclusiveOp
- __ ldxpw(r22, r1, r0); // ldxp w22, w1, [x0]
- __ ldaxpw(r3, r1, r8); // ldaxp w3, w1, [x8]
- __ stxpw(r0, r9, r23, r30); // stxp w0, w9, w23, [x30]
- __ stlxpw(r23, r0, r17, r11); // stlxp w23, w0, w17, [x11]
+ __ ldxpw(r12, r4, r3); // ldxp w12, w4, [x3]
+ __ ldaxpw(r17, r2, r5); // ldaxp w17, w2, [x5]
+ __ stxpw(r4, r8, r24, r6); // stxp w4, w8, w24, [x6]
+ __ stlxpw(r4, r12, r25, r16); // stlxp w4, w12, w25, [x16]
-// base_plus_unscaled_offset
+// base_plus_unscaled_offset
// LoadStoreOp
- __ str(r6, Address(r10, -31)); // str x6, [x10, -31]
- __ strw(r7, Address(r0, -5)); // str w7, [x0, -5]
- __ strb(r5, Address(r16, -13)); // strb w5, [x16, -13]
- __ strh(r30, Address(r19, 31)); // strh w30, [x19, 31]
- __ ldr(r16, Address(r9, 119)); // ldr x16, [x9, 119]
- __ ldrw(r8, Address(r16, 59)); // ldr w8, [x16, 59]
- __ ldrb(r10, Address(r12, -7)); // ldrb w10, [x12, -7]
- __ ldrh(r14, Address(r9, -38)); // ldrh w14, [x9, -38]
- __ ldrsb(r24, Address(r30, -8)); // ldrsb x24, [x30, -8]
- __ ldrsh(r7, Address(r4, 23)); // ldrsh x7, [x4, 23]
- __ ldrshw(r17, Address(r14, -39)); // ldrsh w17, [x14, -39]
- __ ldrsw(r11, Address(r27, -31)); // ldrsw x11, [x27, -31]
- __ ldrd(v12, Address(r7, 65)); // ldr d12, [x7, 65]
- __ ldrs(v0, Address(r16, -2)); // ldr s0, [x16, -2]
- __ strd(v13, Address(r23, -161)); // str d13, [x23, -161]
- __ strs(v21, Address(r3, -62)); // str s21, [x3, -62]
-
-// pre
+ __ str(r14, Address(r30, 11)); // str x14, [x30, 11]
+ __ strw(r6, Address(r29, -97)); // str w6, [x29, -97]
+ __ strb(r2, Address(r11, -7)); // strb w2, [x11, -7]
+ __ strh(r20, Address(r8, -22)); // strh w20, [x8, -22]
+ __ ldr(r20, Address(r29, -29)); // ldr x20, [x29, -29]
+ __ ldrw(r9, Address(r0, -26)); // ldr w9, [x0, -26]
+ __ ldrb(r14, Address(r2, 8)); // ldrb w14, [x2, 8]
+ __ ldrh(r13, Address(r1, -24)); // ldrh w13, [x1, -24]
+ __ ldrsb(r13, Address(r17, -7)); // ldrsb x13, [x17, -7]
+ __ ldrsh(r17, Address(r7, -11)); // ldrsh x17, [x7, -11]
+ __ ldrshw(r3, Address(r8, -60)); // ldrsh w3, [x8, -60]
+ __ ldrsw(r14, Address(r12, 12)); // ldrsw x14, [x12, 12]
+ __ ldrd(v5, Address(r21, -235)); // ldr d5, [x21, -235]
+ __ ldrs(v9, Address(r0, -54)); // ldr s9, [x0, -54]
+ __ strd(v15, Address(r8, 95)); // str d15, [x8, 95]
+ __ strs(v22, Address(r0, -16)); // str s22, [x0, -16]
+
+// pre
// LoadStoreOp
- __ str(r2, Address(__ pre(r5, 100))); // str x2, [x5, 100]!
- __ strw(r9, Address(__ pre(r1, -92))); // str w9, [x1, -92]!
- __ strb(r27, Address(__ pre(r30, -5))); // strb w27, [x30, -5]!
- __ strh(r27, Address(__ pre(r15, 12))); // strh w27, [x15, 12]!
- __ ldr(r4, Address(__ pre(r17, -212))); // ldr x4, [x17, -212]!
- __ ldrw(r21, Address(__ pre(r23, 30))); // ldr w21, [x23, 30]!
- __ ldrb(r13, Address(__ pre(r17, -7))); // ldrb w13, [x17, -7]!
- __ ldrh(r25, Address(__ pre(r0, -50))); // ldrh w25, [x0, -50]!
- __ ldrsb(r1, Address(__ pre(r21, -21))); // ldrsb x1, [x21, -21]!
- __ ldrsh(r28, Address(__ pre(r21, -54))); // ldrsh x28, [x21, -54]!
- __ ldrshw(r11, Address(__ pre(r4, 2))); // ldrsh w11, [x4, 2]!
- __ ldrsw(r17, Address(__ pre(r9, 61))); // ldrsw x17, [x9, 61]!
- __ ldrd(v29, Address(__ pre(r19, 39))); // ldr d29, [x19, 39]!
- __ ldrs(v22, Address(__ pre(r22, -85))); // ldr s22, [x22, -85]!
- __ strd(v9, Address(__ pre(r25, -225))); // str d9, [x25, -225]!
- __ strs(v9, Address(__ pre(r2, -15))); // str s9, [x2, -15]!
-
-// post
+ __ str(r23, Address(__ pre(r4, -239))); // str x23, [x4, -239]!
+ __ strw(r17, Address(__ pre(r0, -122))); // str w17, [x0, -122]!
+ __ strb(r26, Address(__ pre(r9, -5))); // strb w26, [x9, -5]!
+ __ strh(r21, Address(__ pre(r14, -8))); // strh w21, [x14, -8]!
+ __ ldr(r8, Address(__ pre(r7, 23))); // ldr x8, [x7, 23]!
+ __ ldrw(r12, Address(__ pre(r8, 22))); // ldr w12, [x8, 22]!
+ __ ldrb(r27, Address(__ pre(r28, 6))); // ldrb w27, [x28, 6]!
+ __ ldrh(r6, Address(__ pre(r19, -58))); // ldrh w6, [x19, -58]!
+ __ ldrsb(r7, Address(__ pre(r5, -20))); // ldrsb x7, [x5, -20]!
+ __ ldrsh(r22, Address(__ pre(r17, -32))); // ldrsh x22, [x17, -32]!
+ __ ldrshw(r17, Address(__ pre(r13, -2))); // ldrsh w17, [x13, -2]!
+ __ ldrsw(r29, Address(__ pre(r4, 22))); // ldrsw x29, [x4, 22]!
+ __ ldrd(v8, Address(__ pre(r28, -78))); // ldr d8, [x28, -78]!
+ __ ldrs(v23, Address(__ pre(r11, -5))); // ldr s23, [x11, -5]!
+ __ strd(v9, Address(__ pre(r20, -23))); // str d9, [x20, -23]!
+ __ strs(v5, Address(__ pre(r3, -103))); // str s5, [x3, -103]!
+
+// post
// LoadStoreOp
- __ str(r13, Address(__ post(r23, -66))); // str x13, [x23], -66
- __ strw(r17, Address(__ post(r16, 10))); // str w17, [x16], 10
- __ strb(r1, Address(__ post(r14, -32))); // strb w1, [x14], -32
- __ strh(r17, Address(__ post(r0, 6))); // strh w17, [x0], 6
- __ ldr(r27, Address(__ post(r25, -172))); // ldr x27, [x25], -172
- __ ldrw(r13, Address(__ post(r25, -38))); // ldr w13, [x25], -38
- __ ldrb(r11, Address(__ post(r25, -29))); // ldrb w11, [x25], -29
- __ ldrh(r30, Address(__ post(r5, 20))); // ldrh w30, [x5], 20
- __ ldrsb(r9, Address(__ post(r7, -7))); // ldrsb x9, [x7], -7
- __ ldrsh(r0, Address(__ post(r3, -62))); // ldrsh x0, [x3], -62
- __ ldrshw(r7, Address(__ post(r14, 31))); // ldrsh w7, [x14], 31
- __ ldrsw(r17, Address(__ post(r27, 39))); // ldrsw x17, [x27], 39
- __ ldrd(v17, Address(__ post(r4, -235))); // ldr d17, [x4], -235
- __ ldrs(v26, Address(__ post(r21, 34))); // ldr s26, [x21], 34
- __ strd(v5, Address(__ post(r17, -57))); // str d5, [x17], -57
- __ strs(v13, Address(__ post(r13, -109))); // str s13, [x13], -109
-
-// base_plus_reg
+ __ str(r13, Address(__ post(r2, 32))); // str x13, [x2], 32
+ __ strw(r30, Address(__ post(r19, 57))); // str w30, [x19], 57
+ __ strb(r29, Address(__ post(r1, 3))); // strb w29, [x1], 3
+ __ strh(r10, Address(__ post(r26, -17))); // strh w10, [x26], -17
+ __ ldr(r15, Address(__ post(r1, -12))); // ldr x15, [x1], -12
+ __ ldrw(r1, Address(__ post(r5, -6))); // ldr w1, [x5], -6
+ __ ldrb(r24, Address(__ post(r15, -14))); // ldrb w24, [x15], -14
+ __ ldrh(r29, Address(__ post(r17, -5))); // ldrh w29, [x17], -5
+ __ ldrsb(r4, Address(__ post(r15, -17))); // ldrsb x4, [x15], -17
+ __ ldrsh(r6, Address(__ post(r2, -54))); // ldrsh x6, [x2], -54
+ __ ldrshw(r27, Address(__ post(r27, 18))); // ldrsh w27, [x27], 18
+ __ ldrsw(r9, Address(__ post(r25, -77))); // ldrsw x9, [x25], -77
+ __ ldrd(v21, Address(__ post(r29, -176))); // ldr d21, [x29], -176
+ __ ldrs(v30, Address(__ post(r9, -50))); // ldr s30, [x9], -50
+ __ strd(v2, Address(__ post(r12, -46))); // str d2, [x12], -46
+ __ strs(v7, Address(__ post(r21, -59))); // str s7, [x21], -59
+
+// base_plus_reg
// LoadStoreOp
- __ str(r6, Address(r16, r4, Address::sxtw(3))); // str x6, [x16, w4, sxtw #3]
- __ strw(r9, Address(r24, r20, Address::sxtw(2))); // str w9, [x24, w20, sxtw #2]
- __ strb(r3, Address(r29, r3, Address::lsl(0))); // strb w3, [x29, x3, lsl #0]
- __ strh(r10, Address(r17, r30, Address::lsl(0))); // strh w10, [x17, x30, lsl #0]
- __ ldr(r27, Address(r11, r7, Address::uxtw(0))); // ldr x27, [x11, w7, uxtw #0]
- __ ldrw(r14, Address(r15, r25, Address::uxtw(0))); // ldr w14, [x15, w25, uxtw #0]
- __ ldrb(r24, Address(r14, r19, Address::lsl(0))); // ldrb w24, [x14, x19, lsl #0]
- __ ldrh(r16, Address(r0, r6, Address::sxtw(0))); // ldrh w16, [x0, w6, sxtw #0]
- __ ldrsb(r10, Address(r12, r12, Address::sxtw(0))); // ldrsb x10, [x12, w12, sxtw #0]
- __ ldrsh(r26, Address(r12, r16, Address::uxtw(0))); // ldrsh x26, [x12, w16, uxtw #0]
- __ ldrshw(r26, Address(r0, r14, Address::lsl(1))); // ldrsh w26, [x0, x14, lsl #1]
- __ ldrsw(r17, Address(r11, r27, Address::sxtx(2))); // ldrsw x17, [x11, x27, sxtx #2]
- __ ldrd(v3, Address(r0, r19, Address::sxtw(3))); // ldr d3, [x0, w19, sxtw #3]
- __ ldrs(v26, Address(r15, r9, Address::lsl(2))); // ldr s26, [x15, x9, lsl #2]
- __ strd(v11, Address(r13, r16, Address::sxtx(0))); // str d11, [x13, x16, sxtx #0]
- __ strs(v26, Address(r19, r21, Address::uxtw(2))); // str s26, [x19, w21, uxtw #2]
-
-// base_plus_scaled_offset
+ __ str(r12, Address(r12, r23, Address::uxtw(3))); // str x12, [x12, w23, uxtw #3]
+ __ strw(r14, Address(r30, r22, Address::uxtw(0))); // str w14, [x30, w22, uxtw #0]
+ __ strb(r12, Address(r3, r26, Address::uxtw(0))); // strb w12, [x3, w26, uxtw #0]
+ __ strh(r11, Address(r14, r30, Address::uxtw(0))); // strh w11, [x14, w30, uxtw #0]
+ __ ldr(r24, Address(r8, r24, Address::lsl(3))); // ldr x24, [x8, x24, lsl #3]
+ __ ldrw(r12, Address(r13, r20, Address::lsl(0))); // ldr w12, [x13, x20, lsl #0]
+ __ ldrb(r22, Address(r4, r5, Address::uxtw(0))); // ldrb w22, [x4, w5, uxtw #0]
+ __ ldrh(r28, Address(r5, r6, Address::uxtw(1))); // ldrh w28, [x5, w6, uxtw #1]
+ __ ldrsb(r2, Address(r11, r25, Address::lsl(0))); // ldrsb x2, [x11, x25, lsl #0]
+ __ ldrsh(r23, Address(r22, r25, Address::sxtx(0))); // ldrsh x23, [x22, x25, sxtx #0]
+ __ ldrshw(r3, Address(r20, r22, Address::sxtw(1))); // ldrsh w3, [x20, w22, sxtw #1]
+ __ ldrsw(r9, Address(r29, r14, Address::sxtx(2))); // ldrsw x9, [x29, x14, sxtx #2]
+ __ ldrd(v16, Address(r23, r29, Address::sxtx(3))); // ldr d16, [x23, x29, sxtx #3]
+ __ ldrs(v7, Address(r28, r20, Address::lsl(2))); // ldr s7, [x28, x20, lsl #2]
+ __ strd(v20, Address(r20, r24, Address::lsl(3))); // str d20, [x20, x24, lsl #3]
+ __ strs(v25, Address(r21, r23, Address::lsl(2))); // str s25, [x21, x23, lsl #2]
+
+// base_plus_scaled_offset
// LoadStoreOp
- __ str(r8, Address(r21, 12552)); // str x8, [x21, 12552]
- __ strw(r10, Address(r27, 6380)); // str w10, [x27, 6380]
- __ strb(r27, Address(r14, 1733)); // strb w27, [x14, 1733]
- __ strh(r16, Address(r7, 3424)); // strh w16, [x7, 3424]
- __ ldr(r27, Address(r9, 12520)); // ldr x27, [x9, 12520]
- __ ldrw(r24, Address(r10, 6680)); // ldr w24, [x10, 6680]
- __ ldrb(r24, Address(r24, 1743)); // ldrb w24, [x24, 1743]
- __ ldrh(r20, Address(r5, 3072)); // ldrh w20, [x5, 3072]
- __ ldrsb(r17, Address(r4, 1570)); // ldrsb x17, [x4, 1570]
- __ ldrsh(r14, Address(r13, 3392)); // ldrsh x14, [x13, 3392]
- __ ldrshw(r10, Address(r25, 3722)); // ldrsh w10, [x25, 3722]
- __ ldrsw(r2, Address(r26, 6160)); // ldrsw x2, [x26, 6160]
- __ ldrd(v26, Address(r14, 14912)); // ldr d26, [x14, 14912]
- __ ldrs(v28, Address(r4, 7804)); // ldr s28, [x4, 7804]
- __ strd(v14, Address(r19, 13984)); // str d14, [x19, 13984]
- __ strs(v23, Address(r28, 6364)); // str s23, [x28, 6364]
-
-// pcrel
+ __ str(r17, Address(r2, 12312)); // str x17, [x2, 12312]
+ __ strw(r30, Address(r7, 6968)); // str w30, [x7, 6968]
+ __ strb(r3, Address(r7, 1833)); // strb w3, [x7, 1833]
+ __ strh(r7, Address(r1, 3366)); // strh w7, [x1, 3366]
+ __ ldr(r7, Address(r27, 14664)); // ldr x7, [x27, 14664]
+ __ ldrw(r8, Address(r17, 6156)); // ldr w8, [x17, 6156]
+ __ ldrb(r0, Address(r17, 1594)); // ldrb w0, [x17, 1594]
+ __ ldrh(r0, Address(r20, 3562)); // ldrh w0, [x20, 3562]
+ __ ldrsb(r19, Address(r17, 1681)); // ldrsb x19, [x17, 1681]
+ __ ldrsh(r19, Address(r6, 3776)); // ldrsh x19, [x6, 3776]
+ __ ldrshw(r4, Address(r10, 3708)); // ldrsh w4, [x10, 3708]
+ __ ldrsw(r29, Address(r4, 6948)); // ldrsw x29, [x4, 6948]
+ __ ldrd(v29, Address(r1, 13352)); // ldr d29, [x1, 13352]
+ __ ldrs(v15, Address(r28, 6544)); // ldr s15, [x28, 6544]
+ __ strd(v7, Address(r26, 14112)); // str d7, [x26, 14112]
+ __ strs(v0, Address(r30, 6820)); // str s0, [x30, 6820]
+
+// pcrel
// LoadStoreOp
- __ ldr(r8, forth); // ldr x8, forth
- __ ldrw(r17, back); // ldr w17, back
+ __ ldr(r25, __ pc()); // ldr x25, .
+ __ ldrw(r9, __ pc()); // ldr w9, .
// LoadStoreOp
- __ prfm(Address(r4, -175)); // prfm PLDL1KEEP, [x4, -175]
+ __ prfm(Address(r22, 105)); // prfm PLDL1KEEP, [x22, 105]
// LoadStoreOp
- __ prfm(__ pc()); // prfm PLDL1KEEP, .
+ __ prfm(back); // prfm PLDL1KEEP, back
// LoadStoreOp
- __ prfm(Address(r8, r4, Address::sxtw(0))); // prfm PLDL1KEEP, [x8, w4, sxtw #0]
+ __ prfm(Address(r28, r30, Address::lsl(3))); // prfm PLDL1KEEP, [x28, x30, lsl #3]
// LoadStoreOp
- __ prfm(Address(r12, 13248)); // prfm PLDL1KEEP, [x12, 13248]
+ __ prfm(Address(r19, 14592)); // prfm PLDL1KEEP, [x19, 14592]
// AddSubCarryOp
- __ adcw(r20, r27, r21); // adc w20, w27, w21
- __ adcsw(r7, r17, r6); // adcs w7, w17, w6
- __ sbcw(r5, r6, r25); // sbc w5, w6, w25
- __ sbcsw(r30, r11, r14); // sbcs w30, w11, w14
- __ adc(r3, r17, r11); // adc x3, x17, x11
- __ adcs(r25, r10, r17); // adcs x25, x10, x17
- __ sbc(r7, r16, r23); // sbc x7, x16, x23
- __ sbcs(r4, r10, r5); // sbcs x4, x10, x5
+ __ adcw(r17, r8, r24); // adc w17, w8, w24
+ __ adcsw(r14, r17, r9); // adcs w14, w17, w9
+ __ sbcw(r22, r1, r25); // sbc w22, w1, w25
+ __ sbcsw(r15, r9, r19); // sbcs w15, w9, w19
+ __ adc(r15, r20, r11); // adc x15, x20, x11
+ __ adcs(r4, r11, r30); // adcs x4, x11, x30
+ __ sbc(r20, r8, r6); // sbc x20, x8, x6
+ __ sbcs(r10, r21, r15); // sbcs x10, x21, x15
// AddSubExtendedOp
- __ addw(r9, r30, r9, ext::uxtx, 4); // add w9, w30, w9, uxtx #4
- __ addsw(r0, r5, r16, ext::sxth, 2); // adds w0, w5, w16, sxth #2
- __ sub(r15, r29, r27, ext::sxtb, 2); // sub x15, x29, x27, sxtb #2
- __ subsw(r11, r9, r1, ext::sxtx, 4); // subs w11, w9, w1, sxtx #4
- __ add(r2, r24, r6, ext::uxtw, 3); // add x2, x24, x6, uxtw #3
- __ adds(r19, r6, r26, ext::uxtx, 4); // adds x19, x6, x26, uxtx #4
- __ sub(r8, r26, r25, ext::sxtx, 3); // sub x8, x26, x25, sxtx #3
- __ subs(r26, r20, r9, ext::uxth, 4); // subs x26, x20, x9, uxth #4
+ __ addw(r1, r11, r9, ext::sxtb, 3); // add w1, w11, w9, sxtb #3
+ __ addsw(r4, r17, r28, ext::sxtx, 3); // adds w4, w17, w28, sxtx #3
+ __ sub(r21, r29, r28, ext::sxth, 1); // sub x21, x29, x28, sxth #1
+ __ subsw(r11, r28, r23, ext::sxtw, 4); // subs w11, w28, w23, sxtw #4
+ __ add(r12, r26, r5, ext::sxtx, 4); // add x12, x26, x5, sxtx #4
+ __ adds(r17, r6, r30, ext::uxtx, 2); // adds x17, x6, x30, uxtx #2
+ __ sub(r7, r20, r1, ext::uxtb, 2); // sub x7, x20, x1, uxtb #2
+ __ subs(r30, r9, r30, ext::sxtx, 3); // subs x30, x9, x30, sxtx #3
// ConditionalCompareOp
- __ ccmnw(r13, r26, 7u, Assembler::MI); // ccmn w13, w26, #7, MI
- __ ccmpw(r8, r20, 15u, Assembler::LO); // ccmp w8, w20, #15, LO
- __ ccmn(r22, r3, 8u, Assembler::EQ); // ccmn x22, x3, #8, EQ
- __ ccmp(r2, r24, 10u, Assembler::GE); // ccmp x2, x24, #10, GE
+ __ ccmnw(r4, r11, 7u, Assembler::EQ); // ccmn w4, w11, #7, EQ
+ __ ccmpw(r15, r5, 11u, Assembler::VC); // ccmp w15, w5, #11, VC
+ __ ccmn(r23, r17, 6u, Assembler::CS); // ccmn x23, x17, #6, CS
+ __ ccmp(r11, r11, 10u, Assembler::GT); // ccmp x11, x11, #10, GT
// ConditionalCompareImmedOp
- __ ccmnw(r8, 16, 13, Assembler::MI); // ccmn w8, #16, #13, MI
- __ ccmpw(r16, 12, 1, Assembler::EQ); // ccmp w16, #12, #1, EQ
- __ ccmn(r15, 31, 3, Assembler::VC); // ccmn x15, #31, #3, VC
- __ ccmp(r23, 12, 15, Assembler::EQ); // ccmp x23, #12, #15, EQ
+ __ ccmnw(r14, 5, 12, Assembler::NE); // ccmn w14, #5, #12, NE
+ __ ccmpw(r23, 28, 15, Assembler::NE); // ccmp w23, #28, #15, NE
+ __ ccmn(r17, 30, 7, Assembler::LO); // ccmn x17, #30, #7, LO
+ __ ccmp(r30, 12, 0, Assembler::HI); // ccmp x30, #12, #0, HI
// ConditionalSelectOp
- __ cselw(r14, r7, r26, Assembler::LO); // csel w14, w7, w26, LO
- __ csincw(r3, r27, r30, Assembler::LE); // csinc w3, w27, w30, LE
- __ csinvw(r11, r21, r23, Assembler::EQ); // csinv w11, w21, w23, EQ
- __ csnegw(r26, r30, r21, Assembler::GT); // csneg w26, w30, w21, GT
- __ csel(r28, r26, r13, Assembler::HI); // csel x28, x26, x13, HI
- __ csinc(r17, r3, r16, Assembler::LS); // csinc x17, x3, x16, LS
- __ csinv(r11, r5, r3, Assembler::HI); // csinv x11, x5, x3, HI
- __ csneg(r1, r3, r19, Assembler::GT); // csneg x1, x3, x19, GT
+ __ cselw(r26, r27, r1, Assembler::PL); // csel w26, w27, w1, PL
+ __ csincw(r14, r11, r21, Assembler::LE); // csinc w14, w11, w21, LE
+ __ csinvw(r30, r6, r15, Assembler::VS); // csinv w30, w6, w15, VS
+ __ csnegw(r17, r2, r25, Assembler::PL); // csneg w17, w2, w25, PL
+ __ csel(r16, r5, r7, Assembler::HI); // csel x16, x5, x7, HI
+ __ csinc(r10, r20, r28, Assembler::GT); // csinc x10, x20, x28, GT
+ __ csinv(r6, r7, r1, Assembler::HI); // csinv x6, x7, x1, HI
+ __ csneg(r22, r26, r17, Assembler::CS); // csneg x22, x26, x17, CS
// TwoRegOp
- __ rbitw(r0, r9); // rbit w0, w9
- __ rev16w(r26, r14); // rev16 w26, w14
- __ revw(r13, r17); // rev w13, w17
- __ clzw(r11, r20); // clz w11, w20
- __ clsw(r28, r17); // cls w28, w17
- __ rbit(r13, r4); // rbit x13, x4
- __ rev16(r1, r30); // rev16 x1, x30
- __ rev32(r13, r14); // rev32 x13, x14
- __ rev(r5, r8); // rev x5, x8
- __ clz(r2, r25); // clz x2, x25
- __ cls(r20, r8); // cls x20, x8
+ __ rbitw(r11, r6); // rbit w11, w6
+ __ rev16w(r0, r27); // rev16 w0, w27
+ __ revw(r1, r29); // rev w1, w29
+ __ clzw(r20, r21); // clz w20, w21
+ __ clsw(r12, r12); // cls w12, w12
+ __ rbit(r24, r19); // rbit x24, x19
+ __ rev16(r23, r15); // rev16 x23, x15
+ __ rev32(r17, r1); // rev32 x17, x1
+ __ rev(r27, r3); // rev x27, x3
+ __ clz(r30, r5); // clz x30, x5
+ __ cls(r15, r0); // cls x15, x0
// ThreeRegOp
- __ udivw(r21, r25, r27); // udiv w21, w25, w27
- __ sdivw(r13, r10, r16); // sdiv w13, w10, w16
- __ lslvw(r28, r1, r17); // lslv w28, w1, w17
- __ lsrvw(r25, r23, r10); // lsrv w25, w23, w10
- __ asrvw(r7, r3, r7); // asrv w7, w3, w7
- __ rorvw(r14, r30, r14); // rorv w14, w30, w14
- __ udiv(r12, r22, r15); // udiv x12, x22, x15
- __ sdiv(r2, r25, r13); // sdiv x2, x25, x13
- __ lslv(r7, r23, r21); // lslv x7, x23, x21
- __ lsrv(r11, r12, r0); // lsrv x11, x12, x0
- __ asrv(r30, r9, r28); // asrv x30, x9, x28
- __ rorv(r13, r5, r22); // rorv x13, x5, x22
- __ umulh(r5, r21, r4); // umulh x5, x21, x4
- __ smulh(r17, r2, r7); // smulh x17, x2, x7
+ __ udivw(r14, r0, r20); // udiv w14, w0, w20
+ __ sdivw(r27, r12, r21); // sdiv w27, w12, w21
+ __ lslvw(r12, r10, r26); // lslv w12, w10, w26
+ __ lsrvw(r14, r19, r6); // lsrv w14, w19, w6
+ __ asrvw(r27, r19, r30); // asrv w27, w19, w30
+ __ rorvw(r6, r14, r16); // rorv w6, w14, w16
+ __ udiv(r17, r13, r2); // udiv x17, x13, x2
+ __ sdiv(r0, r29, r2); // sdiv x0, x29, x2
+ __ lslv(r12, r16, r2); // lslv x12, x16, x2
+ __ lsrv(r9, r23, r29); // lsrv x9, x23, x29
+ __ asrv(r6, r17, r29); // asrv x6, x17, x29
+ __ rorv(r14, r30, r26); // rorv x14, x30, x26
+ __ umulh(r17, r24, r26); // umulh x17, x24, x26
+ __ smulh(r20, r26, r14); // smulh x20, x26, x14
// FourRegMulOp
- __ maddw(r12, r12, r17, r12); // madd w12, w12, w17, w12
- __ msubw(r30, r15, r1, r27); // msub w30, w15, w1, w27
- __ madd(r2, r19, r17, r29); // madd x2, x19, x17, x29
- __ msub(r4, r23, r3, r30); // msub x4, x23, x3, x30
- __ smaddl(r15, r23, r17, r15); // smaddl x15, w23, w17, x15
- __ smsubl(r27, r12, r1, r13); // smsubl x27, w12, w1, x13
- __ umaddl(r6, r13, r12, r17); // umaddl x6, w13, w12, x17
- __ umsubl(r25, r1, r6, r10); // umsubl x25, w1, w6, x10
+ __ maddw(r27, r8, r2, r7); // madd w27, w8, w2, w7
+ __ msubw(r28, r13, r25, r12); // msub w28, w13, w25, w12
+ __ madd(r4, r9, r10, r27); // madd x4, x9, x10, x27
+ __ msub(r22, r10, r8, r30); // msub x22, x10, x8, x30
+ __ smaddl(r20, r20, r25, r5); // smaddl x20, w20, w25, x5
+ __ smsubl(r22, r22, r11, r27); // smsubl x22, w22, w11, x27
+ __ umaddl(r4, r6, r12, r19); // umaddl x4, w6, w12, x19
+ __ umsubl(r17, r15, r8, r0); // umsubl x17, w15, w8, x0
// ThreeRegFloatOp
- __ fmuls(v17, v3, v4); // fmul s17, s3, s4
- __ fdivs(v16, v5, v21); // fdiv s16, s5, s21
- __ fadds(v3, v27, v17); // fadd s3, s27, s17
- __ fsubs(v25, v10, v15); // fsub s25, s10, s15
- __ fmuls(v10, v17, v0); // fmul s10, s17, s0
- __ fmuld(v28, v26, v3); // fmul d28, d26, d3
- __ fdivd(v4, v0, v27); // fdiv d4, d0, d27
- __ faddd(v28, v14, v2); // fadd d28, d14, d2
- __ fsubd(v12, v26, v23); // fsub d12, d26, d23
- __ fmuld(v15, v30, v1); // fmul d15, d30, d1
+ __ fmuls(v22, v5, v10); // fmul s22, s5, s10
+ __ fdivs(v4, v8, v16); // fdiv s4, s8, s16
+ __ fadds(v25, v8, v6); // fadd s25, s8, s6
+ __ fsubs(v6, v27, v25); // fsub s6, s27, s25
+ __ fmuls(v10, v23, v9); // fmul s10, s23, s9
+ __ fmuld(v22, v17, v12); // fmul d22, d17, d12
+ __ fdivd(v11, v0, v20); // fdiv d11, d0, d20
+ __ faddd(v0, v12, v15); // fadd d0, d12, d15
+ __ fsubd(v15, v22, v4); // fsub d15, d22, d4
+ __ fmuld(v29, v1, v25); // fmul d29, d1, d25
// FourRegFloatOp
- __ fmadds(v4, v5, v5, v13); // fmadd s4, s5, s5, s13
- __ fmsubs(v21, v13, v28, v1); // fmsub s21, s13, s28, s1
- __ fnmadds(v17, v3, v29, v7); // fnmadd s17, s3, s29, s7
- __ fnmadds(v23, v25, v29, v26); // fnmadd s23, s25, s29, s26
- __ fmaddd(v14, v7, v30, v26); // fmadd d14, d7, d30, d26
- __ fmsubd(v22, v7, v10, v9); // fmsub d22, d7, d10, d9
- __ fnmaddd(v7, v7, v14, v9); // fnmadd d7, d7, d14, d9
- __ fnmaddd(v14, v24, v15, v24); // fnmadd d14, d24, d15, d24
+ __ fmadds(v9, v27, v19, v5); // fmadd s9, s27, s19, s5
+ __ fmsubs(v26, v3, v6, v26); // fmsub s26, s3, s6, s26
+ __ fnmadds(v4, v12, v8, v20); // fnmadd s4, s12, s8, s20
+ __ fnmadds(v25, v9, v21, v17); // fnmadd s25, s9, s21, s17
+ __ fmaddd(v7, v3, v30, v22); // fmadd d7, d3, d30, d22
+ __ fmsubd(v1, v27, v10, v10); // fmsub d1, d27, d10, d10
+ __ fnmaddd(v17, v8, v22, v1); // fnmadd d17, d8, d22, d1
+ __ fnmaddd(v14, v28, v2, v27); // fnmadd d14, d28, d2, d27
// TwoRegFloatOp
- __ fmovs(v22, v2); // fmov s22, s2
- __ fabss(v0, v3); // fabs s0, s3
- __ fnegs(v9, v17); // fneg s9, s17
- __ fsqrts(v24, v11); // fsqrt s24, s11
- __ fcvts(v15, v25); // fcvt d15, s25
- __ fmovd(v4, v3); // fmov d4, d3
- __ fabsd(v26, v22); // fabs d26, d22
- __ fnegd(v30, v19); // fneg d30, d19
- __ fsqrtd(v12, v14); // fsqrt d12, d14
- __ fcvtd(v17, v7); // fcvt s17, d7
+ __ fmovs(v27, v30); // fmov s27, s30
+ __ fabss(v5, v1); // fabs s5, s1
+ __ fnegs(v23, v19); // fneg s23, s19
+ __ fsqrts(v28, v17); // fsqrt s28, s17
+ __ fcvts(v25, v6); // fcvt d25, s6
+ __ fmovd(v20, v14); // fmov d20, d14
+ __ fabsd(v17, v10); // fabs d17, d10
+ __ fnegd(v10, v17); // fneg d10, d17
+ __ fsqrtd(v21, v17); // fsqrt d21, d17
+ __ fcvtd(v21, v15); // fcvt s21, d15
// FloatConvertOp
- __ fcvtzsw(r24, v14); // fcvtzs w24, s14
- __ fcvtzs(r13, v26); // fcvtzs x13, s26
- __ fcvtzdw(r2, v1); // fcvtzs w2, d1
- __ fcvtzd(r5, v11); // fcvtzs x5, d11
- __ scvtfws(v14, r19); // scvtf s14, w19
- __ scvtfs(v1, r22); // scvtf s1, x22
- __ scvtfwd(v27, r17); // scvtf d27, w17
- __ scvtfd(v22, r9); // scvtf d22, x9
- __ fmovs(r14, v3); // fmov w14, s3
- __ fmovd(r12, v17); // fmov x12, d17
- __ fmovs(v8, r27); // fmov s8, w27
- __ fmovd(v29, r28); // fmov d29, x28
+ __ fcvtzsw(r7, v11); // fcvtzs w7, s11
+ __ fcvtzs(r2, v29); // fcvtzs x2, s29
+ __ fcvtzdw(r3, v25); // fcvtzs w3, d25
+ __ fcvtzd(r28, v8); // fcvtzs x28, d8
+ __ scvtfws(v11, r3); // scvtf s11, w3
+ __ scvtfs(v2, r21); // scvtf s2, x21
+ __ scvtfwd(v29, r25); // scvtf d29, w25
+ __ scvtfd(v19, r3); // scvtf d19, x3
+ __ fmovs(r20, v29); // fmov w20, s29
+ __ fmovd(r23, v17); // fmov x23, d17
+ __ fmovs(v0, r28); // fmov s0, w28
+ __ fmovd(v9, r20); // fmov d9, x20
// TwoRegFloatOp
- __ fcmps(v0, v30); // fcmp s0, s30
- __ fcmpd(v12, v9); // fcmp d12, d9
- __ fcmps(v10, 0.0); // fcmp s10, #0.0
- __ fcmpd(v25, 0.0); // fcmp d25, #0.0
+ __ fcmps(v7, v12); // fcmp s7, s12
+ __ fcmpd(v13, v14); // fcmp d13, d14
+ __ fcmps(v12, 0.0); // fcmp s12, #0.0
+ __ fcmpd(v1, 0.0); // fcmp d1, #0.0
// LoadStorePairOp
- __ stpw(r8, r30, Address(r27, -144)); // stp w8, w30, [x27, #-144]
- __ ldpw(r21, r19, Address(r24, 80)); // ldp w21, w19, [x24, #80]
- __ ldpsw(r16, r27, Address(r2, -240)); // ldpsw x16, x27, [x2, #-240]
- __ stp(r21, r5, Address(r6, -128)); // stp x21, x5, [x6, #-128]
- __ ldp(r29, r25, Address(r28, -32)); // ldp x29, x25, [x28, #-32]
+ __ stpw(r12, r2, Address(r22, -64)); // stp w12, w2, [x22, #-64]
+ __ ldpw(r27, r9, Address(r24, -208)); // ldp w27, w9, [x24, #-208]
+ __ ldpsw(r15, r4, Address(r24, -176)); // ldpsw x15, x4, [x24, #-176]
+ __ stp(r5, r21, Address(r0, 16)); // stp x5, x21, [x0, #16]
+ __ ldp(r6, r23, Address(r9, -208)); // ldp x6, x23, [x9, #-208]
// LoadStorePairOp
- __ stpw(r8, r13, Address(__ pre(r0, 128))); // stp w8, w13, [x0, #128]!
- __ ldpw(r25, r20, Address(__ pre(r1, -160))); // ldp w25, w20, [x1, #-160]!
- __ ldpsw(r14, r24, Address(__ pre(r22, -32))); // ldpsw x14, x24, [x22, #-32]!
- __ stp(r17, r1, Address(__ pre(r6, 80))); // stp x17, x1, [x6, #80]!
- __ ldp(r21, r17, Address(__ pre(r25, -64))); // ldp x21, x17, [x25, #-64]!
+ __ stpw(r0, r3, Address(__ pre(r29, 16))); // stp w0, w3, [x29, #16]!
+ __ ldpw(r29, r16, Address(__ pre(r1, -144))); // ldp w29, w16, [x1, #-144]!
+ __ ldpsw(r27, r19, Address(__ pre(r21, 16))); // ldpsw x27, x19, [x21, #16]!
+ __ stp(r6, r17, Address(__ pre(r13, -176))); // stp x6, x17, [x13, #-176]!
+ __ ldp(r0, r24, Address(__ pre(r1, 16))); // ldp x0, x24, [x1, #16]!
// LoadStorePairOp
- __ stpw(r17, r21, Address(__ post(r20, -128))); // stp w17, w21, [x20], #-128
- __ ldpw(r28, r28, Address(__ post(r2, 64))); // ldp w28, w28, [x2], #64
- __ ldpsw(r19, r30, Address(__ post(r10, -256))); // ldpsw x19, x30, [x10], #-256
- __ stp(r17, r15, Address(__ post(r17, -16))); // stp x17, x15, [x17], #-16
- __ ldp(r17, r0, Address(__ post(r25, -32))); // ldp x17, x0, [x25], #-32
+ __ stpw(r0, r20, Address(__ post(r22, 0))); // stp w0, w20, [x22], #0
+ __ ldpw(r17, r12, Address(__ post(r14, -48))); // ldp w17, w12, [x14], #-48
+ __ ldpsw(r10, r26, Address(__ post(r1, 112))); // ldpsw x10, x26, [x1], #112
+ __ stp(r20, r24, Address(__ post(r13, -96))); // stp x20, x24, [x13], #-96
+ __ ldp(r1, r12, Address(__ post(r7, 48))); // ldp x1, x12, [x7], #48
// LoadStorePairOp
- __ stnpw(r14, r5, Address(r24, -32)); // stnp w14, w5, [x24, #-32]
- __ ldnpw(r23, r19, Address(r1, 112)); // ldnp w23, w19, [x1, #112]
- __ stnp(r11, r6, Address(r14, 64)); // stnp x11, x6, [x14, #64]
- __ ldnp(r2, r11, Address(r27, -224)); // ldnp x2, x11, [x27, #-224]
+ __ stnpw(r5, r10, Address(r23, -80)); // stnp w5, w10, [x23, #-80]
+ __ ldnpw(r8, r10, Address(r24, -48)); // ldnp w8, w10, [x24, #-48]
+ __ stnp(r11, r15, Address(r11, 64)); // stnp x11, x15, [x11, #64]
+ __ ldnp(r9, r28, Address(r5, 64)); // ldnp x9, x28, [x5, #64]
// LdStSIMDOp
- __ ld1(v16, __ T8B, Address(r17)); // ld1 {v16.8B}, [x17]
- __ ld1(v29, v30, __ T16B, Address(__ post(r9, 32))); // ld1 {v29.16B, v30.16B}, [x9], 32
- __ ld1(v30, v31, v0, __ T1D, Address(__ post(r24, r21))); // ld1 {v30.1D, v31.1D, v0.1D}, [x24], x21
- __ ld1(v0, v1, v2, v3, __ T8H, Address(__ post(r2, 64))); // ld1 {v0.8H, v1.8H, v2.8H, v3.8H}, [x2], 64
- __ ld1r(v20, __ T8B, Address(r9)); // ld1r {v20.8B}, [x9]
- __ ld1r(v17, __ T4S, Address(__ post(r0, 4))); // ld1r {v17.4S}, [x0], 4
- __ ld1r(v21, __ T1D, Address(__ post(r22, r26))); // ld1r {v21.1D}, [x22], x26
- __ ld2(v19, v20, __ T2D, Address(r25)); // ld2 {v19.2D, v20.2D}, [x25]
- __ ld2(v10, v11, __ T4H, Address(__ post(r5, 16))); // ld2 {v10.4H, v11.4H}, [x5], 16
- __ ld2r(v10, v11, __ T16B, Address(r24)); // ld2r {v10.16B, v11.16B}, [x24]
- __ ld2r(v13, v14, __ T2S, Address(__ post(r29, 8))); // ld2r {v13.2S, v14.2S}, [x29], 8
- __ ld2r(v22, v23, __ T2D, Address(__ post(r28, r2))); // ld2r {v22.2D, v23.2D}, [x28], x2
- __ ld3(v30, v31, v0, __ T4S, Address(__ post(r4, r11))); // ld3 {v30.4S, v31.4S, v0.4S}, [x4], x11
- __ ld3(v29, v30, v31, __ T2S, Address(r0)); // ld3 {v29.2S, v30.2S, v31.2S}, [x0]
- __ ld3r(v23, v24, v25, __ T8H, Address(r27)); // ld3r {v23.8H, v24.8H, v25.8H}, [x27]
- __ ld3r(v3, v4, v5, __ T4S, Address(__ post(r10, 12))); // ld3r {v3.4S, v4.4S, v5.4S}, [x10], 12
- __ ld3r(v19, v20, v21, __ T1D, Address(__ post(r14, r22))); // ld3r {v19.1D, v20.1D, v21.1D}, [x14], x22
- __ ld4(v14, v15, v16, v17, __ T8H, Address(__ post(r0, 64))); // ld4 {v14.8H, v15.8H, v16.8H, v17.8H}, [x0], 64
- __ ld4(v30, v31, v0, v1, __ T8B, Address(__ post(r22, r25))); // ld4 {v30.8B, v31.8B, v0.8B, v1.8B}, [x22], x25
- __ ld4r(v25, v26, v27, v28, __ T8B, Address(r0)); // ld4r {v25.8B, v26.8B, v27.8B, v28.8B}, [x0]
- __ ld4r(v10, v11, v12, v13, __ T4H, Address(__ post(r8, 8))); // ld4r {v10.4H, v11.4H, v12.4H, v13.4H}, [x8], 8
- __ ld4r(v1, v2, v3, v4, __ T2S, Address(__ post(r6, r28))); // ld4r {v1.2S, v2.2S, v3.2S, v4.2S}, [x6], x28
+ __ ld1(v11, __ T8B, Address(r20)); // ld1 {v11.8B}, [x20]
+ __ ld1(v19, v20, __ T16B, Address(__ post(r8, 32))); // ld1 {v19.16B, v20.16B}, [x8], 32
+ __ ld1(v3, v4, v5, __ T1D, Address(__ post(r2, r3))); // ld1 {v3.1D, v4.1D, v5.1D}, [x2], x3
+ __ ld1(v21, v22, v23, v24, __ T8H, Address(__ post(r3, 64))); // ld1 {v21.8H, v22.8H, v23.8H, v24.8H}, [x3], 64
+ __ ld1r(v14, __ T8B, Address(r5)); // ld1r {v14.8B}, [x5]
+ __ ld1r(v13, __ T4S, Address(__ post(r27, 4))); // ld1r {v13.4S}, [x27], 4
+ __ ld1r(v17, __ T1D, Address(__ post(r19, r0))); // ld1r {v17.1D}, [x19], x0
+ __ ld2(v27, v28, __ T2D, Address(r5)); // ld2 {v27.2D, v28.2D}, [x5]
+ __ ld2(v26, v27, __ T4H, Address(__ post(r4, 16))); // ld2 {v26.4H, v27.4H}, [x4], 16
+ __ ld2r(v8, v9, __ T16B, Address(r23)); // ld2r {v8.16B, v9.16B}, [x23]
+ __ ld2r(v14, v15, __ T2S, Address(__ post(r10, 8))); // ld2r {v14.2S, v15.2S}, [x10], 8
+ __ ld2r(v10, v11, __ T2D, Address(__ post(r21, r19))); // ld2r {v10.2D, v11.2D}, [x21], x19
+ __ ld3(v17, v18, v19, __ T4S, Address(__ post(r14, r30))); // ld3 {v17.4S, v18.4S, v19.4S}, [x14], x30
+ __ ld3(v20, v21, v22, __ T2S, Address(r20)); // ld3 {v20.2S, v21.2S, v22.2S}, [x20]
+ __ ld3r(v24, v25, v26, __ T8H, Address(r21)); // ld3r {v24.8H, v25.8H, v26.8H}, [x21]
+ __ ld3r(v26, v27, v28, __ T4S, Address(__ post(r1, 12))); // ld3r {v26.4S, v27.4S, v28.4S}, [x1], 12
+ __ ld3r(v12, v13, v14, __ T1D, Address(__ post(r2, r0))); // ld3r {v12.1D, v13.1D, v14.1D}, [x2], x0
+ __ ld4(v21, v22, v23, v24, __ T8H, Address(__ post(r6, 64))); // ld4 {v21.8H, v22.8H, v23.8H, v24.8H}, [x6], 64
+ __ ld4(v17, v18, v19, v20, __ T8B, Address(__ post(r28, r22))); // ld4 {v17.8B, v18.8B, v19.8B, v20.8B}, [x28], x22
+ __ ld4r(v19, v20, v21, v22, __ T8B, Address(r25)); // ld4r {v19.8B, v20.8B, v21.8B, v22.8B}, [x25]
+ __ ld4r(v6, v7, v8, v9, __ T4H, Address(__ post(r23, 8))); // ld4r {v6.4H, v7.4H, v8.4H, v9.4H}, [x23], 8
+ __ ld4r(v8, v9, v10, v11, __ T2S, Address(__ post(r9, r26))); // ld4r {v8.2S, v9.2S, v10.2S, v11.2S}, [x9], x26
// SpecialCases
- __ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE
- __ ccmnw(zr, zr, 5u, Assembler::EQ); // ccmn wzr, wzr, #5, EQ
- __ ccmp(zr, 1, 4u, Assembler::NE); // ccmp xzr, 1, #4, NE
- __ ccmpw(zr, 2, 2, Assembler::GT); // ccmp wzr, 2, #2, GT
- __ extr(zr, zr, zr, 0); // extr xzr, xzr, xzr, 0
- __ stlxp(r0, zr, zr, sp); // stlxp w0, xzr, xzr, [sp]
- __ stlxpw(r2, zr, zr, r3); // stlxp w2, wzr, wzr, [x3]
- __ stxp(r4, zr, zr, r5); // stxp w4, xzr, xzr, [x5]
- __ stxpw(r6, zr, zr, sp); // stxp w6, wzr, wzr, [sp]
- __ dup(v0, __ T16B, zr); // dup v0.16b, wzr
- __ mov(v1, __ T1D, 0, zr); // mov v1.d[0], xzr
- __ mov(v1, __ T2S, 1, zr); // mov v1.s[1], wzr
- __ mov(v1, __ T4H, 2, zr); // mov v1.h[2], wzr
- __ mov(v1, __ T8B, 3, zr); // mov v1.b[3], wzr
- __ ld1(v31, v0, __ T2D, Address(__ post(r1, r0))); // ld1 {v31.2d, v0.2d}, [x1], x0
+ __ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE
+ __ ccmnw(zr, zr, 5u, Assembler::EQ); // ccmn wzr, wzr, #5, EQ
+ __ ccmp(zr, 1, 4u, Assembler::NE); // ccmp xzr, 1, #4, NE
+ __ ccmpw(zr, 2, 2, Assembler::GT); // ccmp wzr, 2, #2, GT
+ __ extr(zr, zr, zr, 0); // extr xzr, xzr, xzr, 0
+ __ stlxp(r0, zr, zr, sp); // stlxp w0, xzr, xzr, [sp]
+ __ stlxpw(r2, zr, zr, r3); // stlxp w2, wzr, wzr, [x3]
+ __ stxp(r4, zr, zr, r5); // stxp w4, xzr, xzr, [x5]
+ __ stxpw(r6, zr, zr, sp); // stxp w6, wzr, wzr, [sp]
+ __ dup(v0, __ T16B, zr); // dup v0.16b, wzr
+ __ mov(v1, __ T1D, 0, zr); // mov v1.d[0], xzr
+ __ mov(v1, __ T2S, 1, zr); // mov v1.s[1], wzr
+ __ mov(v1, __ T4H, 2, zr); // mov v1.h[2], wzr
+ __ mov(v1, __ T8B, 3, zr); // mov v1.b[3], wzr
+ __ ld1(v31, v0, __ T2D, Address(__ post(r1, r0))); // ld1 {v31.2d, v0.2d}, [x1], x0
+ __ sve_cpy(z0, __ S, p0, v1); // mov z0.s, p0/m, s1
+ __ sve_inc(r0, __ S); // incw x0
+ __ sve_dec(r1, __ H); // dech x1
+ __ sve_lsl(z0, __ B, z1, 7); // lsl z0.b, z1.b, #7
+ __ sve_lsl(z21, __ H, z1, 15); // lsl z21.h, z1.h, #15
+ __ sve_lsl(z0, __ S, z1, 31); // lsl z0.s, z1.s, #31
+ __ sve_lsl(z0, __ D, z1, 63); // lsl z0.d, z1.d, #63
+ __ sve_lsr(z0, __ B, z1, 7); // lsr z0.b, z1.b, #7
+ __ sve_asr(z0, __ H, z11, 15); // asr z0.h, z11.h, #15
+ __ sve_lsr(z30, __ S, z1, 31); // lsr z30.s, z1.s, #31
+ __ sve_asr(z0, __ D, z1, 63); // asr z0.d, z1.d, #63
+ __ sve_addvl(sp, r0, 31); // addvl sp, x0, #31
+ __ sve_addpl(r1, sp, -32); // addpl x1, sp, -32
+ __ sve_cntp(r8, __ B, p0, p1); // cntp x8, p0, p1.b
+ __ sve_dup(z0, __ B, 127); // dup z0.b, 127
+ __ sve_dup(z1, __ H, -128); // dup z1.h, -128
+ __ sve_dup(z2, __ S, 32512); // dup z2.s, 32512
+ __ sve_dup(z7, __ D, -32768); // dup z7.d, -32768
+ __ sve_ld1b(z0, __ B, p0, Address(sp)); // ld1b {z0.b}, p0/z, [sp]
+ __ sve_ld1h(z10, __ H, p1, Address(sp, -8)); // ld1h {z10.h}, p1/z, [sp, #-8, MUL VL]
+ __ sve_ld1w(z20, __ S, p2, Address(r0, 7)); // ld1w {z20.s}, p2/z, [x0, #7, MUL VL]
+ __ sve_ld1b(z30, __ B, p3, Address(sp, r8)); // ld1b {z30.b}, p3/z, [sp, x8]
+ __ sve_ld1w(z0, __ S, p4, Address(sp, r28)); // ld1w {z0.s}, p4/z, [sp, x28, LSL #2]
+ __ sve_ld1d(z11, __ D, p5, Address(r0, r1)); // ld1d {z11.d}, p5/z, [x0, x1, LSL #3]
+ __ sve_st1b(z22, __ B, p6, Address(sp)); // st1b {z22.b}, p6, [sp]
+ __ sve_st1b(z31, __ B, p7, Address(sp, -8)); // st1b {z31.b}, p7, [sp, #-8, MUL VL]
+ __ sve_st1w(z0, __ S, p1, Address(r0, 7)); // st1w {z0.s}, p1, [x0, #7, MUL VL]
+ __ sve_st1b(z0, __ B, p2, Address(sp, r1)); // st1b {z0.b}, p2, [sp, x1]
+ __ sve_st1h(z0, __ H, p3, Address(sp, r8)); // st1h {z0.h}, p3, [sp, x8, LSL #1]
+ __ sve_st1d(z0, __ D, p4, Address(r0, r8)); // st1d {z0.d}, p4, [x0, x8, LSL #3]
+ __ sve_ldr(z0, Address(sp)); // ldr z0, [sp]
+ __ sve_ldr(z31, Address(sp, -256)); // ldr z31, [sp, #-256, MUL VL]
+ __ sve_str(z8, Address(r8, 255)); // str z8, [x8, #255, MUL VL]
// FloatImmediateOp
- __ fmovd(v0, 2.0); // fmov d0, #2.0
- __ fmovd(v0, 2.125); // fmov d0, #2.125
- __ fmovd(v0, 4.0); // fmov d0, #4.0
- __ fmovd(v0, 4.25); // fmov d0, #4.25
- __ fmovd(v0, 8.0); // fmov d0, #8.0
- __ fmovd(v0, 8.5); // fmov d0, #8.5
- __ fmovd(v0, 16.0); // fmov d0, #16.0
- __ fmovd(v0, 17.0); // fmov d0, #17.0
- __ fmovd(v0, 0.125); // fmov d0, #0.125
- __ fmovd(v0, 0.1328125); // fmov d0, #0.1328125
- __ fmovd(v0, 0.25); // fmov d0, #0.25
- __ fmovd(v0, 0.265625); // fmov d0, #0.265625
- __ fmovd(v0, 0.5); // fmov d0, #0.5
- __ fmovd(v0, 0.53125); // fmov d0, #0.53125
- __ fmovd(v0, 1.0); // fmov d0, #1.0
- __ fmovd(v0, 1.0625); // fmov d0, #1.0625
- __ fmovd(v0, -2.0); // fmov d0, #-2.0
- __ fmovd(v0, -2.125); // fmov d0, #-2.125
- __ fmovd(v0, -4.0); // fmov d0, #-4.0
- __ fmovd(v0, -4.25); // fmov d0, #-4.25
- __ fmovd(v0, -8.0); // fmov d0, #-8.0
- __ fmovd(v0, -8.5); // fmov d0, #-8.5
- __ fmovd(v0, -16.0); // fmov d0, #-16.0
- __ fmovd(v0, -17.0); // fmov d0, #-17.0
- __ fmovd(v0, -0.125); // fmov d0, #-0.125
- __ fmovd(v0, -0.1328125); // fmov d0, #-0.1328125
- __ fmovd(v0, -0.25); // fmov d0, #-0.25
- __ fmovd(v0, -0.265625); // fmov d0, #-0.265625
- __ fmovd(v0, -0.5); // fmov d0, #-0.5
- __ fmovd(v0, -0.53125); // fmov d0, #-0.53125
- __ fmovd(v0, -1.0); // fmov d0, #-1.0
- __ fmovd(v0, -1.0625); // fmov d0, #-1.0625
+ __ fmovd(v0, 2.0); // fmov d0, #2.0
+ __ fmovd(v0, 2.125); // fmov d0, #2.125
+ __ fmovd(v0, 4.0); // fmov d0, #4.0
+ __ fmovd(v0, 4.25); // fmov d0, #4.25
+ __ fmovd(v0, 8.0); // fmov d0, #8.0
+ __ fmovd(v0, 8.5); // fmov d0, #8.5
+ __ fmovd(v0, 16.0); // fmov d0, #16.0
+ __ fmovd(v0, 17.0); // fmov d0, #17.0
+ __ fmovd(v0, 0.125); // fmov d0, #0.125
+ __ fmovd(v0, 0.1328125); // fmov d0, #0.1328125
+ __ fmovd(v0, 0.25); // fmov d0, #0.25
+ __ fmovd(v0, 0.265625); // fmov d0, #0.265625
+ __ fmovd(v0, 0.5); // fmov d0, #0.5
+ __ fmovd(v0, 0.53125); // fmov d0, #0.53125
+ __ fmovd(v0, 1.0); // fmov d0, #1.0
+ __ fmovd(v0, 1.0625); // fmov d0, #1.0625
+ __ fmovd(v0, -2.0); // fmov d0, #-2.0
+ __ fmovd(v0, -2.125); // fmov d0, #-2.125
+ __ fmovd(v0, -4.0); // fmov d0, #-4.0
+ __ fmovd(v0, -4.25); // fmov d0, #-4.25
+ __ fmovd(v0, -8.0); // fmov d0, #-8.0
+ __ fmovd(v0, -8.5); // fmov d0, #-8.5
+ __ fmovd(v0, -16.0); // fmov d0, #-16.0
+ __ fmovd(v0, -17.0); // fmov d0, #-17.0
+ __ fmovd(v0, -0.125); // fmov d0, #-0.125
+ __ fmovd(v0, -0.1328125); // fmov d0, #-0.1328125
+ __ fmovd(v0, -0.25); // fmov d0, #-0.25
+ __ fmovd(v0, -0.265625); // fmov d0, #-0.265625
+ __ fmovd(v0, -0.5); // fmov d0, #-0.5
+ __ fmovd(v0, -0.53125); // fmov d0, #-0.53125
+ __ fmovd(v0, -1.0); // fmov d0, #-1.0
+ __ fmovd(v0, -1.0625); // fmov d0, #-1.0625
// LSEOp
- __ swp(Assembler::xword, r16, r20, r15); // swp x16, x20, [x15]
- __ ldadd(Assembler::xword, r2, r7, r28); // ldadd x2, x7, [x28]
- __ ldbic(Assembler::xword, r20, r10, r25); // ldclr x20, x10, [x25]
- __ ldeor(Assembler::xword, r22, r11, r2); // ldeor x22, x11, [x2]
- __ ldorr(Assembler::xword, r1, r10, r19); // ldset x1, x10, [x19]
- __ ldsmin(Assembler::xword, r14, r21, r3); // ldsmin x14, x21, [x3]
- __ ldsmax(Assembler::xword, r28, r27, r13); // ldsmax x28, x27, [x13]
- __ ldumin(Assembler::xword, r17, r30, r21); // ldumin x17, x30, [x21]
- __ ldumax(Assembler::xword, r27, r16, r29); // ldumax x27, x16, [x29]
+ __ swp(Assembler::xword, r11, r15, r21); // swp x11, x15, [x21]
+ __ ldadd(Assembler::xword, r23, r8, r5); // ldadd x23, x8, [x5]
+ __ ldbic(Assembler::xword, r7, r6, r8); // ldclr x7, x6, [x8]
+ __ ldeor(Assembler::xword, r14, r14, r23); // ldeor x14, x14, [x23]
+ __ ldorr(Assembler::xword, r10, r25, r0); // ldset x10, x25, [x0]
+ __ ldsmin(Assembler::xword, r5, r9, r21); // ldsmin x5, x9, [x21]
+ __ ldsmax(Assembler::xword, r4, r27, r17); // ldsmax x4, x27, [x17]
+ __ ldumin(Assembler::xword, r10, r6, r13); // ldumin x10, x6, [x13]
+ __ ldumax(Assembler::xword, r3, r3, r16); // ldumax x3, x3, [x16]
// LSEOp
- __ swpa(Assembler::xword, r30, r9, r0); // swpa x30, x9, [x0]
- __ ldadda(Assembler::xword, r28, r27, r28); // ldadda x28, x27, [x28]
- __ ldbica(Assembler::xword, r21, r25, r10); // ldclra x21, x25, [x10]
- __ ldeora(Assembler::xword, zr, r20, r15); // ldeora xzr, x20, [x15]
- __ ldorra(Assembler::xword, r1, r25, r14); // ldseta x1, x25, [x14]
- __ ldsmina(Assembler::xword, r21, r26, r29); // ldsmina x21, x26, [x29]
- __ ldsmaxa(Assembler::xword, r8, r29, r25); // ldsmaxa x8, x29, [x25]
- __ ldumina(Assembler::xword, r13, r2, r25); // ldumina x13, x2, [x25]
- __ ldumaxa(Assembler::xword, r15, r23, r0); // ldumaxa x15, x23, [x0]
+ __ swpa(Assembler::xword, r9, r28, r2); // swpa x9, x28, [x2]
+ __ ldadda(Assembler::xword, r23, r2, r1); // ldadda x23, x2, [x1]
+ __ ldbica(Assembler::xword, r4, r26, r7); // ldclra x4, x26, [x7]
+ __ ldeora(Assembler::xword, r0, r3, r10); // ldeora x0, x3, [x10]
+ __ ldorra(Assembler::xword, r24, r25, r3); // ldseta x24, x25, [x3]
+ __ ldsmina(Assembler::xword, r11, r8, r1); // ldsmina x11, x8, [x1]
+ __ ldsmaxa(Assembler::xword, r16, r13, r29); // ldsmaxa x16, x13, [x29]
+ __ ldumina(Assembler::xword, r6, r0, r5); // ldumina x6, x0, [x5]
+ __ ldumaxa(Assembler::xword, r16, r17, r13); // ldumaxa x16, x17, [x13]
// LSEOp
- __ swpal(Assembler::xword, r3, r1, r2); // swpal x3, x1, [x2]
- __ ldaddal(Assembler::xword, r28, r3, r20); // ldaddal x28, x3, [x20]
- __ ldbical(Assembler::xword, r14, zr, r14); // ldclral x14, xzr, [x14]
- __ ldeoral(Assembler::xword, r7, r28, r2); // ldeoral x7, x28, [x2]
- __ ldorral(Assembler::xword, r0, r11, r5); // ldsetal x0, x11, [x5]
- __ ldsminal(Assembler::xword, r11, r14, r20); // ldsminal x11, x14, [x20]
- __ ldsmaxal(Assembler::xword, zr, r4, r2); // ldsmaxal xzr, x4, [x2]
- __ lduminal(Assembler::xword, r26, r0, r22); // lduminal x26, x0, [x22]
- __ ldumaxal(Assembler::xword, r17, r1, r13); // ldumaxal x17, x1, [x13]
+ __ swpal(Assembler::xword, r11, r27, r14); // swpal x11, x27, [x14]
+ __ ldaddal(Assembler::xword, r2, r13, r21); // ldaddal x2, x13, [x21]
+ __ ldbical(Assembler::xword, r22, zr, r12); // ldclral x22, xzr, [x12]
+ __ ldeoral(Assembler::xword, r7, r30, r15); // ldeoral x7, x30, [x15]
+ __ ldorral(Assembler::xword, r7, r16, r15); // ldsetal x7, x16, [x15]
+ __ ldsminal(Assembler::xword, r16, r26, r13); // ldsminal x16, x26, [x13]
+ __ ldsmaxal(Assembler::xword, r23, r25, r27); // ldsmaxal x23, x25, [x27]
+ __ lduminal(Assembler::xword, r4, r14, sp); // lduminal x4, x14, [sp]
+ __ ldumaxal(Assembler::xword, r24, r1, r17); // ldumaxal x24, x1, [x17]
// LSEOp
- __ swpl(Assembler::xword, r23, r26, r20); // swpl x23, x26, [x20]
- __ ldaddl(Assembler::xword, r14, r11, r12); // ldaddl x14, x11, [x12]
- __ ldbicl(Assembler::xword, r12, zr, r15); // ldclrl x12, xzr, [x15]
- __ ldeorl(Assembler::xword, r27, r14, r8); // ldeorl x27, x14, [x8]
- __ ldorrl(Assembler::xword, r10, r30, r25); // ldsetl x10, x30, [x25]
- __ ldsminl(Assembler::xword, r22, r7, r16); // ldsminl x22, x7, [x16]
- __ ldsmaxl(Assembler::xword, r1, r16, r8); // ldsmaxl x1, x16, [x8]
- __ lduminl(Assembler::xword, r1, r1, r26); // lduminl x1, x1, [x26]
- __ ldumaxl(Assembler::xword, r0, r23, r15); // ldumaxl x0, x23, [x15]
+ __ swpl(Assembler::xword, r2, r8, r24); // swpl x2, x8, [x24]
+ __ ldaddl(Assembler::xword, r20, r27, r19); // ldaddl x20, x27, [x19]
+ __ ldbicl(Assembler::xword, r19, r17, r6); // ldclrl x19, x17, [x6]
+ __ ldeorl(Assembler::xword, r14, r28, r26); // ldeorl x14, x28, [x26]
+ __ ldorrl(Assembler::xword, r2, r16, r19); // ldsetl x2, x16, [x19]
+ __ ldsminl(Assembler::xword, r14, r16, r4); // ldsminl x14, x16, [x4]
+ __ ldsmaxl(Assembler::xword, r25, r8, r9); // ldsmaxl x25, x8, [x9]
+ __ lduminl(Assembler::xword, r10, r5, r29); // lduminl x10, x5, [x29]
+ __ ldumaxl(Assembler::xword, r6, r2, r14); // ldumaxl x6, x2, [x14]
// LSEOp
- __ swp(Assembler::word, r11, r16, r8); // swp w11, w16, [x8]
- __ ldadd(Assembler::word, r1, r7, r14); // ldadd w1, w7, [x14]
- __ ldbic(Assembler::word, r16, zr, r9); // ldclr w16, wzr, [x9]
- __ ldeor(Assembler::word, r22, r6, r13); // ldeor w22, w6, [x13]
- __ ldorr(Assembler::word, r11, r13, r4); // ldset w11, w13, [x4]
- __ ldsmin(Assembler::word, r16, r22, r0); // ldsmin w16, w22, [x0]
- __ ldsmax(Assembler::word, r28, zr, r10); // ldsmax w28, wzr, [x10]
- __ ldumin(Assembler::word, r16, r5, r8); // ldumin w16, w5, [x8]
- __ ldumax(Assembler::word, r26, r20, r15); // ldumax w26, w20, [x15]
+ __ swp(Assembler::word, r17, r11, r4); // swp w17, w11, [x4]
+ __ ldadd(Assembler::word, r7, r16, r15); // ldadd w7, w16, [x15]
+ __ ldbic(Assembler::word, r11, r25, r9); // ldclr w11, w25, [x9]
+ __ ldeor(Assembler::word, r3, r14, r0); // ldeor w3, w14, [x0]
+ __ ldorr(Assembler::word, r0, r30, r0); // ldset w0, w30, [x0]
+ __ ldsmin(Assembler::word, r6, r10, r28); // ldsmin w6, w10, [x28]
+ __ ldsmax(Assembler::word, r7, r14, r6); // ldsmax w7, w14, [x6]
+ __ ldumin(Assembler::word, r6, r30, r0); // ldumin w6, w30, [x0]
+ __ ldumax(Assembler::word, r22, r30, r29); // ldumax w22, w30, [x29]
// LSEOp
- __ swpa(Assembler::word, r27, r6, r16); // swpa w27, w6, [x16]
- __ ldadda(Assembler::word, zr, zr, r2); // ldadda wzr, wzr, [x2]
- __ ldbica(Assembler::word, r24, r28, r8); // ldclra w24, w28, [x8]
- __ ldeora(Assembler::word, r15, r9, r23); // ldeora w15, w9, [x23]
- __ ldorra(Assembler::word, r26, r2, r7); // ldseta w26, w2, [x7]
- __ ldsmina(Assembler::word, r3, r17, r15); // ldsmina w3, w17, [x15]
- __ ldsmaxa(Assembler::word, r19, r5, r21); // ldsmaxa w19, w5, [x21]
- __ ldumina(Assembler::word, r7, r26, r12); // ldumina w7, w26, [x12]
- __ ldumaxa(Assembler::word, r12, r7, r29); // ldumaxa w12, w7, [x29]
+ __ swpa(Assembler::word, r16, r14, r19); // swpa w16, w14, [x19]
+ __ ldadda(Assembler::word, r21, r3, r25); // ldadda w21, w3, [x25]
+ __ ldbica(Assembler::word, r2, r16, r19); // ldclra w2, w16, [x19]
+ __ ldeora(Assembler::word, r26, r20, r23); // ldeora w26, w20, [x23]
+ __ ldorra(Assembler::word, r17, r6, sp); // ldseta w17, w6, [sp]
+ __ ldsmina(Assembler::word, r5, r23, r30); // ldsmina w5, w23, [x30]
+ __ ldsmaxa(Assembler::word, r11, r12, r14); // ldsmaxa w11, w12, [x14]
+ __ ldumina(Assembler::word, r2, r20, r13); // ldumina w2, w20, [x13]
+ __ ldumaxa(Assembler::word, r15, r17, r20); // ldumaxa w15, w17, [x20]
// LSEOp
- __ swpal(Assembler::word, r9, r8, r20); // swpal w9, w8, [x20]
- __ ldaddal(Assembler::word, r8, zr, r30); // ldaddal w8, wzr, [x30]
- __ ldbical(Assembler::word, r0, r6, r12); // ldclral w0, w6, [x12]
- __ ldeoral(Assembler::word, r17, r23, r2); // ldeoral w17, w23, [x2]
- __ ldorral(Assembler::word, r0, r30, r1); // ldsetal w0, w30, [x1]
- __ ldsminal(Assembler::word, r22, r3, r15); // ldsminal w22, w3, [x15]
- __ ldsmaxal(Assembler::word, r25, r21, r13); // ldsmaxal w25, w21, [x13]
- __ lduminal(Assembler::word, r13, r24, r27); // lduminal w13, w24, [x27]
- __ ldumaxal(Assembler::word, r20, r3, r11); // ldumaxal w20, w3, [x11]
+ __ swpal(Assembler::word, r6, r28, r23); // swpal w6, w28, [x23]
+ __ ldaddal(Assembler::word, r27, r16, r13); // ldaddal w27, w16, [x13]
+ __ ldbical(Assembler::word, r2, r23, r24); // ldclral w2, w23, [x24]
+ __ ldeoral(Assembler::word, r0, r28, r10); // ldeoral w0, w28, [x10]
+ __ ldorral(Assembler::word, r3, r15, r5); // ldsetal w3, w15, [x5]
+ __ ldsminal(Assembler::word, r3, r11, r29); // ldsminal w3, w11, [x29]
+ __ ldsmaxal(Assembler::word, r22, r27, r6); // ldsmaxal w22, w27, [x6]
+ __ lduminal(Assembler::word, r17, r20, r16); // lduminal w17, w20, [x16]
+ __ ldumaxal(Assembler::word, r23, r15, r7); // ldumaxal w23, w15, [x7]
// LSEOp
- __ swpl(Assembler::word, r3, r13, r21); // swpl w3, w13, [x21]
- __ ldaddl(Assembler::word, r26, r15, r26); // ldaddl w26, w15, [x26]
- __ ldbicl(Assembler::word, r9, r19, r2); // ldclrl w9, w19, [x2]
- __ ldeorl(Assembler::word, r24, r29, r7); // ldeorl w24, w29, [x7]
- __ ldorrl(Assembler::word, r29, r25, r15); // ldsetl w29, w25, [x15]
- __ ldsminl(Assembler::word, r11, r30, r7); // ldsminl w11, w30, [x7]
- __ ldsmaxl(Assembler::word, r11, r2, r6); // ldsmaxl w11, w2, [x6]
- __ lduminl(Assembler::word, r16, r11, r14); // lduminl w16, w11, [x14]
- __ ldumaxl(Assembler::word, r5, r8, r11); // ldumaxl w5, w8, [x11]
+ __ swpl(Assembler::word, r8, r16, r14); // swpl w8, w16, [x14]
+ __ ldaddl(Assembler::word, r23, r16, r23); // ldaddl w23, w16, [x23]
+ __ ldbicl(Assembler::word, r28, r12, r7); // ldclrl w28, w12, [x7]
+ __ ldeorl(Assembler::word, r28, r7, r19); // ldeorl w28, w7, [x19]
+ __ ldorrl(Assembler::word, r7, r12, r11); // ldsetl w7, w12, [x11]
+ __ ldsminl(Assembler::word, r10, zr, r20); // ldsminl w10, wzr, [x20]
+ __ ldsmaxl(Assembler::word, r9, r8, sp); // ldsmaxl w9, w8, [sp]
+ __ lduminl(Assembler::word, r10, r8, r2); // lduminl w10, w8, [x2]
+ __ ldumaxl(Assembler::word, r17, zr, sp); // ldumaxl w17, wzr, [sp]
+
+// SVEVectorOp
+ __ sve_add(z2, __ H, z7, z22); // add z2.h, z7.h, z22.h
+ __ sve_sub(z30, __ S, z22, z30); // sub z30.s, z22.s, z30.s
+ __ sve_fadd(z10, __ D, z22, z25); // fadd z10.d, z22.d, z25.d
+ __ sve_fmul(z23, __ D, z16, z12); // fmul z23.d, z16.d, z12.d
+ __ sve_fsub(z3, __ D, z17, z25); // fsub z3.d, z17.d, z25.d
+ __ sve_abs(z25, __ S, p0, z4); // abs z25.s, p0/m, z4.s
+ __ sve_add(z23, __ H, p6, z26); // add z23.h, p6/m, z23.h, z26.h
+ __ sve_asr(z6, __ D, p0, z17); // asr z6.d, p0/m, z6.d, z17.d
+ __ sve_cnt(z23, __ D, p3, z3); // cnt z23.d, p3/m, z3.d
+ __ sve_lsl(z11, __ S, p7, z9); // lsl z11.s, p7/m, z11.s, z9.s
+ __ sve_lsr(z27, __ S, p7, z3); // lsr z27.s, p7/m, z27.s, z3.s
+ __ sve_mul(z9, __ S, p4, z2); // mul z9.s, p4/m, z9.s, z2.s
+ __ sve_neg(z16, __ B, p2, z15); // neg z16.b, p2/m, z15.b
+ __ sve_not(z9, __ D, p2, z9); // not z9.d, p2/m, z9.d
+ __ sve_smax(z10, __ S, p5, z23); // smax z10.s, p5/m, z10.s, z23.s
+ __ sve_smin(z13, __ B, p5, z25); // smin z13.b, p5/m, z13.b, z25.b
+ __ sve_sub(z19, __ S, p5, z0); // sub z19.s, p5/m, z19.s, z0.s
+ __ sve_fabs(z17, __ D, p0, z22); // fabs z17.d, p0/m, z22.d
+ __ sve_fadd(z9, __ S, p2, z16); // fadd z9.s, p2/m, z9.s, z16.s
+ __ sve_fdiv(z17, __ S, p5, z0); // fdiv z17.s, p5/m, z17.s, z0.s
+ __ sve_fmax(z29, __ S, p5, z3); // fmax z29.s, p5/m, z29.s, z3.s
+ __ sve_fmin(z1, __ S, p3, z17); // fmin z1.s, p3/m, z1.s, z17.s
+ __ sve_fmul(z14, __ D, p2, z0); // fmul z14.d, p2/m, z14.d, z0.d
+ __ sve_fneg(z19, __ D, p4, z22); // fneg z19.d, p4/m, z22.d
+ __ sve_frintm(z17, __ D, p1, z15); // frintm z17.d, p1/m, z15.d
+ __ sve_frintn(z8, __ D, p4, z4); // frintn z8.d, p4/m, z4.d
+ __ sve_frintp(z5, __ D, p4, z29); // frintp z5.d, p4/m, z29.d
+ __ sve_fsqrt(z11, __ D, p0, z19); // fsqrt z11.d, p0/m, z19.d
+ __ sve_fsub(z10, __ D, p4, z28); // fsub z10.d, p4/m, z10.d, z28.d
+ __ sve_fmla(z13, __ D, p3, z15, z11); // fmla z13.d, p3/m, z15.d, z11.d
+ __ sve_fmls(z6, __ S, p7, z20, z15); // fmls z6.s, p7/m, z20.s, z15.s
+ __ sve_fnmla(z30, __ S, p2, z13, z7); // fnmla z30.s, p2/m, z13.s, z7.s
+ __ sve_fnmls(z22, __ D, p6, z14, z19); // fnmls z22.d, p6/m, z14.d, z19.d
+ __ sve_mla(z30, __ H, p3, z25, z0); // mla z30.h, p3/m, z25.h, z0.h
+ __ sve_mls(z10, __ D, p2, z24, z1); // mls z10.d, p2/m, z24.d, z1.d
+ __ sve_and(z6, z17, z22); // and z6.d, z17.d, z22.d
+ __ sve_eor(z10, z9, z17); // eor z10.d, z9.d, z17.d
+ __ sve_orr(z2, z12, z21); // orr z2.d, z12.d, z21.d
+
+// SVEReductionOp
+ __ sve_andv(v15, __ S, p6, z14); // andv s15, p6, z14.s
+ __ sve_orv(v9, __ D, p3, z7); // orv d9, p3, z7.d
+ __ sve_eorv(v30, __ H, p5, z9); // eorv h30, p5, z9.h
+ __ sve_smaxv(v7, __ S, p4, z26); // smaxv s7, p4, z26.s
+ __ sve_sminv(v20, __ S, p3, z29); // sminv s20, p3, z29.s
+ __ sve_fminv(v28, __ S, p3, z16); // fminv s28, p3, z16.s
+ __ sve_fmaxv(v6, __ D, p3, z9); // fmaxv d6, p3, z9.d
+ __ sve_fadda(v10, __ S, p5, z3); // fadda s10, p5, s10, z3.s
+ __ sve_uaddv(v21, __ B, p6, z8); // uaddv d21, p6, z8.b
__ bind(forth);
@@ -762,680 +846,780 @@ aarch64ops.o: file format elf64-littleaarch64
Disassembly of section .text:
0000000000000000 <back>:
- 0: 8b18ec0f add x15, x0, x24, lsl #59
- 4: cb9636d1 sub x17, x22, x22, asr #13
- 8: ab1ce74a adds x10, x26, x28, lsl #57
- c: eb184a19 subs x25, x16, x24, lsl #18
- 10: 0b1c1ca8 add w8, w5, w28, lsl #7
- 14: 4b817388 sub w8, w28, w1, asr #28
- 18: 2b01004c adds w12, w2, w1
- 1c: 6b5164b7 subs w23, w5, w17, lsr #25
- 20: 8a0d5595 and x21, x12, x13, lsl #21
- 24: aa9791f5 orr x21, x15, x23, asr #36
- 28: ca9bc316 eor x22, x24, x27, asr #48
- 2c: ea82d1f6 ands x22, x15, x2, asr #52
- 30: 0a980e21 and w1, w17, w24, asr #3
- 34: 2a862c45 orr w5, w2, w6, asr #11
- 38: 4a453037 eor w23, w1, w5, lsr #12
- 3c: 6a8e5180 ands w0, w12, w14, asr #20
- 40: 8a621cc1 bic x1, x6, x2, lsr #7
- 44: aa24bd1e orn x30, x8, x4, lsl #47
- 48: cab4d6d1 eon x17, x22, x20, asr #53
- 4c: eaa591fd bics x29, x15, x5, asr #36
- 50: 0a7d6efe bic w30, w23, w29, lsr #27
- 54: 2a2253ac orn w12, w29, w2, lsl #20
- 58: 4aa61187 eon w7, w12, w6, asr #4
- 5c: 6aa755b0 bics w16, w13, w7, asr #21
- 60: 110b5a25 add w5, w17, #0x2d6
- 64: 31056e0a adds w10, w16, #0x15b
- 68: 510f48ba sub w26, w5, #0x3d2
- 6c: 710ac715 subs w21, w24, #0x2b1
- 70: 910f6e0a add x10, x16, #0x3db
- 74: b10a65ef adds x15, x15, #0x299
- 78: d1009e98 sub x24, x20, #0x27
- 7c: f10131aa subs x10, x13, #0x4c
- 80: 121d4e67 and w7, w19, #0x7ffff8
- 84: 32043e25 orr w5, w17, #0xf0000fff
- 88: 52132390 eor w16, w28, #0x3fe000
- 8c: 72160b0e ands w14, w24, #0x1c00
- 90: 9273e76e and x14, x27, #0xffffffffffffe07f
- 94: b256416c orr x12, x11, #0x7fffc0000000000
- 98: d24b5002 eor x2, x0, #0xffe00000000003ff
- 9c: f266da8d ands x13, x20, #0xfffffffffc01ffff
- a0: 14000000 b a0 <back+0xa0>
- a4: 17ffffd7 b 0 <back>
- a8: 140001ee b 860 <forth>
- ac: 94000000 bl ac <back+0xac>
- b0: 97ffffd4 bl 0 <back>
- b4: 940001eb bl 860 <forth>
- b8: 3400000f cbz w15, b8 <back+0xb8>
- bc: 34fffa2f cbz w15, 0 <back>
- c0: 34003d0f cbz w15, 860 <forth>
- c4: 3500001c cbnz w28, c4 <back+0xc4>
- c8: 35fff9dc cbnz w28, 0 <back>
- cc: 35003cbc cbnz w28, 860 <forth>
- d0: b400001b cbz x27, d0 <back+0xd0>
- d4: b4fff97b cbz x27, 0 <back>
- d8: b4003c5b cbz x27, 860 <forth>
- dc: b5000000 cbnz x0, dc <back+0xdc>
- e0: b5fff900 cbnz x0, 0 <back>
- e4: b5003be0 cbnz x0, 860 <forth>
- e8: 1000000d adr x13, e8 <back+0xe8>
- ec: 10fff8ad adr x13, 0 <back>
- f0: 10003b8d adr x13, 860 <forth>
- f4: 90000003 adrp x3, 0 <back>
- f8: 36380015 tbz w21, #7, f8 <back+0xf8>
- fc: 363ff835 tbz w21, #7, 0 <back>
- 100: 36383b15 tbz w21, #7, 860 <forth>
- 104: 3748000f tbnz w15, #9, 104 <back+0x104>
- 108: 374ff7cf tbnz w15, #9, 0 <back>
- 10c: 37483aaf tbnz w15, #9, 860 <forth>
- 110: 12a14bee mov w14, #0xf5a0ffff // #-173998081
- 114: 5283bb51 mov w17, #0x1dda // #7642
- 118: 72858ebb movk w27, #0x2c75
- 11c: 92c98881 mov x1, #0xffffb3bbffffffff // #-83854941487105
- 120: d2aa50d4 mov x20, #0x52860000 // #1384513536
- 124: f2afd9d4 movk x20, #0x7ece, lsl #16
- 128: 935c504d sbfiz x13, x2, #36, #21
- 12c: 33133e90 bfi w16, w20, #13, #16
- 130: 5309196b ubfiz w11, w11, #23, #7
- 134: 93595482 sbfiz x2, x4, #39, #22
- 138: b3424e0d bfxil x13, x16, #2, #18
- 13c: d3481728 ubfiz x8, x25, #56, #6
- 140: 138a3b7d extr w29, w27, w10, #14
- 144: 93c66286 extr x6, x20, x6, #24
- 148: 54000000 b.eq 148 <back+0x148> // b.none
- 14c: 54fff5a0 b.eq 0 <back> // b.none
- 150: 54003880 b.eq 860 <forth> // b.none
- 154: 54000001 b.ne 154 <back+0x154> // b.any
- 158: 54fff541 b.ne 0 <back> // b.any
- 15c: 54003821 b.ne 860 <forth> // b.any
- 160: 54000002 b.cs 160 <back+0x160> // b.hs, b.nlast
- 164: 54fff4e2 b.cs 0 <back> // b.hs, b.nlast
- 168: 540037c2 b.cs 860 <forth> // b.hs, b.nlast
- 16c: 54000002 b.cs 16c <back+0x16c> // b.hs, b.nlast
- 170: 54fff482 b.cs 0 <back> // b.hs, b.nlast
- 174: 54003762 b.cs 860 <forth> // b.hs, b.nlast
- 178: 54000003 b.cc 178 <back+0x178> // b.lo, b.ul, b.last
- 17c: 54fff423 b.cc 0 <back> // b.lo, b.ul, b.last
- 180: 54003703 b.cc 860 <forth> // b.lo, b.ul, b.last
- 184: 54000003 b.cc 184 <back+0x184> // b.lo, b.ul, b.last
- 188: 54fff3c3 b.cc 0 <back> // b.lo, b.ul, b.last
- 18c: 540036a3 b.cc 860 <forth> // b.lo, b.ul, b.last
- 190: 54000004 b.mi 190 <back+0x190> // b.first
- 194: 54fff364 b.mi 0 <back> // b.first
- 198: 54003644 b.mi 860 <forth> // b.first
- 19c: 54000005 b.pl 19c <back+0x19c> // b.nfrst
- 1a0: 54fff305 b.pl 0 <back> // b.nfrst
- 1a4: 540035e5 b.pl 860 <forth> // b.nfrst
- 1a8: 54000006 b.vs 1a8 <back+0x1a8>
- 1ac: 54fff2a6 b.vs 0 <back>
- 1b0: 54003586 b.vs 860 <forth>
- 1b4: 54000007 b.vc 1b4 <back+0x1b4>
- 1b8: 54fff247 b.vc 0 <back>
- 1bc: 54003527 b.vc 860 <forth>
- 1c0: 54000008 b.hi 1c0 <back+0x1c0> // b.pmore
- 1c4: 54fff1e8 b.hi 0 <back> // b.pmore
- 1c8: 540034c8 b.hi 860 <forth> // b.pmore
- 1cc: 54000009 b.ls 1cc <back+0x1cc> // b.plast
- 1d0: 54fff189 b.ls 0 <back> // b.plast
- 1d4: 54003469 b.ls 860 <forth> // b.plast
- 1d8: 5400000a b.ge 1d8 <back+0x1d8> // b.tcont
- 1dc: 54fff12a b.ge 0 <back> // b.tcont
- 1e0: 5400340a b.ge 860 <forth> // b.tcont
- 1e4: 5400000b b.lt 1e4 <back+0x1e4> // b.tstop
- 1e8: 54fff0cb b.lt 0 <back> // b.tstop
- 1ec: 540033ab b.lt 860 <forth> // b.tstop
- 1f0: 5400000c b.gt 1f0 <back+0x1f0>
- 1f4: 54fff06c b.gt 0 <back>
- 1f8: 5400334c b.gt 860 <forth>
- 1fc: 5400000d b.le 1fc <back+0x1fc>
- 200: 54fff00d b.le 0 <back>
- 204: 540032ed b.le 860 <forth>
- 208: 5400000e b.al 208 <back+0x208>
- 20c: 54ffefae b.al 0 <back>
- 210: 5400328e b.al 860 <forth>
- 214: 5400000f b.nv 214 <back+0x214>
- 218: 54ffef4f b.nv 0 <back>
- 21c: 5400322f b.nv 860 <forth>
- 220: d40d2881 svc #0x6944
- 224: d40ea5c2 hvc #0x752e
- 228: d40518a3 smc #0x28c5
- 22c: d42eca40 brk #0x7652
- 230: d44a2e60 hlt #0x5173
- 234: d503201f nop
- 238: d69f03e0 eret
- 23c: d6bf03e0 drps
- 240: d5033fdf isb
- 244: d5033d9f dsb ld
- 248: d5033bbf dmb ish
- 24c: d61f0120 br x9
- 250: d63f0120 blr x9
- 254: c8027d7d stxr w2, x29, [x11]
- 258: c816ff85 stlxr w22, x5, [x28]
- 25c: c85f7e8e ldxr x14, [x20]
- 260: c85ffe7d ldaxr x29, [x19]
- 264: c89ffea6 stlr x6, [x21]
- 268: c8dffc73 ldar x19, [x3]
- 26c: 880c7f63 stxr w12, w3, [x27]
- 270: 8811fdfa stlxr w17, w26, [x15]
- 274: 885f7dcd ldxr w13, [x14]
- 278: 885fff4c ldaxr w12, [x26]
- 27c: 889ffe28 stlr w8, [x17]
- 280: 88dfffd5 ldar w21, [x30]
- 284: 48007d6f stxrh w0, w15, [x11]
- 288: 4811fc34 stlxrh w17, w20, [x1]
- 28c: 485f7d1d ldxrh w29, [x8]
- 290: 485ffd91 ldaxrh w17, [x12]
- 294: 489ffc8b stlrh w11, [x4]
- 298: 48dffc90 ldarh w16, [x4]
- 29c: 080e7c85 stxrb w14, w5, [x4]
- 2a0: 081bfe11 stlxrb w27, w17, [x16]
- 2a4: 085f7f66 ldxrb w6, [x27]
- 2a8: 085fff1b ldaxrb w27, [x24]
- 2ac: 089ffe8a stlrb w10, [x20]
- 2b0: 08dfff49 ldarb w9, [x26]
- 2b4: c87f7b85 ldxp x5, x30, [x28]
- 2b8: c87fa66a ldaxp x10, x9, [x19]
- 2bc: c82b5590 stxp w11, x16, x21, [x12]
- 2c0: c82adc94 stlxp w10, x20, x23, [x4]
- 2c4: 887f0416 ldxp w22, w1, [x0]
- 2c8: 887f8503 ldaxp w3, w1, [x8]
- 2cc: 88205fc9 stxp w0, w9, w23, [x30]
- 2d0: 8837c560 stlxp w23, w0, w17, [x11]
- 2d4: f81e1146 stur x6, [x10, #-31]
- 2d8: b81fb007 stur w7, [x0, #-5]
- 2dc: 381f3205 sturb w5, [x16, #-13]
- 2e0: 7801f27e sturh w30, [x19, #31]
- 2e4: f8477130 ldur x16, [x9, #119]
- 2e8: b843b208 ldur w8, [x16, #59]
- 2ec: 385f918a ldurb w10, [x12, #-7]
- 2f0: 785da12e ldurh w14, [x9, #-38]
- 2f4: 389f83d8 ldursb x24, [x30, #-8]
- 2f8: 78817087 ldursh x7, [x4, #23]
- 2fc: 78dd91d1 ldursh w17, [x14, #-39]
- 300: b89e136b ldursw x11, [x27, #-31]
- 304: fc4410ec ldur d12, [x7, #65]
- 308: bc5fe200 ldur s0, [x16, #-2]
- 30c: fc15f2ed stur d13, [x23, #-161]
- 310: bc1c2075 stur s21, [x3, #-62]
- 314: f8064ca2 str x2, [x5, #100]!
- 318: b81a4c29 str w9, [x1, #-92]!
- 31c: 381fbfdb strb w27, [x30, #-5]!
- 320: 7800cdfb strh w27, [x15, #12]!
- 324: f852ce24 ldr x4, [x17, #-212]!
- 328: b841eef5 ldr w21, [x23, #30]!
- 32c: 385f9e2d ldrb w13, [x17, #-7]!
- 330: 785cec19 ldrh w25, [x0, #-50]!
- 334: 389ebea1 ldrsb x1, [x21, #-21]!
- 338: 789caebc ldrsh x28, [x21, #-54]!
- 33c: 78c02c8b ldrsh w11, [x4, #2]!
- 340: b883dd31 ldrsw x17, [x9, #61]!
- 344: fc427e7d ldr d29, [x19, #39]!
- 348: bc5abed6 ldr s22, [x22, #-85]!
- 34c: fc11ff29 str d9, [x25, #-225]!
- 350: bc1f1c49 str s9, [x2, #-15]!
- 354: f81be6ed str x13, [x23], #-66
- 358: b800a611 str w17, [x16], #10
- 35c: 381e05c1 strb w1, [x14], #-32
- 360: 78006411 strh w17, [x0], #6
- 364: f855473b ldr x27, [x25], #-172
- 368: b85da72d ldr w13, [x25], #-38
- 36c: 385e372b ldrb w11, [x25], #-29
- 370: 784144be ldrh w30, [x5], #20
- 374: 389f94e9 ldrsb x9, [x7], #-7
- 378: 789c2460 ldrsh x0, [x3], #-62
- 37c: 78c1f5c7 ldrsh w7, [x14], #31
- 380: b8827771 ldrsw x17, [x27], #39
- 384: fc515491 ldr d17, [x4], #-235
- 388: bc4226ba ldr s26, [x21], #34
- 38c: fc1c7625 str d5, [x17], #-57
- 390: bc1935ad str s13, [x13], #-109
- 394: f824da06 str x6, [x16, w4, sxtw #3]
- 398: b834db09 str w9, [x24, w20, sxtw #2]
- 39c: 38237ba3 strb w3, [x29, x3, lsl #0]
- 3a0: 783e6a2a strh w10, [x17, x30]
- 3a4: f867497b ldr x27, [x11, w7, uxtw]
- 3a8: b87949ee ldr w14, [x15, w25, uxtw]
- 3ac: 387379d8 ldrb w24, [x14, x19, lsl #0]
- 3b0: 7866c810 ldrh w16, [x0, w6, sxtw]
- 3b4: 38acd98a ldrsb x10, [x12, w12, sxtw #0]
- 3b8: 78b0499a ldrsh x26, [x12, w16, uxtw]
- 3bc: 78ee781a ldrsh w26, [x0, x14, lsl #1]
- 3c0: b8bbf971 ldrsw x17, [x11, x27, sxtx #2]
- 3c4: fc73d803 ldr d3, [x0, w19, sxtw #3]
- 3c8: bc6979fa ldr s26, [x15, x9, lsl #2]
- 3cc: fc30e9ab str d11, [x13, x16, sxtx]
- 3d0: bc355a7a str s26, [x19, w21, uxtw #2]
- 3d4: f91886a8 str x8, [x21, #12552]
- 3d8: b918ef6a str w10, [x27, #6380]
- 3dc: 391b15db strb w27, [x14, #1733]
- 3e0: 791ac0f0 strh w16, [x7, #3424]
- 3e4: f958753b ldr x27, [x9, #12520]
- 3e8: b95a1958 ldr w24, [x10, #6680]
- 3ec: 395b3f18 ldrb w24, [x24, #1743]
- 3f0: 795800b4 ldrh w20, [x5, #3072]
- 3f4: 39988891 ldrsb x17, [x4, #1570]
- 3f8: 799a81ae ldrsh x14, [x13, #3392]
- 3fc: 79dd172a ldrsh w10, [x25, #3722]
- 400: b9981342 ldrsw x2, [x26, #6160]
- 404: fd5d21da ldr d26, [x14, #14912]
- 408: bd5e7c9c ldr s28, [x4, #7804]
- 40c: fd1b526e str d14, [x19, #13984]
- 410: bd18df97 str s23, [x28, #6364]
- 414: 58002268 ldr x8, 860 <forth>
- 418: 18ffdf51 ldr w17, 0 <back>
- 41c: f8951080 prfum pldl1keep, [x4, #-175]
- 420: d8000000 prfm pldl1keep, 420 <back+0x420>
- 424: f8a4c900 prfm pldl1keep, [x8, w4, sxtw]
- 428: f999e180 prfm pldl1keep, [x12, #13248]
- 42c: 1a150374 adc w20, w27, w21
- 430: 3a060227 adcs w7, w17, w6
- 434: 5a1900c5 sbc w5, w6, w25
- 438: 7a0e017e sbcs w30, w11, w14
- 43c: 9a0b0223 adc x3, x17, x11
- 440: ba110159 adcs x25, x10, x17
- 444: da170207 sbc x7, x16, x23
- 448: fa050144 sbcs x4, x10, x5
- 44c: 0b2973c9 add w9, w30, w9, uxtx #4
- 450: 2b30a8a0 adds w0, w5, w16, sxth #2
- 454: cb3b8baf sub x15, x29, w27, sxtb #2
- 458: 6b21f12b subs w11, w9, w1, sxtx #4
- 45c: 8b264f02 add x2, x24, w6, uxtw #3
- 460: ab3a70d3 adds x19, x6, x26, uxtx #4
- 464: cb39ef48 sub x8, x26, x25, sxtx #3
- 468: eb29329a subs x26, x20, w9, uxth #4
- 46c: 3a5a41a7 ccmn w13, w26, #0x7, mi // mi = first
- 470: 7a54310f ccmp w8, w20, #0xf, cc // cc = lo, ul, last
- 474: ba4302c8 ccmn x22, x3, #0x8, eq // eq = none
- 478: fa58a04a ccmp x2, x24, #0xa, ge // ge = tcont
- 47c: 3a50490d ccmn w8, #0x10, #0xd, mi // mi = first
- 480: 7a4c0a01 ccmp w16, #0xc, #0x1, eq // eq = none
- 484: ba5f79e3 ccmn x15, #0x1f, #0x3, vc
- 488: fa4c0aef ccmp x23, #0xc, #0xf, eq // eq = none
- 48c: 1a9a30ee csel w14, w7, w26, cc // cc = lo, ul, last
- 490: 1a9ed763 csinc w3, w27, w30, le
- 494: 5a9702ab csinv w11, w21, w23, eq // eq = none
- 498: 5a95c7da csneg w26, w30, w21, gt
- 49c: 9a8d835c csel x28, x26, x13, hi // hi = pmore
- 4a0: 9a909471 csinc x17, x3, x16, ls // ls = plast
- 4a4: da8380ab csinv x11, x5, x3, hi // hi = pmore
- 4a8: da93c461 csneg x1, x3, x19, gt
- 4ac: 5ac00120 rbit w0, w9
- 4b0: 5ac005da rev16 w26, w14
- 4b4: 5ac00a2d rev w13, w17
- 4b8: 5ac0128b clz w11, w20
- 4bc: 5ac0163c cls w28, w17
- 4c0: dac0008d rbit x13, x4
- 4c4: dac007c1 rev16 x1, x30
- 4c8: dac009cd rev32 x13, x14
- 4cc: dac00d05 rev x5, x8
- 4d0: dac01322 clz x2, x25
- 4d4: dac01514 cls x20, x8
- 4d8: 1adb0b35 udiv w21, w25, w27
- 4dc: 1ad00d4d sdiv w13, w10, w16
- 4e0: 1ad1203c lsl w28, w1, w17
- 4e4: 1aca26f9 lsr w25, w23, w10
- 4e8: 1ac72867 asr w7, w3, w7
- 4ec: 1ace2fce ror w14, w30, w14
- 4f0: 9acf0acc udiv x12, x22, x15
- 4f4: 9acd0f22 sdiv x2, x25, x13
- 4f8: 9ad522e7 lsl x7, x23, x21
- 4fc: 9ac0258b lsr x11, x12, x0
- 500: 9adc293e asr x30, x9, x28
- 504: 9ad62cad ror x13, x5, x22
- 508: 9bc47ea5 umulh x5, x21, x4
- 50c: 9b477c51 smulh x17, x2, x7
- 510: 1b11318c madd w12, w12, w17, w12
- 514: 1b01edfe msub w30, w15, w1, w27
- 518: 9b117662 madd x2, x19, x17, x29
- 51c: 9b03fae4 msub x4, x23, x3, x30
- 520: 9b313eef smaddl x15, w23, w17, x15
- 524: 9b21b59b smsubl x27, w12, w1, x13
- 528: 9bac45a6 umaddl x6, w13, w12, x17
- 52c: 9ba6a839 umsubl x25, w1, w6, x10
- 530: 1e240871 fmul s17, s3, s4
- 534: 1e3518b0 fdiv s16, s5, s21
- 538: 1e312b63 fadd s3, s27, s17
- 53c: 1e2f3959 fsub s25, s10, s15
- 540: 1e200a2a fmul s10, s17, s0
- 544: 1e630b5c fmul d28, d26, d3
- 548: 1e7b1804 fdiv d4, d0, d27
- 54c: 1e6229dc fadd d28, d14, d2
- 550: 1e773b4c fsub d12, d26, d23
- 554: 1e610bcf fmul d15, d30, d1
- 558: 1f0534a4 fmadd s4, s5, s5, s13
- 55c: 1f1c85b5 fmsub s21, s13, s28, s1
- 560: 1f3d1c71 fnmadd s17, s3, s29, s7
- 564: 1f3d6b37 fnmadd s23, s25, s29, s26
- 568: 1f5e68ee fmadd d14, d7, d30, d26
- 56c: 1f4aa4f6 fmsub d22, d7, d10, d9
- 570: 1f6e24e7 fnmadd d7, d7, d14, d9
- 574: 1f6f630e fnmadd d14, d24, d15, d24
- 578: 1e204056 fmov s22, s2
- 57c: 1e20c060 fabs s0, s3
- 580: 1e214229 fneg s9, s17
- 584: 1e21c178 fsqrt s24, s11
- 588: 1e22c32f fcvt d15, s25
- 58c: 1e604064 fmov d4, d3
- 590: 1e60c2da fabs d26, d22
- 594: 1e61427e fneg d30, d19
- 598: 1e61c1cc fsqrt d12, d14
- 59c: 1e6240f1 fcvt s17, d7
- 5a0: 1e3801d8 fcvtzs w24, s14
- 5a4: 9e38034d fcvtzs x13, s26
- 5a8: 1e780022 fcvtzs w2, d1
- 5ac: 9e780165 fcvtzs x5, d11
- 5b0: 1e22026e scvtf s14, w19
- 5b4: 9e2202c1 scvtf s1, x22
- 5b8: 1e62023b scvtf d27, w17
- 5bc: 9e620136 scvtf d22, x9
- 5c0: 1e26006e fmov w14, s3
- 5c4: 9e66022c fmov x12, d17
- 5c8: 1e270368 fmov s8, w27
- 5cc: 9e67039d fmov d29, x28
- 5d0: 1e3e2000 fcmp s0, s30
- 5d4: 1e692180 fcmp d12, d9
- 5d8: 1e202148 fcmp s10, #0.0
- 5dc: 1e602328 fcmp d25, #0.0
- 5e0: 292e7b68 stp w8, w30, [x27, #-144]
- 5e4: 294a4f15 ldp w21, w19, [x24, #80]
- 5e8: 69626c50 ldpsw x16, x27, [x2, #-240]
- 5ec: a93814d5 stp x21, x5, [x6, #-128]
- 5f0: a97e679d ldp x29, x25, [x28, #-32]
- 5f4: 29903408 stp w8, w13, [x0, #128]!
- 5f8: 29ec5039 ldp w25, w20, [x1, #-160]!
- 5fc: 69fc62ce ldpsw x14, x24, [x22, #-32]!
- 600: a98504d1 stp x17, x1, [x6, #80]!
- 604: a9fc4735 ldp x21, x17, [x25, #-64]!
- 608: 28b05691 stp w17, w21, [x20], #-128
- 60c: 28c8705c ldp w28, w28, [x2], #64
- 610: 68e07953 ldpsw x19, x30, [x10], #-256
- 614: a8bf3e31 stp x17, x15, [x17], #-16
- 618: a8fe0331 ldp x17, x0, [x25], #-32
- 61c: 283c170e stnp w14, w5, [x24, #-32]
- 620: 284e4c37 ldnp w23, w19, [x1, #112]
- 624: a80419cb stnp x11, x6, [x14, #64]
- 628: a8722f62 ldnp x2, x11, [x27, #-224]
- 62c: 0c407230 ld1 {v16.8b}, [x17]
- 630: 4cdfa13d ld1 {v29.16b, v30.16b}, [x9], #32
- 634: 0cd56f1e ld1 {v30.1d, v31.1d, v0.1d}, [x24], x21
- 638: 4cdf2440 ld1 {v0.8h-v3.8h}, [x2], #64
- 63c: 0d40c134 ld1r {v20.8b}, [x9]
- 640: 4ddfc811 ld1r {v17.4s}, [x0], #4
- 644: 0ddaced5 ld1r {v21.1d}, [x22], x26
- 648: 4c408f33 ld2 {v19.2d, v20.2d}, [x25]
- 64c: 0cdf84aa ld2 {v10.4h, v11.4h}, [x5], #16
- 650: 4d60c30a ld2r {v10.16b, v11.16b}, [x24]
- 654: 0dffcbad ld2r {v13.2s, v14.2s}, [x29], #8
- 658: 4de2cf96 ld2r {v22.2d, v23.2d}, [x28], x2
- 65c: 4ccb489e ld3 {v30.4s, v31.4s, v0.4s}, [x4], x11
- 660: 0c40481d ld3 {v29.2s-v31.2s}, [x0]
- 664: 4d40e777 ld3r {v23.8h-v25.8h}, [x27]
- 668: 4ddfe943 ld3r {v3.4s-v5.4s}, [x10], #12
- 66c: 0dd6edd3 ld3r {v19.1d-v21.1d}, [x14], x22
- 670: 4cdf040e ld4 {v14.8h-v17.8h}, [x0], #64
- 674: 0cd902de ld4 {v30.8b, v31.8b, v0.8b, v1.8b}, [x22], x25
- 678: 0d60e019 ld4r {v25.8b-v28.8b}, [x0]
- 67c: 0dffe50a ld4r {v10.4h-v13.4h}, [x8], #8
- 680: 0dfce8c1 ld4r {v1.2s-v4.2s}, [x6], x28
- 684: ba5fd3e3 ccmn xzr, xzr, #0x3, le
- 688: 3a5f03e5 ccmn wzr, wzr, #0x5, eq // eq = none
- 68c: fa411be4 ccmp xzr, #0x1, #0x4, ne // ne = any
- 690: 7a42cbe2 ccmp wzr, #0x2, #0x2, gt
- 694: 93df03ff ror xzr, xzr, #0
- 698: c820ffff stlxp w0, xzr, xzr, [sp]
- 69c: 8822fc7f stlxp w2, wzr, wzr, [x3]
- 6a0: c8247cbf stxp w4, xzr, xzr, [x5]
- 6a4: 88267fff stxp w6, wzr, wzr, [sp]
- 6a8: 4e010fe0 dup v0.16b, wzr
- 6ac: 4e081fe1 mov v1.d[0], xzr
- 6b0: 4e0c1fe1 mov v1.s[1], wzr
- 6b4: 4e0a1fe1 mov v1.h[2], wzr
- 6b8: 4e071fe1 mov v1.b[3], wzr
- 6bc: 4cc0ac3f ld1 {v31.2d, v0.2d}, [x1], x0
- 6c0: 1e601000 fmov d0, #2.000000000000000000e+00
- 6c4: 1e603000 fmov d0, #2.125000000000000000e+00
- 6c8: 1e621000 fmov d0, #4.000000000000000000e+00
- 6cc: 1e623000 fmov d0, #4.250000000000000000e+00
- 6d0: 1e641000 fmov d0, #8.000000000000000000e+00
- 6d4: 1e643000 fmov d0, #8.500000000000000000e+00
- 6d8: 1e661000 fmov d0, #1.600000000000000000e+01
- 6dc: 1e663000 fmov d0, #1.700000000000000000e+01
- 6e0: 1e681000 fmov d0, #1.250000000000000000e-01
- 6e4: 1e683000 fmov d0, #1.328125000000000000e-01
- 6e8: 1e6a1000 fmov d0, #2.500000000000000000e-01
- 6ec: 1e6a3000 fmov d0, #2.656250000000000000e-01
- 6f0: 1e6c1000 fmov d0, #5.000000000000000000e-01
- 6f4: 1e6c3000 fmov d0, #5.312500000000000000e-01
- 6f8: 1e6e1000 fmov d0, #1.000000000000000000e+00
- 6fc: 1e6e3000 fmov d0, #1.062500000000000000e+00
- 700: 1e701000 fmov d0, #-2.000000000000000000e+00
- 704: 1e703000 fmov d0, #-2.125000000000000000e+00
- 708: 1e721000 fmov d0, #-4.000000000000000000e+00
- 70c: 1e723000 fmov d0, #-4.250000000000000000e+00
- 710: 1e741000 fmov d0, #-8.000000000000000000e+00
- 714: 1e743000 fmov d0, #-8.500000000000000000e+00
- 718: 1e761000 fmov d0, #-1.600000000000000000e+01
- 71c: 1e763000 fmov d0, #-1.700000000000000000e+01
- 720: 1e781000 fmov d0, #-1.250000000000000000e-01
- 724: 1e783000 fmov d0, #-1.328125000000000000e-01
- 728: 1e7a1000 fmov d0, #-2.500000000000000000e-01
- 72c: 1e7a3000 fmov d0, #-2.656250000000000000e-01
- 730: 1e7c1000 fmov d0, #-5.000000000000000000e-01
- 734: 1e7c3000 fmov d0, #-5.312500000000000000e-01
- 738: 1e7e1000 fmov d0, #-1.000000000000000000e+00
- 73c: 1e7e3000 fmov d0, #-1.062500000000000000e+00
- 740: f83081f4 swp x16, x20, [x15]
- 744: f8220387 ldadd x2, x7, [x28]
- 748: f834132a ldclr x20, x10, [x25]
- 74c: f836204b ldeor x22, x11, [x2]
- 750: f821326a ldset x1, x10, [x19]
- 754: f82e5075 ldsmin x14, x21, [x3]
- 758: f83c41bb ldsmax x28, x27, [x13]
- 75c: f83172be ldumin x17, x30, [x21]
- 760: f83b63b0 ldumax x27, x16, [x29]
- 764: f8be8009 swpa x30, x9, [x0]
- 768: f8bc039b ldadda x28, x27, [x28]
- 76c: f8b51159 ldclra x21, x25, [x10]
- 770: f8bf21f4 ldeora xzr, x20, [x15]
- 774: f8a131d9 ldseta x1, x25, [x14]
- 778: f8b553ba ldsmina x21, x26, [x29]
- 77c: f8a8433d ldsmaxa x8, x29, [x25]
- 780: f8ad7322 ldumina x13, x2, [x25]
- 784: f8af6017 ldumaxa x15, x23, [x0]
- 788: f8e38041 swpal x3, x1, [x2]
- 78c: f8fc0283 ldaddal x28, x3, [x20]
- 790: f8ee11df ldclral x14, xzr, [x14]
- 794: f8e7205c ldeoral x7, x28, [x2]
- 798: f8e030ab ldsetal x0, x11, [x5]
- 79c: f8eb528e ldsminal x11, x14, [x20]
- 7a0: f8ff4044 ldsmaxal xzr, x4, [x2]
- 7a4: f8fa72c0 lduminal x26, x0, [x22]
- 7a8: f8f161a1 ldumaxal x17, x1, [x13]
- 7ac: f877829a swpl x23, x26, [x20]
- 7b0: f86e018b ldaddl x14, x11, [x12]
- 7b4: f86c11ff stclrl x12, [x15]
- 7b8: f87b210e ldeorl x27, x14, [x8]
- 7bc: f86a333e ldsetl x10, x30, [x25]
- 7c0: f8765207 ldsminl x22, x7, [x16]
- 7c4: f8614110 ldsmaxl x1, x16, [x8]
- 7c8: f8617341 lduminl x1, x1, [x26]
- 7cc: f86061f7 ldumaxl x0, x23, [x15]
- 7d0: b82b8110 swp w11, w16, [x8]
- 7d4: b82101c7 ldadd w1, w7, [x14]
- 7d8: b830113f stclr w16, [x9]
- 7dc: b83621a6 ldeor w22, w6, [x13]
- 7e0: b82b308d ldset w11, w13, [x4]
- 7e4: b8305016 ldsmin w16, w22, [x0]
- 7e8: b83c415f stsmax w28, [x10]
- 7ec: b8307105 ldumin w16, w5, [x8]
- 7f0: b83a61f4 ldumax w26, w20, [x15]
- 7f4: b8bb8206 swpa w27, w6, [x16]
- 7f8: b8bf005f ldadda wzr, wzr, [x2]
- 7fc: b8b8111c ldclra w24, w28, [x8]
- 800: b8af22e9 ldeora w15, w9, [x23]
- 804: b8ba30e2 ldseta w26, w2, [x7]
- 808: b8a351f1 ldsmina w3, w17, [x15]
- 80c: b8b342a5 ldsmaxa w19, w5, [x21]
- 810: b8a7719a ldumina w7, w26, [x12]
- 814: b8ac63a7 ldumaxa w12, w7, [x29]
- 818: b8e98288 swpal w9, w8, [x20]
- 81c: b8e803df ldaddal w8, wzr, [x30]
- 820: b8e01186 ldclral w0, w6, [x12]
- 824: b8f12057 ldeoral w17, w23, [x2]
- 828: b8e0303e ldsetal w0, w30, [x1]
- 82c: b8f651e3 ldsminal w22, w3, [x15]
- 830: b8f941b5 ldsmaxal w25, w21, [x13]
- 834: b8ed7378 lduminal w13, w24, [x27]
- 838: b8f46163 ldumaxal w20, w3, [x11]
- 83c: b86382ad swpl w3, w13, [x21]
- 840: b87a034f ldaddl w26, w15, [x26]
- 844: b8691053 ldclrl w9, w19, [x2]
- 848: b87820fd ldeorl w24, w29, [x7]
- 84c: b87d31f9 ldsetl w29, w25, [x15]
- 850: b86b50fe ldsminl w11, w30, [x7]
- 854: b86b40c2 ldsmaxl w11, w2, [x6]
- 858: b87071cb lduminl w16, w11, [x14]
- 85c: b8656168 ldumaxl w5, w8, [x11]
+ 0: 8b8e677b add x27, x27, x14, asr #25
+ 4: cb512964 sub x4, x11, x17, lsr #10
+ 8: ab998627 adds x7, x17, x25, asr #33
+ c: eb9416cd subs x13, x22, x20, asr #5
+ 10: 0b83438a add w10, w28, w3, asr #16
+ 14: 4b463c55 sub w21, w2, w6, lsr #15
+ 18: 2b9b2406 adds w6, w0, w27, asr #9
+ 1c: 6b882b65 subs w5, w27, w8, asr #10
+ 20: 8a879c8c and x12, x4, x7, asr #39
+ 24: aa16cb75 orr x21, x27, x22, lsl #50
+ 28: ca80baa3 eor x3, x21, x0, asr #46
+ 2c: ea855955 ands x21, x10, x5, asr #22
+ 30: 0a1d5aad and w13, w21, w29, lsl #22
+ 34: 2a504951 orr w17, w10, w16, lsr #18
+ 38: 4a976cf0 eor w16, w7, w23, asr #27
+ 3c: 6a8c30ca ands w10, w6, w12, asr #12
+ 40: 8a275b33 bic x19, x25, x7, lsl #22
+ 44: aa27d459 orn x25, x2, x7, lsl #53
+ 48: cab70ee9 eon x9, x23, x23, asr #3
+ 4c: eaadc8c5 bics x5, x6, x13, asr #50
+ 50: 0a2a26af bic w15, w21, w10, lsl #9
+ 54: 2abe06b1 orn w17, w21, w30, asr #1
+ 58: 4a3d4f87 eon w7, w28, w29, lsl #19
+ 5c: 6ab632d9 bics w25, w22, w22, asr #12
+ 60: 110c5346 add w6, w26, #0x314
+ 64: 3107aa23 adds w3, w17, #0x1ea
+ 68: 5107eea5 sub w5, w21, #0x1fb
+ 6c: 710dcf76 subs w22, w27, #0x373
+ 70: 9103d10c add x12, x8, #0xf4
+ 74: b10e811d adds x29, x8, #0x3a0
+ 78: d10a087a sub x26, x3, #0x282
+ 7c: f109d1fd subs x29, x15, #0x274
+ 80: 1209afd5 and w21, w30, #0xff87ff87
+ 84: 32099d95 orr w21, w12, #0x7f807f80
+ 88: 5202c62b eor w11, w17, #0xc0c0c0c0
+ 8c: 720897da ands w26, w30, #0x3f003f00
+ 90: 920e36f9 and x25, x23, #0xfffc0000fffc0000
+ 94: b243f1de orr x30, x14, #0xe3ffffffffffffff
+ 98: d263d09a eor x26, x4, #0xffffffffe003ffff
+ 9c: f24fd01a ands x26, x0, #0xfffe003fffffffff
+ a0: 14000000 b a0 <back+0xa0>
+ a4: 17ffffd7 b 0 <back>
+ a8: 1400023e b 9a0 <forth>
+ ac: 94000000 bl ac <back+0xac>
+ b0: 97ffffd4 bl 0 <back>
+ b4: 9400023b bl 9a0 <forth>
+ b8: 3400001c cbz w28, b8 <back+0xb8>
+ bc: 34fffa3c cbz w28, 0 <back>
+ c0: 3400471c cbz w28, 9a0 <forth>
+ c4: 35000011 cbnz w17, c4 <back+0xc4>
+ c8: 35fff9d1 cbnz w17, 0 <back>
+ cc: 350046b1 cbnz w17, 9a0 <forth>
+ d0: b4000019 cbz x25, d0 <back+0xd0>
+ d4: b4fff979 cbz x25, 0 <back>
+ d8: b4004659 cbz x25, 9a0 <forth>
+ dc: b5000002 cbnz x2, dc <back+0xdc>
+ e0: b5fff902 cbnz x2, 0 <back>
+ e4: b50045e2 cbnz x2, 9a0 <forth>
+ e8: 1000001d adr x29, e8 <back+0xe8>
+ ec: 10fff8bd adr x29, 0 <back>
+ f0: 1000459d adr x29, 9a0 <forth>
+ f4: 9000001d adrp x29, 0 <back>
+ f8: 36300006 tbz w6, #6, f8 <back+0xf8>
+ fc: 3637f826 tbz w6, #6, 0 <back>
+ 100: 36304506 tbz w6, #6, 9a0 <forth>
+ 104: 37100015 tbnz w21, #2, 104 <back+0x104>
+ 108: 3717f7d5 tbnz w21, #2, 0 <back>
+ 10c: 371044b5 tbnz w21, #2, 9a0 <forth>
+ 110: 128155e8 mov w8, #0xfffff550 // #-2736
+ 114: 52a5762b mov w11, #0x2bb10000 // #733020160
+ 118: 72acb59a movk w26, #0x65ac, lsl #16
+ 11c: 92866a8d mov x13, #0xffffffffffffccab // #-13141
+ 120: d2e2d8a6 mov x6, #0x16c5000000000000 // #1640717639246413824
+ 124: f2c54450 movk x16, #0x2a22, lsl #32
+ 128: 93516bde sbfx x30, x30, #17, #10
+ 12c: 330f3124 bfi w4, w9, #17, #13
+ 130: 5301168f ubfx w15, w20, #1, #5
+ 134: 9353391b sbfiz x27, x8, #45, #15
+ 138: b355741e bfxil x30, x0, #21, #9
+ 13c: d3562f5b ubfiz x27, x26, #42, #12
+ 140: 13866d8c extr w12, w12, w6, #27
+ 144: 93d6b5b3 extr x19, x13, x22, #45
+ 148: 54000000 b.eq 148 <back+0x148> // b.none
+ 14c: 54fff5a0 b.eq 0 <back> // b.none
+ 150: 54004280 b.eq 9a0 <forth> // b.none
+ 154: 54000001 b.ne 154 <back+0x154> // b.any
+ 158: 54fff541 b.ne 0 <back> // b.any
+ 15c: 54004221 b.ne 9a0 <forth> // b.any
+ 160: 54000002 b.cs 160 <back+0x160> // b.hs, b.nlast
+ 164: 54fff4e2 b.cs 0 <back> // b.hs, b.nlast
+ 168: 540041c2 b.cs 9a0 <forth> // b.hs, b.nlast
+ 16c: 54000002 b.cs 16c <back+0x16c> // b.hs, b.nlast
+ 170: 54fff482 b.cs 0 <back> // b.hs, b.nlast
+ 174: 54004162 b.cs 9a0 <forth> // b.hs, b.nlast
+ 178: 54000003 b.cc 178 <back+0x178> // b.lo, b.ul, b.last
+ 17c: 54fff423 b.cc 0 <back> // b.lo, b.ul, b.last
+ 180: 54004103 b.cc 9a0 <forth> // b.lo, b.ul, b.last
+ 184: 54000003 b.cc 184 <back+0x184> // b.lo, b.ul, b.last
+ 188: 54fff3c3 b.cc 0 <back> // b.lo, b.ul, b.last
+ 18c: 540040a3 b.cc 9a0 <forth> // b.lo, b.ul, b.last
+ 190: 54000004 b.mi 190 <back+0x190> // b.first
+ 194: 54fff364 b.mi 0 <back> // b.first
+ 198: 54004044 b.mi 9a0 <forth> // b.first
+ 19c: 54000005 b.pl 19c <back+0x19c> // b.nfrst
+ 1a0: 54fff305 b.pl 0 <back> // b.nfrst
+ 1a4: 54003fe5 b.pl 9a0 <forth> // b.nfrst
+ 1a8: 54000006 b.vs 1a8 <back+0x1a8>
+ 1ac: 54fff2a6 b.vs 0 <back>
+ 1b0: 54003f86 b.vs 9a0 <forth>
+ 1b4: 54000007 b.vc 1b4 <back+0x1b4>
+ 1b8: 54fff247 b.vc 0 <back>
+ 1bc: 54003f27 b.vc 9a0 <forth>
+ 1c0: 54000008 b.hi 1c0 <back+0x1c0> // b.pmore
+ 1c4: 54fff1e8 b.hi 0 <back> // b.pmore
+ 1c8: 54003ec8 b.hi 9a0 <forth> // b.pmore
+ 1cc: 54000009 b.ls 1cc <back+0x1cc> // b.plast
+ 1d0: 54fff189 b.ls 0 <back> // b.plast
+ 1d4: 54003e69 b.ls 9a0 <forth> // b.plast
+ 1d8: 5400000a b.ge 1d8 <back+0x1d8> // b.tcont
+ 1dc: 54fff12a b.ge 0 <back> // b.tcont
+ 1e0: 54003e0a b.ge 9a0 <forth> // b.tcont
+ 1e4: 5400000b b.lt 1e4 <back+0x1e4> // b.tstop
+ 1e8: 54fff0cb b.lt 0 <back> // b.tstop
+ 1ec: 54003dab b.lt 9a0 <forth> // b.tstop
+ 1f0: 5400000c b.gt 1f0 <back+0x1f0>
+ 1f4: 54fff06c b.gt 0 <back>
+ 1f8: 54003d4c b.gt 9a0 <forth>
+ 1fc: 5400000d b.le 1fc <back+0x1fc>
+ 200: 54fff00d b.le 0 <back>
+ 204: 54003ced b.le 9a0 <forth>
+ 208: 5400000e b.al 208 <back+0x208>
+ 20c: 54ffefae b.al 0 <back>
+ 210: 54003c8e b.al 9a0 <forth>
+ 214: 5400000f b.nv 214 <back+0x214>
+ 218: 54ffef4f b.nv 0 <back>
+ 21c: 54003c2f b.nv 9a0 <forth>
+ 220: d407da81 svc #0x3ed4
+ 224: d402d542 hvc #0x16aa
+ 228: d406dae3 smc #0x36d7
+ 22c: d4258fa0 brk #0x2c7d
+ 230: d44d5960 hlt #0x6acb
+ 234: d503201f nop
+ 238: d69f03e0 eret
+ 23c: d6bf03e0 drps
+ 240: d5033fdf isb
+ 244: d503339f dsb osh
+ 248: d50336bf dmb nshst
+ 24c: d61f0160 br x11
+ 250: d63f0320 blr x25
+ 254: c80e7daf stxr w14, x15, [x13]
+ 258: c81efc39 stlxr w30, x25, [x1]
+ 25c: c85f7c6d ldxr x13, [x3]
+ 260: c85ffea8 ldaxr x8, [x21]
+ 264: c89fff8d stlr x13, [x28]
+ 268: c8dfffc8 ldar x8, [x30]
+ 26c: 880d7f91 stxr w13, w17, [x28]
+ 270: 8815fe71 stlxr w21, w17, [x19]
+ 274: 885f7d03 ldxr w3, [x8]
+ 278: 885ffebd ldaxr w29, [x21]
+ 27c: 889fff09 stlr w9, [x24]
+ 280: 88dffcc2 ldar w2, [x6]
+ 284: 480c7e14 stxrh w12, w20, [x16]
+ 288: 4802fcbc stlxrh w2, w28, [x5]
+ 28c: 485f7c61 ldxrh w1, [x3]
+ 290: 485ffdb8 ldaxrh w24, [x13]
+ 294: 489fff2f stlrh w15, [x25]
+ 298: 48dffe8a ldarh w10, [x20]
+ 29c: 08057db0 stxrb w5, w16, [x13]
+ 2a0: 080afe2f stlxrb w10, w15, [x17]
+ 2a4: 085f7e71 ldxrb w17, [x19]
+ 2a8: 085ffd3e ldaxrb w30, [x9]
+ 2ac: 089fff14 stlrb w20, [x24]
+ 2b0: 08dffc8a ldarb w10, [x4]
+ 2b4: c87f2139 ldxp x25, x8, [x9]
+ 2b8: c87faa07 ldaxp x7, x10, [x16]
+ 2bc: c8392d30 stxp w25, x16, x11, [x9]
+ 2c0: c827a5e5 stlxp w7, x5, x9, [x15]
+ 2c4: 887f106c ldxp w12, w4, [x3]
+ 2c8: 887f88b1 ldaxp w17, w2, [x5]
+ 2cc: 882460c8 stxp w4, w8, w24, [x6]
+ 2d0: 8824e60c stlxp w4, w12, w25, [x16]
+ 2d4: f800b3ce stur x14, [x30, #11]
+ 2d8: b819f3a6 stur w6, [x29, #-97]
+ 2dc: 381f9162 sturb w2, [x11, #-7]
+ 2e0: 781ea114 sturh w20, [x8, #-22]
+ 2e4: f85e33b4 ldur x20, [x29, #-29]
+ 2e8: b85e6009 ldur w9, [x0, #-26]
+ 2ec: 3940204e ldrb w14, [x2, #8]
+ 2f0: 785e802d ldurh w13, [x1, #-24]
+ 2f4: 389f922d ldursb x13, [x17, #-7]
+ 2f8: 789f50f1 ldursh x17, [x7, #-11]
+ 2fc: 78dc4103 ldursh w3, [x8, #-60]
+ 300: b9800d8e ldrsw x14, [x12, #12]
+ 304: fc5152a5 ldur d5, [x21, #-235]
+ 308: bc5ca009 ldur s9, [x0, #-54]
+ 30c: fc05f10f stur d15, [x8, #95]
+ 310: bc1f0016 stur s22, [x0, #-16]
+ 314: f8111c97 str x23, [x4, #-239]!
+ 318: b8186c11 str w17, [x0, #-122]!
+ 31c: 381fbd3a strb w26, [x9, #-5]!
+ 320: 781f8dd5 strh w21, [x14, #-8]!
+ 324: f8417ce8 ldr x8, [x7, #23]!
+ 328: b8416d0c ldr w12, [x8, #22]!
+ 32c: 38406f9b ldrb w27, [x28, #6]!
+ 330: 785c6e66 ldrh w6, [x19, #-58]!
+ 334: 389ecca7 ldrsb x7, [x5, #-20]!
+ 338: 789e0e36 ldrsh x22, [x17, #-32]!
+ 33c: 78dfedb1 ldrsh w17, [x13, #-2]!
+ 340: b8816c9d ldrsw x29, [x4, #22]!
+ 344: fc5b2f88 ldr d8, [x28, #-78]!
+ 348: bc5fbd77 ldr s23, [x11, #-5]!
+ 34c: fc1e9e89 str d9, [x20, #-23]!
+ 350: bc199c65 str s5, [x3, #-103]!
+ 354: f802044d str x13, [x2], #32
+ 358: b803967e str w30, [x19], #57
+ 35c: 3800343d strb w29, [x1], #3
+ 360: 781ef74a strh w10, [x26], #-17
+ 364: f85f442f ldr x15, [x1], #-12
+ 368: b85fa4a1 ldr w1, [x5], #-6
+ 36c: 385f25f8 ldrb w24, [x15], #-14
+ 370: 785fb63d ldrh w29, [x17], #-5
+ 374: 389ef5e4 ldrsb x4, [x15], #-17
+ 378: 789ca446 ldrsh x6, [x2], #-54
+ 37c: 78c1277b ldrsh w27, [x27], #18
+ 380: b89b3729 ldrsw x9, [x25], #-77
+ 384: fc5507b5 ldr d21, [x29], #-176
+ 388: bc5ce53e ldr s30, [x9], #-50
+ 38c: fc1d2582 str d2, [x12], #-46
+ 390: bc1c56a7 str s7, [x21], #-59
+ 394: f837598c str x12, [x12, w23, uxtw #3]
+ 398: b8364bce str w14, [x30, w22, uxtw]
+ 39c: 383a586c strb w12, [x3, w26, uxtw #0]
+ 3a0: 783e49cb strh w11, [x14, w30, uxtw]
+ 3a4: f8787918 ldr x24, [x8, x24, lsl #3]
+ 3a8: b87469ac ldr w12, [x13, x20]
+ 3ac: 38655896 ldrb w22, [x4, w5, uxtw #0]
+ 3b0: 786658bc ldrh w28, [x5, w6, uxtw #1]
+ 3b4: 38b97962 ldrsb x2, [x11, x25, lsl #0]
+ 3b8: 78b9ead7 ldrsh x23, [x22, x25, sxtx]
+ 3bc: 78f6da83 ldrsh w3, [x20, w22, sxtw #1]
+ 3c0: b8aefba9 ldrsw x9, [x29, x14, sxtx #2]
+ 3c4: fc7dfaf0 ldr d16, [x23, x29, sxtx #3]
+ 3c8: bc747b87 ldr s7, [x28, x20, lsl #2]
+ 3cc: fc387a94 str d20, [x20, x24, lsl #3]
+ 3d0: bc377ab9 str s25, [x21, x23, lsl #2]
+ 3d4: f9180c51 str x17, [x2, #12312]
+ 3d8: b91b38fe str w30, [x7, #6968]
+ 3dc: 391ca4e3 strb w3, [x7, #1833]
+ 3e0: 791a4c27 strh w7, [x1, #3366]
+ 3e4: f95ca767 ldr x7, [x27, #14664]
+ 3e8: b9580e28 ldr w8, [x17, #6156]
+ 3ec: 3958ea20 ldrb w0, [x17, #1594]
+ 3f0: 795bd680 ldrh w0, [x20, #3562]
+ 3f4: 399a4633 ldrsb x19, [x17, #1681]
+ 3f8: 799d80d3 ldrsh x19, [x6, #3776]
+ 3fc: 79dcf944 ldrsh w4, [x10, #3708]
+ 400: b99b249d ldrsw x29, [x4, #6948]
+ 404: fd5a143d ldr d29, [x1, #13352]
+ 408: bd59938f ldr s15, [x28, #6544]
+ 40c: fd1b9347 str d7, [x26, #14112]
+ 410: bd1aa7c0 str s0, [x30, #6820]
+ 414: 58000019 ldr x25, 414 <back+0x414>
+ 418: 18000009 ldr w9, 418 <back+0x418>
+ 41c: f88692c0 prfum pldl1keep, [x22, #105]
+ 420: d8ffdf00 prfm pldl1keep, 0 <back>
+ 424: f8be7b80 prfm pldl1keep, [x28, x30, lsl #3]
+ 428: f99c8260 prfm pldl1keep, [x19, #14592]
+ 42c: 1a180111 adc w17, w8, w24
+ 430: 3a09022e adcs w14, w17, w9
+ 434: 5a190036 sbc w22, w1, w25
+ 438: 7a13012f sbcs w15, w9, w19
+ 43c: 9a0b028f adc x15, x20, x11
+ 440: ba1e0164 adcs x4, x11, x30
+ 444: da060114 sbc x20, x8, x6
+ 448: fa0f02aa sbcs x10, x21, x15
+ 44c: 0b298d61 add w1, w11, w9, sxtb #3
+ 450: 2b3cee24 adds w4, w17, w28, sxtx #3
+ 454: cb3ca7b5 sub x21, x29, w28, sxth #1
+ 458: 6b37d38b subs w11, w28, w23, sxtw #4
+ 45c: 8b25f34c add x12, x26, x5, sxtx #4
+ 460: ab3e68d1 adds x17, x6, x30, uxtx #2
+ 464: cb210a87 sub x7, x20, w1, uxtb #2
+ 468: eb3eed3e subs x30, x9, x30, sxtx #3
+ 46c: 3a4b0087 ccmn w4, w11, #0x7, eq // eq = none
+ 470: 7a4571eb ccmp w15, w5, #0xb, vc
+ 474: ba5122e6 ccmn x23, x17, #0x6, cs // cs = hs, nlast
+ 478: fa4bc16a ccmp x11, x11, #0xa, gt
+ 47c: 3a4519cc ccmn w14, #0x5, #0xc, ne // ne = any
+ 480: 7a5c1aef ccmp w23, #0x1c, #0xf, ne // ne = any
+ 484: ba5e3a27 ccmn x17, #0x1e, #0x7, cc // cc = lo, ul, last
+ 488: fa4c8bc0 ccmp x30, #0xc, #0x0, hi // hi = pmore
+ 48c: 1a81537a csel w26, w27, w1, pl // pl = nfrst
+ 490: 1a95d56e csinc w14, w11, w21, le
+ 494: 5a8f60de csinv w30, w6, w15, vs
+ 498: 5a995451 csneg w17, w2, w25, pl // pl = nfrst
+ 49c: 9a8780b0 csel x16, x5, x7, hi // hi = pmore
+ 4a0: 9a9cc68a csinc x10, x20, x28, gt
+ 4a4: da8180e6 csinv x6, x7, x1, hi // hi = pmore
+ 4a8: da912756 csneg x22, x26, x17, cs // cs = hs, nlast
+ 4ac: 5ac000cb rbit w11, w6
+ 4b0: 5ac00760 rev16 w0, w27
+ 4b4: 5ac00ba1 rev w1, w29
+ 4b8: 5ac012b4 clz w20, w21
+ 4bc: 5ac0158c cls w12, w12
+ 4c0: dac00278 rbit x24, x19
+ 4c4: dac005f7 rev16 x23, x15
+ 4c8: dac00831 rev32 x17, x1
+ 4cc: dac00c7b rev x27, x3
+ 4d0: dac010be clz x30, x5
+ 4d4: dac0140f cls x15, x0
+ 4d8: 1ad4080e udiv w14, w0, w20
+ 4dc: 1ad50d9b sdiv w27, w12, w21
+ 4e0: 1ada214c lsl w12, w10, w26
+ 4e4: 1ac6266e lsr w14, w19, w6
+ 4e8: 1ade2a7b asr w27, w19, w30
+ 4ec: 1ad02dc6 ror w6, w14, w16
+ 4f0: 9ac209b1 udiv x17, x13, x2
+ 4f4: 9ac20fa0 sdiv x0, x29, x2
+ 4f8: 9ac2220c lsl x12, x16, x2
+ 4fc: 9add26e9 lsr x9, x23, x29
+ 500: 9add2a26 asr x6, x17, x29
+ 504: 9ada2fce ror x14, x30, x26
+ 508: 9bda7f11 umulh x17, x24, x26
+ 50c: 9b4e7f54 smulh x20, x26, x14
+ 510: 1b021d1b madd w27, w8, w2, w7
+ 514: 1b19b1bc msub w28, w13, w25, w12
+ 518: 9b0a6d24 madd x4, x9, x10, x27
+ 51c: 9b08f956 msub x22, x10, x8, x30
+ 520: 9b391694 smaddl x20, w20, w25, x5
+ 524: 9b2beed6 smsubl x22, w22, w11, x27
+ 528: 9bac4cc4 umaddl x4, w6, w12, x19
+ 52c: 9ba881f1 umsubl x17, w15, w8, x0
+ 530: 1e2a08b6 fmul s22, s5, s10
+ 534: 1e301904 fdiv s4, s8, s16
+ 538: 1e262919 fadd s25, s8, s6
+ 53c: 1e393b66 fsub s6, s27, s25
+ 540: 1e290aea fmul s10, s23, s9
+ 544: 1e6c0a36 fmul d22, d17, d12
+ 548: 1e74180b fdiv d11, d0, d20
+ 54c: 1e6f2980 fadd d0, d12, d15
+ 550: 1e643acf fsub d15, d22, d4
+ 554: 1e79083d fmul d29, d1, d25
+ 558: 1f131769 fmadd s9, s27, s19, s5
+ 55c: 1f06e87a fmsub s26, s3, s6, s26
+ 560: 1f285184 fnmadd s4, s12, s8, s20
+ 564: 1f354539 fnmadd s25, s9, s21, s17
+ 568: 1f5e5867 fmadd d7, d3, d30, d22
+ 56c: 1f4aab61 fmsub d1, d27, d10, d10
+ 570: 1f760511 fnmadd d17, d8, d22, d1
+ 574: 1f626f8e fnmadd d14, d28, d2, d27
+ 578: 1e2043db fmov s27, s30
+ 57c: 1e20c025 fabs s5, s1
+ 580: 1e214277 fneg s23, s19
+ 584: 1e21c23c fsqrt s28, s17
+ 588: 1e22c0d9 fcvt d25, s6
+ 58c: 1e6041d4 fmov d20, d14
+ 590: 1e60c151 fabs d17, d10
+ 594: 1e61422a fneg d10, d17
+ 598: 1e61c235 fsqrt d21, d17
+ 59c: 1e6241f5 fcvt s21, d15
+ 5a0: 1e380167 fcvtzs w7, s11
+ 5a4: 9e3803a2 fcvtzs x2, s29
+ 5a8: 1e780323 fcvtzs w3, d25
+ 5ac: 9e78011c fcvtzs x28, d8
+ 5b0: 1e22006b scvtf s11, w3
+ 5b4: 9e2202a2 scvtf s2, x21
+ 5b8: 1e62033d scvtf d29, w25
+ 5bc: 9e620073 scvtf d19, x3
+ 5c0: 1e2603b4 fmov w20, s29
+ 5c4: 9e660237 fmov x23, d17
+ 5c8: 1e270380 fmov s0, w28
+ 5cc: 9e670289 fmov d9, x20
+ 5d0: 1e2c20e0 fcmp s7, s12
+ 5d4: 1e6e21a0 fcmp d13, d14
+ 5d8: 1e202188 fcmp s12, #0.0
+ 5dc: 1e602028 fcmp d1, #0.0
+ 5e0: 29380acc stp w12, w2, [x22, #-64]
+ 5e4: 2966271b ldp w27, w9, [x24, #-208]
+ 5e8: 696a130f ldpsw x15, x4, [x24, #-176]
+ 5ec: a9015405 stp x5, x21, [x0, #16]
+ 5f0: a9735d26 ldp x6, x23, [x9, #-208]
+ 5f4: 29820fa0 stp w0, w3, [x29, #16]!
+ 5f8: 29ee403d ldp w29, w16, [x1, #-144]!
+ 5fc: 69c24ebb ldpsw x27, x19, [x21, #16]!
+ 600: a9b545a6 stp x6, x17, [x13, #-176]!
+ 604: a9c16020 ldp x0, x24, [x1, #16]!
+ 608: 288052c0 stp w0, w20, [x22], #0
+ 60c: 28fa31d1 ldp w17, w12, [x14], #-48
+ 610: 68ce682a ldpsw x10, x26, [x1], #112
+ 614: a8ba61b4 stp x20, x24, [x13], #-96
+ 618: a8c330e1 ldp x1, x12, [x7], #48
+ 61c: 28362ae5 stnp w5, w10, [x23, #-80]
+ 620: 287a2b08 ldnp w8, w10, [x24, #-48]
+ 624: a8043d6b stnp x11, x15, [x11, #64]
+ 628: a84470a9 ldnp x9, x28, [x5, #64]
+ 62c: 0c40728b ld1 {v11.8b}, [x20]
+ 630: 4cdfa113 ld1 {v19.16b, v20.16b}, [x8], #32
+ 634: 0cc36c43 ld1 {v3.1d-v5.1d}, [x2], x3
+ 638: 4cdf2475 ld1 {v21.8h-v24.8h}, [x3], #64
+ 63c: 0d40c0ae ld1r {v14.8b}, [x5]
+ 640: 4ddfcb6d ld1r {v13.4s}, [x27], #4
+ 644: 0dc0ce71 ld1r {v17.1d}, [x19], x0
+ 648: 4c408cbb ld2 {v27.2d, v28.2d}, [x5]
+ 64c: 0cdf849a ld2 {v26.4h, v27.4h}, [x4], #16
+ 650: 4d60c2e8 ld2r {v8.16b, v9.16b}, [x23]
+ 654: 0dffc94e ld2r {v14.2s, v15.2s}, [x10], #8
+ 658: 4df3ceaa ld2r {v10.2d, v11.2d}, [x21], x19
+ 65c: 4cde49d1 ld3 {v17.4s-v19.4s}, [x14], x30
+ 660: 0c404a94 ld3 {v20.2s-v22.2s}, [x20]
+ 664: 4d40e6b8 ld3r {v24.8h-v26.8h}, [x21]
+ 668: 4ddfe83a ld3r {v26.4s-v28.4s}, [x1], #12
+ 66c: 0dc0ec4c ld3r {v12.1d-v14.1d}, [x2], x0
+ 670: 4cdf04d5 ld4 {v21.8h-v24.8h}, [x6], #64
+ 674: 0cd60391 ld4 {v17.8b-v20.8b}, [x28], x22
+ 678: 0d60e333 ld4r {v19.8b-v22.8b}, [x25]
+ 67c: 0dffe6e6 ld4r {v6.4h-v9.4h}, [x23], #8
+ 680: 0dfae928 ld4r {v8.2s-v11.2s}, [x9], x26
+ 684: ba5fd3e3 ccmn xzr, xzr, #0x3, le
+ 688: 3a5f03e5 ccmn wzr, wzr, #0x5, eq // eq = none
+ 68c: fa411be4 ccmp xzr, #0x1, #0x4, ne // ne = any
+ 690: 7a42cbe2 ccmp wzr, #0x2, #0x2, gt
+ 694: 93df03ff ror xzr, xzr, #0
+ 698: c820ffff stlxp w0, xzr, xzr, [sp]
+ 69c: 8822fc7f stlxp w2, wzr, wzr, [x3]
+ 6a0: c8247cbf stxp w4, xzr, xzr, [x5]
+ 6a4: 88267fff stxp w6, wzr, wzr, [sp]
+ 6a8: 4e010fe0 dup v0.16b, wzr
+ 6ac: 4e081fe1 mov v1.d[0], xzr
+ 6b0: 4e0c1fe1 mov v1.s[1], wzr
+ 6b4: 4e0a1fe1 mov v1.h[2], wzr
+ 6b8: 4e071fe1 mov v1.b[3], wzr
+ 6bc: 4cc0ac3f ld1 {v31.2d, v0.2d}, [x1], x0
+ 6c0: 05a08020 mov z0.s, p0/m, s1
+ 6c4: 04b0e3e0 incw x0
+ 6c8: 0470e7e1 dech x1
+ 6cc: 042f9c20 lsl z0.b, z1.b, #7
+ 6d0: 043f9c35 lsl z21.h, z1.h, #15
+ 6d4: 047f9c20 lsl z0.s, z1.s, #31
+ 6d8: 04ff9c20 lsl z0.d, z1.d, #63
+ 6dc: 04299420 lsr z0.b, z1.b, #7
+ 6e0: 04319160 asr z0.h, z11.h, #15
+ 6e4: 0461943e lsr z30.s, z1.s, #31
+ 6e8: 04a19020 asr z0.d, z1.d, #63
+ 6ec: 042053ff addvl sp, x0, #31
+ 6f0: 047f5401 addpl x1, sp, #-32
+ 6f4: 25208028 cntp x8, p0, p1.b
+ 6f8: 2538cfe0 mov z0.b, #127
+ 6fc: 2578d001 mov z1.h, #-128
+ 700: 25b8efe2 mov z2.s, #32512
+ 704: 25f8f007 mov z7.d, #-32768
+ 708: a400a3e0 ld1b {z0.b}, p0/z, [sp]
+ 70c: a4a8a7ea ld1h {z10.h}, p1/z, [sp, #-8, mul vl]
+ 710: a547a814 ld1w {z20.s}, p2/z, [x0, #7, mul vl]
+ 714: a4084ffe ld1b {z30.b}, p3/z, [sp, x8]
+ 718: a55c53e0 ld1w {z0.s}, p4/z, [sp, x28, lsl #2]
+ 71c: a5e1540b ld1d {z11.d}, p5/z, [x0, x1, lsl #3]
+ 720: e400fbf6 st1b {z22.b}, p6, [sp]
+ 724: e408ffff st1b {z31.b}, p7, [sp, #-8, mul vl]
+ 728: e547e400 st1w {z0.s}, p1, [x0, #7, mul vl]
+ 72c: e4014be0 st1b {z0.b}, p2, [sp, x1]
+ 730: e4a84fe0 st1h {z0.h}, p3, [sp, x8, lsl #1]
+ 734: e5e85000 st1d {z0.d}, p4, [x0, x8, lsl #3]
+ 738: 858043e0 ldr z0, [sp]
+ 73c: 85a043ff ldr z31, [sp, #-256, mul vl]
+ 740: e59f5d08 str z8, [x8, #255, mul vl]
+ 744: 1e601000 fmov d0, #2.000000000000000000e+00
+ 748: 1e603000 fmov d0, #2.125000000000000000e+00
+ 74c: 1e621000 fmov d0, #4.000000000000000000e+00
+ 750: 1e623000 fmov d0, #4.250000000000000000e+00
+ 754: 1e641000 fmov d0, #8.000000000000000000e+00
+ 758: 1e643000 fmov d0, #8.500000000000000000e+00
+ 75c: 1e661000 fmov d0, #1.600000000000000000e+01
+ 760: 1e663000 fmov d0, #1.700000000000000000e+01
+ 764: 1e681000 fmov d0, #1.250000000000000000e-01
+ 768: 1e683000 fmov d0, #1.328125000000000000e-01
+ 76c: 1e6a1000 fmov d0, #2.500000000000000000e-01
+ 770: 1e6a3000 fmov d0, #2.656250000000000000e-01
+ 774: 1e6c1000 fmov d0, #5.000000000000000000e-01
+ 778: 1e6c3000 fmov d0, #5.312500000000000000e-01
+ 77c: 1e6e1000 fmov d0, #1.000000000000000000e+00
+ 780: 1e6e3000 fmov d0, #1.062500000000000000e+00
+ 784: 1e701000 fmov d0, #-2.000000000000000000e+00
+ 788: 1e703000 fmov d0, #-2.125000000000000000e+00
+ 78c: 1e721000 fmov d0, #-4.000000000000000000e+00
+ 790: 1e723000 fmov d0, #-4.250000000000000000e+00
+ 794: 1e741000 fmov d0, #-8.000000000000000000e+00
+ 798: 1e743000 fmov d0, #-8.500000000000000000e+00
+ 79c: 1e761000 fmov d0, #-1.600000000000000000e+01
+ 7a0: 1e763000 fmov d0, #-1.700000000000000000e+01
+ 7a4: 1e781000 fmov d0, #-1.250000000000000000e-01
+ 7a8: 1e783000 fmov d0, #-1.328125000000000000e-01
+ 7ac: 1e7a1000 fmov d0, #-2.500000000000000000e-01
+ 7b0: 1e7a3000 fmov d0, #-2.656250000000000000e-01
+ 7b4: 1e7c1000 fmov d0, #-5.000000000000000000e-01
+ 7b8: 1e7c3000 fmov d0, #-5.312500000000000000e-01
+ 7bc: 1e7e1000 fmov d0, #-1.000000000000000000e+00
+ 7c0: 1e7e3000 fmov d0, #-1.062500000000000000e+00
+ 7c4: f82b82af swp x11, x15, [x21]
+ 7c8: f83700a8 ldadd x23, x8, [x5]
+ 7cc: f8271106 ldclr x7, x6, [x8]
+ 7d0: f82e22ee ldeor x14, x14, [x23]
+ 7d4: f82a3019 ldset x10, x25, [x0]
+ 7d8: f82552a9 ldsmin x5, x9, [x21]
+ 7dc: f824423b ldsmax x4, x27, [x17]
+ 7e0: f82a71a6 ldumin x10, x6, [x13]
+ 7e4: f8236203 ldumax x3, x3, [x16]
+ 7e8: f8a9805c swpa x9, x28, [x2]
+ 7ec: f8b70022 ldadda x23, x2, [x1]
+ 7f0: f8a410fa ldclra x4, x26, [x7]
+ 7f4: f8a02143 ldeora x0, x3, [x10]
+ 7f8: f8b83079 ldseta x24, x25, [x3]
+ 7fc: f8ab5028 ldsmina x11, x8, [x1]
+ 800: f8b043ad ldsmaxa x16, x13, [x29]
+ 804: f8a670a0 ldumina x6, x0, [x5]
+ 808: f8b061b1 ldumaxa x16, x17, [x13]
+ 80c: f8eb81db swpal x11, x27, [x14]
+ 810: f8e202ad ldaddal x2, x13, [x21]
+ 814: f8f6119f ldclral x22, xzr, [x12]
+ 818: f8e721fe ldeoral x7, x30, [x15]
+ 81c: f8e731f0 ldsetal x7, x16, [x15]
+ 820: f8f051ba ldsminal x16, x26, [x13]
+ 824: f8f74379 ldsmaxal x23, x25, [x27]
+ 828: f8e473ee lduminal x4, x14, [sp]
+ 82c: f8f86221 ldumaxal x24, x1, [x17]
+ 830: f8628308 swpl x2, x8, [x24]
+ 834: f874027b ldaddl x20, x27, [x19]
+ 838: f87310d1 ldclrl x19, x17, [x6]
+ 83c: f86e235c ldeorl x14, x28, [x26]
+ 840: f8623270 ldsetl x2, x16, [x19]
+ 844: f86e5090 ldsminl x14, x16, [x4]
+ 848: f8794128 ldsmaxl x25, x8, [x9]
+ 84c: f86a73a5 lduminl x10, x5, [x29]
+ 850: f86661c2 ldumaxl x6, x2, [x14]
+ 854: b831808b swp w17, w11, [x4]
+ 858: b82701f0 ldadd w7, w16, [x15]
+ 85c: b82b1139 ldclr w11, w25, [x9]
+ 860: b823200e ldeor w3, w14, [x0]
+ 864: b820301e ldset w0, w30, [x0]
+ 868: b826538a ldsmin w6, w10, [x28]
+ 86c: b82740ce ldsmax w7, w14, [x6]
+ 870: b826701e ldumin w6, w30, [x0]
+ 874: b83663be ldumax w22, w30, [x29]
+ 878: b8b0826e swpa w16, w14, [x19]
+ 87c: b8b50323 ldadda w21, w3, [x25]
+ 880: b8a21270 ldclra w2, w16, [x19]
+ 884: b8ba22f4 ldeora w26, w20, [x23]
+ 888: b8b133e6 ldseta w17, w6, [sp]
+ 88c: b8a553d7 ldsmina w5, w23, [x30]
+ 890: b8ab41cc ldsmaxa w11, w12, [x14]
+ 894: b8a271b4 ldumina w2, w20, [x13]
+ 898: b8af6291 ldumaxa w15, w17, [x20]
+ 89c: b8e682fc swpal w6, w28, [x23]
+ 8a0: b8fb01b0 ldaddal w27, w16, [x13]
+ 8a4: b8e21317 ldclral w2, w23, [x24]
+ 8a8: b8e0215c ldeoral w0, w28, [x10]
+ 8ac: b8e330af ldsetal w3, w15, [x5]
+ 8b0: b8e353ab ldsminal w3, w11, [x29]
+ 8b4: b8f640db ldsmaxal w22, w27, [x6]
+ 8b8: b8f17214 lduminal w17, w20, [x16]
+ 8bc: b8f760ef ldumaxal w23, w15, [x7]
+ 8c0: b86881d0 swpl w8, w16, [x14]
+ 8c4: b87702f0 ldaddl w23, w16, [x23]
+ 8c8: b87c10ec ldclrl w28, w12, [x7]
+ 8cc: b87c2267 ldeorl w28, w7, [x19]
+ 8d0: b867316c ldsetl w7, w12, [x11]
+ 8d4: b86a529f stsminl w10, [x20]
+ 8d8: b86943e8 ldsmaxl w9, w8, [sp]
+ 8dc: b86a7048 lduminl w10, w8, [x2]
+ 8e0: b87163ff stumaxl w17, [sp]
+ 8e4: 047600e2 add z2.h, z7.h, z22.h
+ 8e8: 04be06de sub z30.s, z22.s, z30.s
+ 8ec: 65d902ca fadd z10.d, z22.d, z25.d
+ 8f0: 65cc0a17 fmul z23.d, z16.d, z12.d
+ 8f4: 65d90623 fsub z3.d, z17.d, z25.d
+ 8f8: 0496a099 abs z25.s, p0/m, z4.s
+ 8fc: 04401b57 add z23.h, p6/m, z23.h, z26.h
+ 900: 04d08226 asr z6.d, p0/m, z6.d, z17.d
+ 904: 04daac77 cnt z23.d, p3/m, z3.d
+ 908: 04939d2b lsl z11.s, p7/m, z11.s, z9.s
+ 90c: 04919c7b lsr z27.s, p7/m, z27.s, z3.s
+ 910: 04901049 mul z9.s, p4/m, z9.s, z2.s
+ 914: 0417a9f0 neg z16.b, p2/m, z15.b
+ 918: 04dea929 not z9.d, p2/m, z9.d
+ 91c: 048816ea smax z10.s, p5/m, z10.s, z23.s
+ 920: 040a172d smin z13.b, p5/m, z13.b, z25.b
+ 924: 04811413 sub z19.s, p5/m, z19.s, z0.s
+ 928: 04dca2d1 fabs z17.d, p0/m, z22.d
+ 92c: 65808a09 fadd z9.s, p2/m, z9.s, z16.s
+ 930: 658d9411 fdiv z17.s, p5/m, z17.s, z0.s
+ 934: 6586947d fmax z29.s, p5/m, z29.s, z3.s
+ 938: 65878e21 fmin z1.s, p3/m, z1.s, z17.s
+ 93c: 65c2880e fmul z14.d, p2/m, z14.d, z0.d
+ 940: 04ddb2d3 fneg z19.d, p4/m, z22.d
+ 944: 65c2a5f1 frintm z17.d, p1/m, z15.d
+ 948: 65c0b088 frintn z8.d, p4/m, z4.d
+ 94c: 65c1b3a5 frintp z5.d, p4/m, z29.d
+ 950: 65cda26b fsqrt z11.d, p0/m, z19.d
+ 954: 65c1938a fsub z10.d, p4/m, z10.d, z28.d
+ 958: 65eb0ded fmla z13.d, p3/m, z15.d, z11.d
+ 95c: 65af3e86 fmls z6.s, p7/m, z20.s, z15.s
+ 960: 65a749be fnmla z30.s, p2/m, z13.s, z7.s
+ 964: 65f379d6 fnmls z22.d, p6/m, z14.d, z19.d
+ 968: 04404f3e mla z30.h, p3/m, z25.h, z0.h
+ 96c: 04c16b0a mls z10.d, p2/m, z24.d, z1.d
+ 970: 04363226 and z6.d, z17.d, z22.d
+ 974: 04b1312a eor z10.d, z9.d, z17.d
+ 978: 04753182 orr z2.d, z12.d, z21.d
+ 97c: 049a39cf andv s15, p6, z14.s
+ 980: 04d82ce9 orv d9, p3, z7.d
+ 984: 0459353e eorv h30, p5, z9.h
+ 988: 04883347 smaxv s7, p4, z26.s
+ 98c: 048a2fb4 sminv s20, p3, z29.s
+ 990: 65872e1c fminv s28, p3, z16.s
+ 994: 65c62d26 fmaxv d6, p3, z9.d
+ 998: 6598346a fadda s10, p5, s10, z3.s
+ 99c: 04013915 uaddv d21, p6, z8.b
*/
static const unsigned int insns[] =
{
- 0x8b18ec0f, 0xcb9636d1, 0xab1ce74a, 0xeb184a19,
- 0x0b1c1ca8, 0x4b817388, 0x2b01004c, 0x6b5164b7,
- 0x8a0d5595, 0xaa9791f5, 0xca9bc316, 0xea82d1f6,
- 0x0a980e21, 0x2a862c45, 0x4a453037, 0x6a8e5180,
- 0x8a621cc1, 0xaa24bd1e, 0xcab4d6d1, 0xeaa591fd,
- 0x0a7d6efe, 0x2a2253ac, 0x4aa61187, 0x6aa755b0,
- 0x110b5a25, 0x31056e0a, 0x510f48ba, 0x710ac715,
- 0x910f6e0a, 0xb10a65ef, 0xd1009e98, 0xf10131aa,
- 0x121d4e67, 0x32043e25, 0x52132390, 0x72160b0e,
- 0x9273e76e, 0xb256416c, 0xd24b5002, 0xf266da8d,
- 0x14000000, 0x17ffffd7, 0x140001ee, 0x94000000,
- 0x97ffffd4, 0x940001eb, 0x3400000f, 0x34fffa2f,
- 0x34003d0f, 0x3500001c, 0x35fff9dc, 0x35003cbc,
- 0xb400001b, 0xb4fff97b, 0xb4003c5b, 0xb5000000,
- 0xb5fff900, 0xb5003be0, 0x1000000d, 0x10fff8ad,
- 0x10003b8d, 0x90000003, 0x36380015, 0x363ff835,
- 0x36383b15, 0x3748000f, 0x374ff7cf, 0x37483aaf,
- 0x12a14bee, 0x5283bb51, 0x72858ebb, 0x92c98881,
- 0xd2aa50d4, 0xf2afd9d4, 0x935c504d, 0x33133e90,
- 0x5309196b, 0x93595482, 0xb3424e0d, 0xd3481728,
- 0x138a3b7d, 0x93c66286, 0x54000000, 0x54fff5a0,
- 0x54003880, 0x54000001, 0x54fff541, 0x54003821,
- 0x54000002, 0x54fff4e2, 0x540037c2, 0x54000002,
- 0x54fff482, 0x54003762, 0x54000003, 0x54fff423,
- 0x54003703, 0x54000003, 0x54fff3c3, 0x540036a3,
- 0x54000004, 0x54fff364, 0x54003644, 0x54000005,
- 0x54fff305, 0x540035e5, 0x54000006, 0x54fff2a6,
- 0x54003586, 0x54000007, 0x54fff247, 0x54003527,
- 0x54000008, 0x54fff1e8, 0x540034c8, 0x54000009,
- 0x54fff189, 0x54003469, 0x5400000a, 0x54fff12a,
- 0x5400340a, 0x5400000b, 0x54fff0cb, 0x540033ab,
- 0x5400000c, 0x54fff06c, 0x5400334c, 0x5400000d,
- 0x54fff00d, 0x540032ed, 0x5400000e, 0x54ffefae,
- 0x5400328e, 0x5400000f, 0x54ffef4f, 0x5400322f,
- 0xd40d2881, 0xd40ea5c2, 0xd40518a3, 0xd42eca40,
- 0xd44a2e60, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
- 0xd5033fdf, 0xd5033d9f, 0xd5033bbf, 0xd61f0120,
- 0xd63f0120, 0xc8027d7d, 0xc816ff85, 0xc85f7e8e,
- 0xc85ffe7d, 0xc89ffea6, 0xc8dffc73, 0x880c7f63,
- 0x8811fdfa, 0x885f7dcd, 0x885fff4c, 0x889ffe28,
- 0x88dfffd5, 0x48007d6f, 0x4811fc34, 0x485f7d1d,
- 0x485ffd91, 0x489ffc8b, 0x48dffc90, 0x080e7c85,
- 0x081bfe11, 0x085f7f66, 0x085fff1b, 0x089ffe8a,
- 0x08dfff49, 0xc87f7b85, 0xc87fa66a, 0xc82b5590,
- 0xc82adc94, 0x887f0416, 0x887f8503, 0x88205fc9,
- 0x8837c560, 0xf81e1146, 0xb81fb007, 0x381f3205,
- 0x7801f27e, 0xf8477130, 0xb843b208, 0x385f918a,
- 0x785da12e, 0x389f83d8, 0x78817087, 0x78dd91d1,
- 0xb89e136b, 0xfc4410ec, 0xbc5fe200, 0xfc15f2ed,
- 0xbc1c2075, 0xf8064ca2, 0xb81a4c29, 0x381fbfdb,
- 0x7800cdfb, 0xf852ce24, 0xb841eef5, 0x385f9e2d,
- 0x785cec19, 0x389ebea1, 0x789caebc, 0x78c02c8b,
- 0xb883dd31, 0xfc427e7d, 0xbc5abed6, 0xfc11ff29,
- 0xbc1f1c49, 0xf81be6ed, 0xb800a611, 0x381e05c1,
- 0x78006411, 0xf855473b, 0xb85da72d, 0x385e372b,
- 0x784144be, 0x389f94e9, 0x789c2460, 0x78c1f5c7,
- 0xb8827771, 0xfc515491, 0xbc4226ba, 0xfc1c7625,
- 0xbc1935ad, 0xf824da06, 0xb834db09, 0x38237ba3,
- 0x783e6a2a, 0xf867497b, 0xb87949ee, 0x387379d8,
- 0x7866c810, 0x38acd98a, 0x78b0499a, 0x78ee781a,
- 0xb8bbf971, 0xfc73d803, 0xbc6979fa, 0xfc30e9ab,
- 0xbc355a7a, 0xf91886a8, 0xb918ef6a, 0x391b15db,
- 0x791ac0f0, 0xf958753b, 0xb95a1958, 0x395b3f18,
- 0x795800b4, 0x39988891, 0x799a81ae, 0x79dd172a,
- 0xb9981342, 0xfd5d21da, 0xbd5e7c9c, 0xfd1b526e,
- 0xbd18df97, 0x58002268, 0x18ffdf51, 0xf8951080,
- 0xd8000000, 0xf8a4c900, 0xf999e180, 0x1a150374,
- 0x3a060227, 0x5a1900c5, 0x7a0e017e, 0x9a0b0223,
- 0xba110159, 0xda170207, 0xfa050144, 0x0b2973c9,
- 0x2b30a8a0, 0xcb3b8baf, 0x6b21f12b, 0x8b264f02,
- 0xab3a70d3, 0xcb39ef48, 0xeb29329a, 0x3a5a41a7,
- 0x7a54310f, 0xba4302c8, 0xfa58a04a, 0x3a50490d,
- 0x7a4c0a01, 0xba5f79e3, 0xfa4c0aef, 0x1a9a30ee,
- 0x1a9ed763, 0x5a9702ab, 0x5a95c7da, 0x9a8d835c,
- 0x9a909471, 0xda8380ab, 0xda93c461, 0x5ac00120,
- 0x5ac005da, 0x5ac00a2d, 0x5ac0128b, 0x5ac0163c,
- 0xdac0008d, 0xdac007c1, 0xdac009cd, 0xdac00d05,
- 0xdac01322, 0xdac01514, 0x1adb0b35, 0x1ad00d4d,
- 0x1ad1203c, 0x1aca26f9, 0x1ac72867, 0x1ace2fce,
- 0x9acf0acc, 0x9acd0f22, 0x9ad522e7, 0x9ac0258b,
- 0x9adc293e, 0x9ad62cad, 0x9bc47ea5, 0x9b477c51,
- 0x1b11318c, 0x1b01edfe, 0x9b117662, 0x9b03fae4,
- 0x9b313eef, 0x9b21b59b, 0x9bac45a6, 0x9ba6a839,
- 0x1e240871, 0x1e3518b0, 0x1e312b63, 0x1e2f3959,
- 0x1e200a2a, 0x1e630b5c, 0x1e7b1804, 0x1e6229dc,
- 0x1e773b4c, 0x1e610bcf, 0x1f0534a4, 0x1f1c85b5,
- 0x1f3d1c71, 0x1f3d6b37, 0x1f5e68ee, 0x1f4aa4f6,
- 0x1f6e24e7, 0x1f6f630e, 0x1e204056, 0x1e20c060,
- 0x1e214229, 0x1e21c178, 0x1e22c32f, 0x1e604064,
- 0x1e60c2da, 0x1e61427e, 0x1e61c1cc, 0x1e6240f1,
- 0x1e3801d8, 0x9e38034d, 0x1e780022, 0x9e780165,
- 0x1e22026e, 0x9e2202c1, 0x1e62023b, 0x9e620136,
- 0x1e26006e, 0x9e66022c, 0x1e270368, 0x9e67039d,
- 0x1e3e2000, 0x1e692180, 0x1e202148, 0x1e602328,
- 0x292e7b68, 0x294a4f15, 0x69626c50, 0xa93814d5,
- 0xa97e679d, 0x29903408, 0x29ec5039, 0x69fc62ce,
- 0xa98504d1, 0xa9fc4735, 0x28b05691, 0x28c8705c,
- 0x68e07953, 0xa8bf3e31, 0xa8fe0331, 0x283c170e,
- 0x284e4c37, 0xa80419cb, 0xa8722f62, 0x0c407230,
- 0x4cdfa13d, 0x0cd56f1e, 0x4cdf2440, 0x0d40c134,
- 0x4ddfc811, 0x0ddaced5, 0x4c408f33, 0x0cdf84aa,
- 0x4d60c30a, 0x0dffcbad, 0x4de2cf96, 0x4ccb489e,
- 0x0c40481d, 0x4d40e777, 0x4ddfe943, 0x0dd6edd3,
- 0x4cdf040e, 0x0cd902de, 0x0d60e019, 0x0dffe50a,
- 0x0dfce8c1, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4,
+ 0x8b8e677b, 0xcb512964, 0xab998627, 0xeb9416cd,
+ 0x0b83438a, 0x4b463c55, 0x2b9b2406, 0x6b882b65,
+ 0x8a879c8c, 0xaa16cb75, 0xca80baa3, 0xea855955,
+ 0x0a1d5aad, 0x2a504951, 0x4a976cf0, 0x6a8c30ca,
+ 0x8a275b33, 0xaa27d459, 0xcab70ee9, 0xeaadc8c5,
+ 0x0a2a26af, 0x2abe06b1, 0x4a3d4f87, 0x6ab632d9,
+ 0x110c5346, 0x3107aa23, 0x5107eea5, 0x710dcf76,
+ 0x9103d10c, 0xb10e811d, 0xd10a087a, 0xf109d1fd,
+ 0x1209afd5, 0x32099d95, 0x5202c62b, 0x720897da,
+ 0x920e36f9, 0xb243f1de, 0xd263d09a, 0xf24fd01a,
+ 0x14000000, 0x17ffffd7, 0x1400023e, 0x94000000,
+ 0x97ffffd4, 0x9400023b, 0x3400001c, 0x34fffa3c,
+ 0x3400471c, 0x35000011, 0x35fff9d1, 0x350046b1,
+ 0xb4000019, 0xb4fff979, 0xb4004659, 0xb5000002,
+ 0xb5fff902, 0xb50045e2, 0x1000001d, 0x10fff8bd,
+ 0x1000459d, 0x9000001d, 0x36300006, 0x3637f826,
+ 0x36304506, 0x37100015, 0x3717f7d5, 0x371044b5,
+ 0x128155e8, 0x52a5762b, 0x72acb59a, 0x92866a8d,
+ 0xd2e2d8a6, 0xf2c54450, 0x93516bde, 0x330f3124,
+ 0x5301168f, 0x9353391b, 0xb355741e, 0xd3562f5b,
+ 0x13866d8c, 0x93d6b5b3, 0x54000000, 0x54fff5a0,
+ 0x54004280, 0x54000001, 0x54fff541, 0x54004221,
+ 0x54000002, 0x54fff4e2, 0x540041c2, 0x54000002,
+ 0x54fff482, 0x54004162, 0x54000003, 0x54fff423,
+ 0x54004103, 0x54000003, 0x54fff3c3, 0x540040a3,
+ 0x54000004, 0x54fff364, 0x54004044, 0x54000005,
+ 0x54fff305, 0x54003fe5, 0x54000006, 0x54fff2a6,
+ 0x54003f86, 0x54000007, 0x54fff247, 0x54003f27,
+ 0x54000008, 0x54fff1e8, 0x54003ec8, 0x54000009,
+ 0x54fff189, 0x54003e69, 0x5400000a, 0x54fff12a,
+ 0x54003e0a, 0x5400000b, 0x54fff0cb, 0x54003dab,
+ 0x5400000c, 0x54fff06c, 0x54003d4c, 0x5400000d,
+ 0x54fff00d, 0x54003ced, 0x5400000e, 0x54ffefae,
+ 0x54003c8e, 0x5400000f, 0x54ffef4f, 0x54003c2f,
+ 0xd407da81, 0xd402d542, 0xd406dae3, 0xd4258fa0,
+ 0xd44d5960, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
+ 0xd5033fdf, 0xd503339f, 0xd50336bf, 0xd61f0160,
+ 0xd63f0320, 0xc80e7daf, 0xc81efc39, 0xc85f7c6d,
+ 0xc85ffea8, 0xc89fff8d, 0xc8dfffc8, 0x880d7f91,
+ 0x8815fe71, 0x885f7d03, 0x885ffebd, 0x889fff09,
+ 0x88dffcc2, 0x480c7e14, 0x4802fcbc, 0x485f7c61,
+ 0x485ffdb8, 0x489fff2f, 0x48dffe8a, 0x08057db0,
+ 0x080afe2f, 0x085f7e71, 0x085ffd3e, 0x089fff14,
+ 0x08dffc8a, 0xc87f2139, 0xc87faa07, 0xc8392d30,
+ 0xc827a5e5, 0x887f106c, 0x887f88b1, 0x882460c8,
+ 0x8824e60c, 0xf800b3ce, 0xb819f3a6, 0x381f9162,
+ 0x781ea114, 0xf85e33b4, 0xb85e6009, 0x3940204e,
+ 0x785e802d, 0x389f922d, 0x789f50f1, 0x78dc4103,
+ 0xb9800d8e, 0xfc5152a5, 0xbc5ca009, 0xfc05f10f,
+ 0xbc1f0016, 0xf8111c97, 0xb8186c11, 0x381fbd3a,
+ 0x781f8dd5, 0xf8417ce8, 0xb8416d0c, 0x38406f9b,
+ 0x785c6e66, 0x389ecca7, 0x789e0e36, 0x78dfedb1,
+ 0xb8816c9d, 0xfc5b2f88, 0xbc5fbd77, 0xfc1e9e89,
+ 0xbc199c65, 0xf802044d, 0xb803967e, 0x3800343d,
+ 0x781ef74a, 0xf85f442f, 0xb85fa4a1, 0x385f25f8,
+ 0x785fb63d, 0x389ef5e4, 0x789ca446, 0x78c1277b,
+ 0xb89b3729, 0xfc5507b5, 0xbc5ce53e, 0xfc1d2582,
+ 0xbc1c56a7, 0xf837598c, 0xb8364bce, 0x383a586c,
+ 0x783e49cb, 0xf8787918, 0xb87469ac, 0x38655896,
+ 0x786658bc, 0x38b97962, 0x78b9ead7, 0x78f6da83,
+ 0xb8aefba9, 0xfc7dfaf0, 0xbc747b87, 0xfc387a94,
+ 0xbc377ab9, 0xf9180c51, 0xb91b38fe, 0x391ca4e3,
+ 0x791a4c27, 0xf95ca767, 0xb9580e28, 0x3958ea20,
+ 0x795bd680, 0x399a4633, 0x799d80d3, 0x79dcf944,
+ 0xb99b249d, 0xfd5a143d, 0xbd59938f, 0xfd1b9347,
+ 0xbd1aa7c0, 0x58000019, 0x18000009, 0xf88692c0,
+ 0xd8ffdf00, 0xf8be7b80, 0xf99c8260, 0x1a180111,
+ 0x3a09022e, 0x5a190036, 0x7a13012f, 0x9a0b028f,
+ 0xba1e0164, 0xda060114, 0xfa0f02aa, 0x0b298d61,
+ 0x2b3cee24, 0xcb3ca7b5, 0x6b37d38b, 0x8b25f34c,
+ 0xab3e68d1, 0xcb210a87, 0xeb3eed3e, 0x3a4b0087,
+ 0x7a4571eb, 0xba5122e6, 0xfa4bc16a, 0x3a4519cc,
+ 0x7a5c1aef, 0xba5e3a27, 0xfa4c8bc0, 0x1a81537a,
+ 0x1a95d56e, 0x5a8f60de, 0x5a995451, 0x9a8780b0,
+ 0x9a9cc68a, 0xda8180e6, 0xda912756, 0x5ac000cb,
+ 0x5ac00760, 0x5ac00ba1, 0x5ac012b4, 0x5ac0158c,
+ 0xdac00278, 0xdac005f7, 0xdac00831, 0xdac00c7b,
+ 0xdac010be, 0xdac0140f, 0x1ad4080e, 0x1ad50d9b,
+ 0x1ada214c, 0x1ac6266e, 0x1ade2a7b, 0x1ad02dc6,
+ 0x9ac209b1, 0x9ac20fa0, 0x9ac2220c, 0x9add26e9,
+ 0x9add2a26, 0x9ada2fce, 0x9bda7f11, 0x9b4e7f54,
+ 0x1b021d1b, 0x1b19b1bc, 0x9b0a6d24, 0x9b08f956,
+ 0x9b391694, 0x9b2beed6, 0x9bac4cc4, 0x9ba881f1,
+ 0x1e2a08b6, 0x1e301904, 0x1e262919, 0x1e393b66,
+ 0x1e290aea, 0x1e6c0a36, 0x1e74180b, 0x1e6f2980,
+ 0x1e643acf, 0x1e79083d, 0x1f131769, 0x1f06e87a,
+ 0x1f285184, 0x1f354539, 0x1f5e5867, 0x1f4aab61,
+ 0x1f760511, 0x1f626f8e, 0x1e2043db, 0x1e20c025,
+ 0x1e214277, 0x1e21c23c, 0x1e22c0d9, 0x1e6041d4,
+ 0x1e60c151, 0x1e61422a, 0x1e61c235, 0x1e6241f5,
+ 0x1e380167, 0x9e3803a2, 0x1e780323, 0x9e78011c,
+ 0x1e22006b, 0x9e2202a2, 0x1e62033d, 0x9e620073,
+ 0x1e2603b4, 0x9e660237, 0x1e270380, 0x9e670289,
+ 0x1e2c20e0, 0x1e6e21a0, 0x1e202188, 0x1e602028,
+ 0x29380acc, 0x2966271b, 0x696a130f, 0xa9015405,
+ 0xa9735d26, 0x29820fa0, 0x29ee403d, 0x69c24ebb,
+ 0xa9b545a6, 0xa9c16020, 0x288052c0, 0x28fa31d1,
+ 0x68ce682a, 0xa8ba61b4, 0xa8c330e1, 0x28362ae5,
+ 0x287a2b08, 0xa8043d6b, 0xa84470a9, 0x0c40728b,
+ 0x4cdfa113, 0x0cc36c43, 0x4cdf2475, 0x0d40c0ae,
+ 0x4ddfcb6d, 0x0dc0ce71, 0x4c408cbb, 0x0cdf849a,
+ 0x4d60c2e8, 0x0dffc94e, 0x4df3ceaa, 0x4cde49d1,
+ 0x0c404a94, 0x4d40e6b8, 0x4ddfe83a, 0x0dc0ec4c,
+ 0x4cdf04d5, 0x0cd60391, 0x0d60e333, 0x0dffe6e6,
+ 0x0dfae928, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4,
0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f,
0xc8247cbf, 0x88267fff, 0x4e010fe0, 0x4e081fe1,
0x4e0c1fe1, 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f,
- 0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000,
- 0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000,
- 0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000,
- 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000,
- 0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000,
- 0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000,
- 0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000,
- 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000,
- 0xf83081f4, 0xf8220387, 0xf834132a, 0xf836204b,
- 0xf821326a, 0xf82e5075, 0xf83c41bb, 0xf83172be,
- 0xf83b63b0, 0xf8be8009, 0xf8bc039b, 0xf8b51159,
- 0xf8bf21f4, 0xf8a131d9, 0xf8b553ba, 0xf8a8433d,
- 0xf8ad7322, 0xf8af6017, 0xf8e38041, 0xf8fc0283,
- 0xf8ee11df, 0xf8e7205c, 0xf8e030ab, 0xf8eb528e,
- 0xf8ff4044, 0xf8fa72c0, 0xf8f161a1, 0xf877829a,
- 0xf86e018b, 0xf86c11ff, 0xf87b210e, 0xf86a333e,
- 0xf8765207, 0xf8614110, 0xf8617341, 0xf86061f7,
- 0xb82b8110, 0xb82101c7, 0xb830113f, 0xb83621a6,
- 0xb82b308d, 0xb8305016, 0xb83c415f, 0xb8307105,
- 0xb83a61f4, 0xb8bb8206, 0xb8bf005f, 0xb8b8111c,
- 0xb8af22e9, 0xb8ba30e2, 0xb8a351f1, 0xb8b342a5,
- 0xb8a7719a, 0xb8ac63a7, 0xb8e98288, 0xb8e803df,
- 0xb8e01186, 0xb8f12057, 0xb8e0303e, 0xb8f651e3,
- 0xb8f941b5, 0xb8ed7378, 0xb8f46163, 0xb86382ad,
- 0xb87a034f, 0xb8691053, 0xb87820fd, 0xb87d31f9,
- 0xb86b50fe, 0xb86b40c2, 0xb87071cb, 0xb8656168,
+ 0x05a08020, 0x04b0e3e0, 0x0470e7e1, 0x042f9c20,
+ 0x043f9c35, 0x047f9c20, 0x04ff9c20, 0x04299420,
+ 0x04319160, 0x0461943e, 0x04a19020, 0x042053ff,
+ 0x047f5401, 0x25208028, 0x2538cfe0, 0x2578d001,
+ 0x25b8efe2, 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea,
+ 0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b,
+ 0xe400fbf6, 0xe408ffff, 0xe547e400, 0xe4014be0,
+ 0xe4a84fe0, 0xe5e85000, 0x858043e0, 0x85a043ff,
+ 0xe59f5d08, 0x1e601000, 0x1e603000, 0x1e621000,
+ 0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
+ 0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
+ 0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
+ 0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
+ 0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
+ 0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
+ 0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
+ 0x1e7e3000, 0xf82b82af, 0xf83700a8, 0xf8271106,
+ 0xf82e22ee, 0xf82a3019, 0xf82552a9, 0xf824423b,
+ 0xf82a71a6, 0xf8236203, 0xf8a9805c, 0xf8b70022,
+ 0xf8a410fa, 0xf8a02143, 0xf8b83079, 0xf8ab5028,
+ 0xf8b043ad, 0xf8a670a0, 0xf8b061b1, 0xf8eb81db,
+ 0xf8e202ad, 0xf8f6119f, 0xf8e721fe, 0xf8e731f0,
+ 0xf8f051ba, 0xf8f74379, 0xf8e473ee, 0xf8f86221,
+ 0xf8628308, 0xf874027b, 0xf87310d1, 0xf86e235c,
+ 0xf8623270, 0xf86e5090, 0xf8794128, 0xf86a73a5,
+ 0xf86661c2, 0xb831808b, 0xb82701f0, 0xb82b1139,
+ 0xb823200e, 0xb820301e, 0xb826538a, 0xb82740ce,
+ 0xb826701e, 0xb83663be, 0xb8b0826e, 0xb8b50323,
+ 0xb8a21270, 0xb8ba22f4, 0xb8b133e6, 0xb8a553d7,
+ 0xb8ab41cc, 0xb8a271b4, 0xb8af6291, 0xb8e682fc,
+ 0xb8fb01b0, 0xb8e21317, 0xb8e0215c, 0xb8e330af,
+ 0xb8e353ab, 0xb8f640db, 0xb8f17214, 0xb8f760ef,
+ 0xb86881d0, 0xb87702f0, 0xb87c10ec, 0xb87c2267,
+ 0xb867316c, 0xb86a529f, 0xb86943e8, 0xb86a7048,
+ 0xb87163ff, 0x047600e2, 0x04be06de, 0x65d902ca,
+ 0x65cc0a17, 0x65d90623, 0x0496a099, 0x04401b57,
+ 0x04d08226, 0x04daac77, 0x04939d2b, 0x04919c7b,
+ 0x04901049, 0x0417a9f0, 0x04dea929, 0x048816ea,
+ 0x040a172d, 0x04811413, 0x04dca2d1, 0x65808a09,
+ 0x658d9411, 0x6586947d, 0x65878e21, 0x65c2880e,
+ 0x04ddb2d3, 0x65c2a5f1, 0x65c0b088, 0x65c1b3a5,
+ 0x65cda26b, 0x65c1938a, 0x65eb0ded, 0x65af3e86,
+ 0x65a749be, 0x65f379d6, 0x04404f3e, 0x04c16b0a,
+ 0x04363226, 0x04b1312a, 0x04753182, 0x049a39cf,
+ 0x04d82ce9, 0x0459353e, 0x04883347, 0x048a2fb4,
+ 0x65872e1c, 0x65c62d26, 0x6598346a, 0x04013915,
};
// END Generated code -- do not edit
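
The regenerated test data above pairs each disassembled instruction with its raw encoding in insns[]. As a quick hand check (illustrative only, not part of the patch), the first new SVE entry, 0x047600e2 "add z2.h, z7.h, z22.h", can be decoded against the field layout used by the sve_add emitter added below in assembler_aarch64.hpp; the field names in this sketch are ad hoc.

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t insn = 0x047600e2;        // add z2.h, z7.h, z22.h from the listing above
  unsigned op  = insn >> 24;               // 0b00000100: SVE integer add/sub, unpredicated
  unsigned T   = (insn >> 22) & 0x3;       // 0b01 -> H (halfword elements)
  unsigned Zm  = (insn >> 16) & 0x1f;      // 22 -> z22
  unsigned opc = (insn >> 10) & 0x7;       // 0b000 -> add
  unsigned Zn  = (insn >> 5)  & 0x1f;      // 7 -> z7
  unsigned Zd  = insn & 0x1f;              // 2 -> z2
  printf("op=%#x T=%u Zm=z%u opc=%u Zn=z%u Zd=z%u\n", op, T, Zm, opc, Zn, Zd);
  return 0;
}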
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
index 8f0d7f5..13daa4e 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -152,6 +152,9 @@ REGISTER_DECLARATION(Register, rdispatch, r21);
// Java stack pointer
REGISTER_DECLARATION(Register, esp, r20);
+// Preserved predicate register with all elements set TRUE.
+REGISTER_DECLARATION(PRegister, ptrue, p7);
+
#define assert_cond(ARG1) assert(ARG1, #ARG1)
namespace asm_util {
@@ -581,6 +584,18 @@ class Address {
void lea(MacroAssembler *, Register) const;
static bool offset_ok_for_immed(int64_t offset, uint shift = 0);
+
+ static bool offset_ok_for_sve_immed(long offset, int shift, int vl /* sve vector length */) {
+ if (offset % vl == 0) {
+ // Convert address offset into sve imm offset (MUL VL).
+ int sve_offset = offset / vl;
+ if (((-(1 << (shift - 1))) <= sve_offset) && (sve_offset < (1 << (shift - 1)))) {
+ // sve_offset can be encoded
+ return true;
+ }
+ }
+ return false;
+ }
};
// Convience classes
@@ -2473,13 +2488,18 @@ public:
f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);
}
- void umov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) {
- starti;
- f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21);
- f(((idx<<1)|1)<<(int)T, 20, 16), f(0b001111, 15, 10);
- rf(Vn, 5), rf(Rd, 0);
+#define INSN(NAME, op) \
+ void NAME(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { \
+ starti; \
+ f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21); \
+ f(((idx<<1)|1)<<(int)T, 20, 16), f(op, 15, 10); \
+ rf(Vn, 5), rf(Rd, 0); \
}
+ INSN(umov, 0b001111);
+ INSN(smov, 0b001011);
+#undef INSN
+
#define INSN(NAME, opc, opc2, isSHR) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
starti; \
@@ -2721,6 +2741,240 @@ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister V
f(0, 10), rf(Vn, 5), rf(Vd, 0);
}
+// SVE arithmetics - unpredicated
+#define INSN(NAME, opcode) \
+ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
+ starti; \
+ assert(T != Q, "invalid register variant"); \
+ f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21), \
+ rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \
+ }
+ INSN(sve_add, 0b000);
+ INSN(sve_sub, 0b001);
+#undef INSN
+
+// SVE floating-point arithmetic - unpredicated
+#define INSN(NAME, opcode) \
+ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
+ starti; \
+ assert(T == S || T == D, "invalid register variant"); \
+ f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21), \
+ rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \
+ }
+
+ INSN(sve_fadd, 0b000);
+ INSN(sve_fmul, 0b010);
+ INSN(sve_fsub, 0b001);
+#undef INSN
+
+private:
+ void sve_predicate_reg_insn(unsigned op24, unsigned op13,
+ FloatRegister Zd_or_Vd, SIMD_RegVariant T,
+ PRegister Pg, FloatRegister Zn_or_Vn) {
+ starti;
+ f(op24, 31, 24), f(T, 23, 22), f(op13, 21, 13);
+ pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0);
+ }
+
+public:
+
+// SVE integer arithmetics - predicate
+#define INSN(NAME, op1, op2) \
+ void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) { \
+ assert(T != Q, "invalid register variant"); \
+ sve_predicate_reg_insn(op1, op2, Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn); \
+ }
+
+ INSN(sve_abs, 0b00000100, 0b010110101); // vector abs, unary
+ INSN(sve_add, 0b00000100, 0b000000000); // vector add
+ INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar
+ INSN(sve_asr, 0b00000100, 0b010000100); // vector arithmetic shift right
+ INSN(sve_cnt, 0b00000100, 0b011010101) // count non-zero bits
+ INSN(sve_cpy, 0b00000101, 0b100000100); // copy scalar to each active vector element
+ INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar
+ INSN(sve_lsl, 0b00000100, 0b010011100); // vector logical shift left
+ INSN(sve_lsr, 0b00000100, 0b010001100); // vector logical shift right
+ INSN(sve_mul, 0b00000100, 0b010000000); // vector mul
+ INSN(sve_neg, 0b00000100, 0b010111101); // vector neg, unary
+ INSN(sve_not, 0b00000100, 0b011110101); // bitwise invert vector, unary
+ INSN(sve_orv, 0b00000100, 0b011000001); // bitwise or reduction to scalar
+ INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors
+ INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
+ INSN(sve_smin, 0b00000100, 0b001010000); // signed minimum vectors
+ INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
+ INSN(sve_sub, 0b00000100, 0b000001000); // vector sub
+ INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
+#undef INSN
+
+// SVE floating-point arithmetics - predicate
+#define INSN(NAME, op1, op2) \
+ void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \
+ assert(T == S || T == D, "invalid register variant"); \
+ sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm); \
+ }
+
+ INSN(sve_fabs, 0b00000100, 0b011100101);
+ INSN(sve_fadd, 0b01100101, 0b000000100);
+ INSN(sve_fadda, 0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd
+ INSN(sve_fdiv, 0b01100101, 0b001101100);
+ INSN(sve_fmax, 0b01100101, 0b000110100); // floating-point maximum
+ INSN(sve_fmaxv, 0b01100101, 0b000110001); // floating-point maximum recursive reduction to scalar
+ INSN(sve_fmin, 0b01100101, 0b000111100); // floating-point minimum
+ INSN(sve_fminv, 0b01100101, 0b000111001); // floating-point minimum recursive reduction to scalar
+ INSN(sve_fmul, 0b01100101, 0b000010100);
+ INSN(sve_fneg, 0b00000100, 0b011101101);
+ INSN(sve_frintm, 0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity
+ INSN(sve_frintn, 0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even
+ INSN(sve_frintp, 0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity
+ INSN(sve_fsqrt, 0b01100101, 0b001101101);
+ INSN(sve_fsub, 0b01100101, 0b000001100);
+#undef INSN
+
+ // SVE multiple-add/sub - predicated
+#define INSN(NAME, op0, op1, op2) \
+ void NAME(FloatRegister Zda, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \
+ starti; \
+ assert(T != Q, "invalid size"); \
+ f(op0, 31, 24), f(T, 23, 22), f(op1, 21), rf(Zm, 16); \
+ f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0); \
+ }
+
+ INSN(sve_fmla, 0b01100101, 1, 0b000); // floating-point fused multiply-add: Zda = Zda + Zn * Zm
+ INSN(sve_fmls, 0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm
+ INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm
+ INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm
+ INSN(sve_mla, 0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm
+ INSN(sve_mls, 0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm
+#undef INSN
+
+// SVE bitwise logical - unpredicated
+#define INSN(NAME, opc) \
+ void NAME(FloatRegister Zd, FloatRegister Zn, FloatRegister Zm) { \
+ starti; \
+ f(0b00000100, 31, 24), f(opc, 23, 22), f(1, 21), \
+ rf(Zm, 16), f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0); \
+ }
+ INSN(sve_and, 0b00);
+ INSN(sve_eor, 0b10);
+ INSN(sve_orr, 0b01);
+#undef INSN
+
+// SVE shift immediate - unpredicated
+#define INSN(NAME, opc, isSHR) \
+ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \
+ starti; \
+ /* The encodings for the tszh:tszl:imm3 fields (bits 23:22 20:19 18:16) \
+ * for shift right is calculated as: \
+ * 0001 xxx B, shift = 16 - UInt(tszh:tszl:imm3) \
+ * 001x xxx H, shift = 32 - UInt(tszh:tszl:imm3) \
+ * 01xx xxx S, shift = 64 - UInt(tszh:tszl:imm3) \
+ * 1xxx xxx D, shift = 128 - UInt(tszh:tszl:imm3) \
+ * for shift left is calculated as: \
+ * 0001 xxx B, shift = UInt(tszh:tszl:imm3) - 8 \
+ * 001x xxx H, shift = UInt(tszh:tszl:imm3) - 16 \
+ * 01xx xxx S, shift = UInt(tszh:tszl:imm3) - 32 \
+ * 1xxx xxx D, shift = UInt(tszh:tszl:imm3) - 64 \
+ */ \
+ assert(T != Q, "Invalid register variant"); \
+ if (isSHR) { \
+ assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value"); \
+ } else { \
+ assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value"); \
+ } \
+ int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0))); \
+ int encodedShift = isSHR ? cVal - shift : cVal + shift; \
+ int tszh = encodedShift >> 5; \
+ int tszl_imm = encodedShift & 0x1f; \
+ f(0b00000100, 31, 24); \
+ f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16); \
+ f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0); \
+ }
+
+ INSN(sve_asr, 0b100, /* isSHR = */ true);
+ INSN(sve_lsl, 0b111, /* isSHR = */ false);
+ INSN(sve_lsr, 0b101, /* isSHR = */ true);
+#undef INSN
+
+private:
+
+ // Scalar base + immediate index
+ void sve_ld_st1(FloatRegister Zt, Register Xn, int imm, PRegister Pg,
+ SIMD_RegVariant T, int op1, int type, int op2) {
+ starti;
+ assert_cond(T >= type);
+ f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
+ f(0, 20), sf(imm, 19, 16), f(op2, 15, 13);
+ pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
+ }
+
+ // Scalar base + scalar index
+ void sve_ld_st1(FloatRegister Zt, Register Xn, Register Xm, PRegister Pg,
+ SIMD_RegVariant T, int op1, int type, int op2) {
+ starti;
+ assert_cond(T >= type);
+ f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
+ rf(Xm, 16), f(op2, 15, 13);
+ pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
+ }
+
+ void sve_ld_st1(FloatRegister Zt, PRegister Pg,
+ SIMD_RegVariant T, const Address &a,
+ int op1, int type, int imm_op2, int scalar_op2) {
+ switch (a.getMode()) {
+ case Address::base_plus_offset:
+ sve_ld_st1(Zt, a.base(), a.offset(), Pg, T, op1, type, imm_op2);
+ break;
+ case Address::base_plus_offset_reg:
+ sve_ld_st1(Zt, a.base(), a.index(), Pg, T, op1, type, scalar_op2);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+public:
+
+// SVE load/store - predicated
+#define INSN(NAME, op1, type, imm_op2, scalar_op2) \
+ void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) { \
+ assert(T != Q, "invalid register variant"); \
+ sve_ld_st1(Zt, Pg, T, a, op1, type, imm_op2, scalar_op2); \
+ }
+
+ INSN(sve_ld1b, 0b1010010, 0b00, 0b101, 0b010);
+ INSN(sve_st1b, 0b1110010, 0b00, 0b111, 0b010);
+ INSN(sve_ld1h, 0b1010010, 0b01, 0b101, 0b010);
+ INSN(sve_st1h, 0b1110010, 0b01, 0b111, 0b010);
+ INSN(sve_ld1w, 0b1010010, 0b10, 0b101, 0b010);
+ INSN(sve_st1w, 0b1110010, 0b10, 0b111, 0b010);
+ INSN(sve_ld1d, 0b1010010, 0b11, 0b101, 0b010);
+ INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010);
+#undef INSN
+
+// SVE load/store - unpredicated
+#define INSN(NAME, op1) \
+ void NAME(FloatRegister Zt, const Address &a) { \
+ starti; \
+ assert(a.index() == noreg, "invalid address variant"); \
+ f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16), \
+ f(0b010, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), rf(Zt, 0); \
+ }
+
+ INSN(sve_ldr, 0b100); // LDR (vector)
+ INSN(sve_str, 0b111); // STR (vector)
+#undef INSN
+
+#define INSN(NAME, op) \
+ void NAME(Register Xd, Register Xn, int imm6) { \
+ starti; \
+ f(0b000001000, 31, 23), f(op, 22, 21); \
+ srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0); \
+ }
+
+ INSN(sve_addvl, 0b01);
+ INSN(sve_addpl, 0b11);
+#undef INSN
+
// SVE inc/dec register by element count
#define INSN(NAME, op) \
void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \
@@ -2734,6 +2988,45 @@ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister V
INSN(sve_dec, 1);
#undef INSN
+// SVE predicate count
+ void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) {
+ starti;
+ assert(T != Q, "invalid size");
+ f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000010, 21, 14);
+ prf(Pg, 10), f(0, 9), prf(Pn, 5), rf(Xd, 0);
+ }
+
+ // SVE dup scalar
+ void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) {
+ starti;
+ assert(T != Q, "invalid size");
+ f(0b00000101, 31, 24), f(T, 23, 22), f(0b100000001110, 21, 10);
+ srf(Rn, 5), rf(Zd, 0);
+ }
+
+ // SVE dup imm
+ void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) {
+ starti;
+ assert(T != Q, "invalid size");
+ int sh = 0;
+ if (imm8 <= 127 && imm8 >= -128) {
+ sh = 0;
+ } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) {
+ sh = 1;
+ imm8 = (imm8 >> 8);
+ } else {
+ guarantee(false, "invalid immediate");
+ }
+ f(0b00100101, 31, 24), f(T, 23, 22), f(0b11100011, 21, 14);
+ f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
+ }
+
+ void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) {
+ starti;
+ f(0b00100101, 31, 24), f(esize, 23, 22), f(0b011000111000, 21, 10);
+ f(pattern, 9, 5), f(0b0, 4), prf(pd, 0);
+ }
+
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
}
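
The new predicated loads and stores above take either a scalar index or an immediate in MUL VL units, and Address::offset_ok_for_sve_immed (added earlier in this file) gates the immediate form. Below is a minimal standalone sketch of that check, assuming a 256-bit vector length and the signed 4-bit immediate of ld1/st1 (shift == 4); it mirrors the helper for illustration rather than calling HotSpot code.

#include <cstdio>

// Mirror of Address::offset_ok_for_sve_immed, for illustration only.
static bool offset_ok_for_sve_immed(long offset, int shift, int vl) {
  if (offset % vl == 0) {
    int sve_offset = offset / vl;          // offset expressed in MUL VL units
    return (-(1 << (shift - 1)) <= sve_offset) && (sve_offset < (1 << (shift - 1)));
  }
  return false;
}

int main() {
  const int vl = 32;                       // assumed 256-bit SVE vector length, in bytes
  for (long off = -9L * vl; off <= 8L * vl; off += vl) {
    printf("byte offset %5ld -> %s\n", off,
           offset_ok_for_sve_immed(off, 4, vl) ? "imm form ok" : "needs a scalar index");
  }
  return 0;
}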
diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
index 6ac54f2..a258528 100644
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp
@@ -456,8 +456,12 @@ void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, Z
ZSetupArguments setup_arguments(masm, stub);
__ mov(rscratch1, stub->slow_path());
__ blr(rscratch1);
+ if (UseSVE > 0) {
+ // Reinitialize the ptrue predicate register, in case the external runtime
+ // call clobbers ptrue reg, as we may return to SVE compiled code.
+ __ reinitialize_ptrue();
+ }
}
-
// Stub exit
__ b(*stub->continuation());
}
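
The macroAssembler changes that follow extend push_fp/pop_fp (and push_CPU_state further down) to spill whole Z registers when the SVE vector length exceeds 16 bytes, moving sp by sve_vector_size_in_bytes per register and reporting the saved size in 8-byte dwords. A tiny arithmetic sanity check of that bookkeeping, using assumed example values (512-bit vectors, all 32 registers saved), not figures from the patch:

#include <cstdio>

int main() {
  const int sve_vector_size_in_bytes = 64;   // assumed 512-bit vector length
  const int count = 32;                      // all of v0..v31 treated as z0..z31
  int stack_bytes     = sve_vector_size_in_bytes * count;       // sp adjustment
  int dwords_reported = count * sve_vector_size_in_bytes / 8;   // push_fp return value
  printf("sp moves by %d bytes; push_fp reports %d dwords\n",
         stack_bytes, dwords_reported);
  return 0;
}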
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index c70d424..7cfa70a 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -2131,8 +2131,17 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) {
}
// Push lots of registers in the bit set supplied. Don't push sp.
-// Return the number of words pushed
+// Return the number of dwords pushed
int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
+ int words_pushed = 0;
+ bool use_sve = false;
+ int sve_vector_size_in_bytes = 0;
+
+#ifdef COMPILER2
+ use_sve = Matcher::supports_scalable_vector();
+ sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+#endif
+
// Scan bitset to accumulate register pairs
unsigned char regs[32];
int count = 0;
@@ -2147,8 +2156,18 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
return 0;
}
+ // SVE
+ if (use_sve && sve_vector_size_in_bytes > 16) {
+ sub(stack, stack, sve_vector_size_in_bytes * count);
+ for (int i = 0; i < count; i++) {
+ sve_str(as_FloatRegister(regs[i]), Address(stack, i));
+ }
+ return count * sve_vector_size_in_bytes / 8;
+ }
+
add(stack, stack, -count * wordSize * 2);
+ // NEON
if (count & 1) {
strq(as_FloatRegister(regs[0]), Address(stack));
i += 1;
@@ -2161,7 +2180,16 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
return count;
}
+// Return the number of dwords popped
int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
+ int words_pushed = 0;
+ bool use_sve = false;
+ int sve_vector_size_in_bytes = 0;
+
+#ifdef COMPILER2
+ use_sve = Matcher::supports_scalable_vector();
+ sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
+#endif
// Scan bitset to accumulate register pairs
unsigned char regs[32];
int count = 0;
@@ -2176,6 +2204,16 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
return 0;
}
+ // SVE
+ if (use_sve && sve_vector_size_in_bytes > 16) {
+ for (int i = count - 1; i >= 0; i--) {
+ sve_ldr(as_FloatRegister(regs[i]), Address(stack, i));
+ }
+ add(stack, stack, sve_vector_size_in_bytes * count);
+ return count * sve_vector_size_in_bytes / 8;
+ }
+
+ // NEON
if (count & 1) {
ldrq(as_FloatRegister(regs[0]), Address(stack));
i += 1;
@@ -2659,23 +2697,39 @@ void MacroAssembler::pop_call_clobbered_registers() {
pop(call_clobbered_registers() - RegSet::of(rscratch1, rscratch2), sp);
}
-void MacroAssembler::push_CPU_state(bool save_vectors) {
- int step = (save_vectors ? 8 : 4) * wordSize;
+void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve,
+ int sve_vector_size_in_bytes) {
push(RegSet::range(r0, r29), sp); // integer registers except lr & sp
- mov(rscratch1, -step);
- sub(sp, sp, step);
- for (int i = 28; i >= 4; i -= 4) {
- st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
- as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
+ if (save_vectors && use_sve && sve_vector_size_in_bytes > 16) {
+ sub(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers);
+ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
+ sve_str(as_FloatRegister(i), Address(sp, i));
+ }
+ } else {
+ int step = (save_vectors ? 8 : 4) * wordSize;
+ mov(rscratch1, -step);
+ sub(sp, sp, step);
+ for (int i = 28; i >= 4; i -= 4) {
+ st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
+ as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
+ }
+ st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
}
- st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
}
-void MacroAssembler::pop_CPU_state(bool restore_vectors) {
- int step = (restore_vectors ? 8 : 4) * wordSize;
- for (int i = 0; i <= 28; i += 4)
- ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
- as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
+void MacroAssembler::pop_CPU_state(bool restore_vectors, bool use_sve,
+ int sve_vector_size_in_bytes) {
+ if (restore_vectors && use_sve && sve_vector_size_in_bytes > 16) {
+ for (int i = FloatRegisterImpl::number_of_registers - 1; i >= 0; i--) {
+ sve_ldr(as_FloatRegister(i), Address(sp, i));
+ }
+ add(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers);
+ } else {
+ int step = (restore_vectors ? 8 : 4) * wordSize;
+ for (int i = 0; i <= 28; i += 4)
+ ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
+ as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
+ }
// integer registers except lr & sp
pop(RegSet::range(r0, r17), sp);
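
For context on the push_CPU_state/pop_CPU_state changes above: when C2 has chosen SVE vectors wider than 16 bytes, all 32 vector registers are spilled at the full hardware vector length with sve_str/sve_ldr, instead of the fixed 128-bit (T2D) or 64-bit (T1D) NEON slots. A rough standalone sketch of the resulting frame sizes, assuming only what the code above shows (cpu_state_vector_bytes is an illustrative helper, not HotSpot code):

    #include <cstdio>

    static int cpu_state_vector_bytes(bool save_vectors, bool use_sve, int vl_bytes) {
      const int nregs = 32;                      // FloatRegisterImpl::number_of_registers
      if (save_vectors && use_sve && vl_bytes > 16) {
        return nregs * vl_bytes;                 // e.g. 32 * 32 = 1024 bytes for 256-bit SVE
      }
      return nregs * (save_vectors ? 16 : 8);    // 128-bit (T2D) or 64-bit (T1D) per register
    }

    int main() {
      printf("%d\n", cpu_state_vector_bytes(true, true, 32));   // 1024
      printf("%d\n", cpu_state_vector_bytes(true, false, 0));   // 512
    }
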
@@ -2732,6 +2786,21 @@ Address MacroAssembler::spill_address(int size, int offset, Register tmp)
return Address(base, offset);
}
+Address MacroAssembler::sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp) {
+ assert(offset >= 0, "spill to negative address?");
+
+ Register base = sp;
+
+ // An immediate offset in the range 0 to 255 which is multiplied
+ // by the current vector or predicate register size in bytes.
+ if (offset % sve_reg_size_in_bytes == 0 && offset < ((1<<8)*sve_reg_size_in_bytes)) {
+ return Address(base, offset / sve_reg_size_in_bytes);
+ }
+
+ add(tmp, base, offset);
+ return Address(tmp);
+}
+
// Checks whether offset is aligned.
// Returns true if it is, else false.
bool MacroAssembler::merge_alignment_check(Register base,
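
The sve_spill_address helper above relies on the VL-scaled immediate form of SVE LDR/STR: the byte offset is only folded into the address when it is a multiple of the register size and the scaled index stays below 256; otherwise the address is materialized into the scratch register. A small standalone sketch of that decision, with fits_scaled_imm as an illustrative stand-in rather than real HotSpot code:

    #include <cstdio>

    static bool fits_scaled_imm(int reg_size_in_bytes, int offset, int* scaled) {
      if (offset >= 0 && offset % reg_size_in_bytes == 0 &&
          offset < (1 << 8) * reg_size_in_bytes) {
        *scaled = offset / reg_size_in_bytes;    // becomes Address(sp, scaled)
        return true;
      }
      return false;                              // fall back to add(tmp, sp, offset)
    }

    int main() {
      int s;
      printf("%d\n", fits_scaled_imm(32, 96, &s) ? s : -1);   // 3
      printf("%d\n", fits_scaled_imm(32, 100, &s) ? s : -1);  // -1 (not VL-aligned)
    }
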
@@ -5930,3 +5999,13 @@ void MacroAssembler::verify_sve_vector_length() {
stop("Error: SVE vector length has changed since jvm startup");
bind(verify_ok);
}
+
+void MacroAssembler::verify_ptrue() {
+ Label verify_ok;
+ assert(UseSVE > 0, "should only be used for SVE");
+ sve_cntp(rscratch1, B, ptrue, ptrue); // count the true elements
+ sve_dec(rscratch1, B);
+ cbz(rscratch1, verify_ok);
+ stop("Error: the preserved predicate register (p7) elements are not all true");
+ bind(verify_ok);
+}
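
verify_ptrue above combines two of the new emitters: sve_cntp with p7 as both governing and source predicate counts the true byte lanes, and sve_dec subtracts the number of byte elements in a vector (the vector length in bytes), so the result is zero exactly when every lane of p7 is still true. A simple sketch of that invariant, assuming nothing beyond the code above (ptrue_intact is an illustrative name, not HotSpot code):

    #include <cstdio>

    static bool ptrue_intact(int active_byte_lanes, int vl_bytes) {
      return active_byte_lanes - vl_bytes == 0;   // mirrors sve_cntp + sve_dec + cbz
    }

    int main() {
      printf("%d\n", ptrue_intact(32, 32));  // 1: all 32 lanes of a 256-bit register true
      printf("%d\n", ptrue_intact(17, 32));  // 0: the predicate was clobbered
    }
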
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
index ec9b3cc..07e3169 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -862,8 +862,10 @@ public:
DEBUG_ONLY(void verify_heapbase(const char* msg);)
- void push_CPU_state(bool save_vectors = false);
- void pop_CPU_state(bool restore_vectors = false) ;
+ void push_CPU_state(bool save_vectors = false, bool use_sve = false,
+ int sve_vector_size_in_bytes = 0);
+ void pop_CPU_state(bool restore_vectors = false, bool use_sve = false,
+ int sve_vector_size_in_bytes = 0);
// Round up to a power of two
void round_to(Register reg, int modulus);
@@ -939,6 +941,10 @@ public:
Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
void verify_sve_vector_length();
+ void reinitialize_ptrue() {
+ sve_ptrue(ptrue, B);
+ }
+ void verify_ptrue();
// Debugging
@@ -1338,6 +1344,7 @@ private:
// Returns an address on the stack which is reachable with a ldr/str of size
// Uses rscratch2 if the address is not directly reachable
Address spill_address(int size, int offset, Register tmp=rscratch2);
+ Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2);
bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const;
@@ -1361,6 +1368,9 @@ public:
void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
str(Vx, T, spill_address(1 << (int)T, offset));
}
+ void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
+ sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
+ }
void unspill(Register Rx, bool is64, int offset) {
if (is64) {
ldr(Rx, spill_address(8, offset));
@@ -1371,6 +1381,9 @@ public:
void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
ldr(Vx, T, spill_address(1 << (int)T, offset));
}
+ void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
+ sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
+ }
void spill_copy128(int src_offset, int dst_offset,
Register tmp1=rscratch1, Register tmp2=rscratch2) {
if (src_offset < 512 && (src_offset & 7) == 0 &&
@@ -1384,6 +1397,15 @@ public:
spill(tmp1, true, dst_offset+8);
}
}
+ void spill_copy_sve_vector_stack_to_stack(int src_offset, int dst_offset,
+ int sve_vec_reg_size_in_bytes) {
+ assert(sve_vec_reg_size_in_bytes % 16 == 0, "unexpected sve vector reg size");
+ for (int i = 0; i < sve_vec_reg_size_in_bytes / 16; i++) {
+ spill_copy128(src_offset, dst_offset);
+ src_offset += 16;
+ dst_offset += 16;
+ }
+ }
};
#ifdef ASSERT
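
spill_copy_sve_vector_stack_to_stack above reuses the existing 128-bit spill_copy128 helper, walking the SVE slot in 16-byte chunks; that is why it asserts the register size is a multiple of 16 (SVE vector lengths are always multiples of 128 bits). A tiny illustrative sketch, with copy_sve_slot standing in for the real routine:

    #include <cstdio>

    static void copy_sve_slot(int src_offset, int dst_offset, int sve_reg_size_in_bytes) {
      for (int i = 0; i < sve_reg_size_in_bytes / 16; i++) {
        printf("copy128 %d -> %d\n", src_offset, dst_offset);  // stands in for spill_copy128
        src_offset += 16;
        dst_offset += 16;
      }
    }

    int main() {
      copy_sve_slot(0, 64, 48);   // a 384-bit vector takes three 128-bit copies
    }
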
diff --git a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp
index 1602a78..e476456 100644
--- a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp
@@ -196,3 +196,5 @@ REGISTER_DEFINITION(PRegister, p4);
REGISTER_DEFINITION(PRegister, p5);
REGISTER_DEFINITION(PRegister, p6);
REGISTER_DEFINITION(PRegister, p7);
+
+REGISTER_DEFINITION(PRegister, ptrue);
diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
index 4b35aa6..491e29d 100644
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -152,7 +152,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// Save Integer and Float registers.
__ enter();
- __ push_CPU_state(save_vectors);
+ __ push_CPU_state(save_vectors, use_sve, sve_vector_size_in_bytes);
// Set an oopmap for the call site. This oopmap will map all
// oop-registers and debug-info registers as callee-saved. This
@@ -191,10 +191,15 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
}
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
-#ifndef COMPILER2
+#ifdef COMPILER2
+ __ pop_CPU_state(restore_vectors, Matcher::supports_scalable_vector(),
+ Matcher::scalable_vector_reg_size(T_BYTE));
+#else
+#if !INCLUDE_JVMCI
assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
#endif
__ pop_CPU_state(restore_vectors);
+#endif
__ leave();
}
@@ -2810,6 +2815,12 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
__ maybe_isb();
__ membar(Assembler::LoadLoad | Assembler::LoadStore);
+ if (UseSVE > 0 && save_vectors) {
+ // Reinitialize the ptrue predicate register, in case the external runtime
+ // call clobbers ptrue reg, as we may return to SVE compiled code.
+ __ reinitialize_ptrue();
+ }
+
__ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
__ cbz(rscratch1, noException);
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index d307871..cd3f6f4 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -488,6 +488,11 @@ class StubGenerator: public StubCodeGenerator {
__ call_VM_leaf(CAST_FROM_FN_PTR(address,
SharedRuntime::exception_handler_for_return_address),
rthread, c_rarg1);
+ if (UseSVE > 0) {
+ // Reinitialize the ptrue predicate register, in case the external runtime
+ // call clobbers ptrue reg, as we may return to SVE compiled code.
+ __ reinitialize_ptrue();
+ }
// we should not really care that lr is no longer the callee
// address. we saved the value the handler needs in r19 so we can
// just copy it to r3. however, the C2 handler will push its own
@@ -5421,6 +5426,12 @@ class StubGenerator: public StubCodeGenerator {
__ reset_last_Java_frame(true);
__ maybe_isb();
+ if (UseSVE > 0) {
+ // Reinitialize the ptrue predicate register, in case the external runtime
+ // call clobbers ptrue reg, as we may return to SVE compiled code.
+ __ reinitialize_ptrue();
+ }
+
__ leave();
// check for pending exceptions
diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
index f142afa..98e6780 100644
--- a/src/hotspot/cpu/arm/arm.ad
+++ b/src/hotspot/cpu/arm/arm.ad
@@ -1093,7 +1093,7 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
// TODO
// identify extra cases that we might want to provide match rules for
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index b3bf64c..cc1e1a1 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -2242,7 +2242,7 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
// TODO
// identify extra cases that we might want to provide match rules for
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
index ea09aaa..782c1c7 100644
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@@ -1522,7 +1522,7 @@ const bool Matcher::match_rule_supported(int opcode) {
// BUT: make sure match rule is not disabled by a false predicate!
}
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
// TODO
// Identify extra cases that we might want to provide match rules for
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen.
diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad
index a09c795..3b1b104 100644
--- a/src/hotspot/cpu/sparc/sparc.ad
+++ b/src/hotspot/cpu/sparc/sparc.ad
@@ -1710,7 +1710,7 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported.
}
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
// TODO
// identify extra cases that we might want to provide match rules for
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
index 4e1336f..b75d0ff 100644
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -1379,7 +1379,7 @@ const bool Matcher::match_rule_supported(int opcode) {
return ret_value; // Per default match rules are supported.
}
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
// identify extra cases that we might want to provide match rules for
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
bool ret_value = match_rule_supported(opcode);
diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp
index ed890f8..9a83071 100644
--- a/src/hotspot/share/opto/matcher.hpp
+++ b/src/hotspot/share/opto/matcher.hpp
@@ -310,7 +310,7 @@ public:
// identify extra cases that we might want to provide match rules for
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
- static const bool match_rule_supported_vector(int opcode, int vlen);
+ static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt);
// Some microarchitectures have mask registers used on vectors
static const bool has_predicated_vectors(void);
diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp
index fed52e4..ee58323 100644
--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@@ -96,8 +96,11 @@ static const bool _do_vector_loop_experimental = false; // Experimental vectoriz
//------------------------------transform_loop---------------------------
void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
assert(UseSuperWord, "should be");
- // Do vectors exist on this architecture?
- if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
+ // SuperWord only works with power of two vector sizes.
+ int vector_width = Matcher::vector_width_in_bytes(T_BYTE);
+ if (vector_width < 2 || !is_power_of_2(vector_width)) {
+ return;
+ }
assert(lpt->_head->is_CountedLoop(), "must be");
CountedLoopNode *cl = lpt->_head->as_CountedLoop();
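
The SuperWord change above replaces the old "do vectors exist" test with an explicit power-of-two check, because SVE hardware may report vector widths such as 48 bytes (384 bits) that SuperWord's packing logic cannot use. A standalone sketch of the new entry condition (superword_applicable is an illustrative name, not HotSpot code):

    #include <cstdio>

    static bool is_power_of_2(int x) { return x > 0 && (x & (x - 1)) == 0; }

    static bool superword_applicable(int vector_width_in_bytes) {
      return vector_width_in_bytes >= 2 && is_power_of_2(vector_width_in_bytes);
    }

    int main() {
      printf("%d\n", superword_applicable(32));  // 1: 256-bit SVE
      printf("%d\n", superword_applicable(48));  // 0: 384-bit SVE, not a power of two
    }
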
diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp
index 1f2cf2c..6867177 100644
--- a/src/hotspot/share/opto/vectornode.cpp
+++ b/src/hotspot/share/opto/vectornode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -236,7 +236,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
(vlen > 1) && is_power_of_2(vlen) &&
Matcher::vector_size_supported(bt, vlen)) {
int vopc = VectorNode::opcode(opc, bt);
- return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen);
+ return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen, bt);
}
return false;
}
@@ -653,7 +653,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
(vlen > 1) && is_power_of_2(vlen) &&
Matcher::vector_size_supported(bt, vlen)) {
int vopc = ReductionNode::opcode(opc, bt);
- return vopc != opc && Matcher::match_rule_supported(vopc);
+ return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt);
}
return false;
}
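
The vectornode.cpp change routes ReductionNode::implemented through match_rule_supported_vector as well, so a backend now sees the vector length and element type when deciding whether a reduction is supported, rather than only the opcode. The following is a hypothetical, purely illustrative sketch of that query shape; the policy inside backend_supports is made up and not taken from any real backend:

    #include <cstdio>

    enum BasicType { T_INT, T_DOUBLE };

    // Illustrative stand-in for the per-backend Matcher::match_rule_supported_vector hook.
    static bool backend_supports(int vopc, int vlen, BasicType bt) {
      (void)vopc;
      return !(bt == T_DOUBLE && vlen > 8);   // made-up policy, purely illustrative
    }

    // Mirrors the shape of ReductionNode::implemented after the change: the scalar opcode
    // must map to a distinct reduction opcode, and the backend must accept that opcode
    // for this particular length and element type.
    static bool reduction_implemented(int opc, int vopc, int vlen, BasicType bt) {
      return vopc != opc && backend_supports(vopc, vlen, bt);
    }

    int main() {
      printf("%d\n", reduction_implemented(10, 42, 4,  T_DOUBLE));  // 1
      printf("%d\n", reduction_implemented(10, 42, 16, T_DOUBLE));  // 0
    }
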