1 Star 0 Fork 128

cenhuilin/gcc

forked from src-openEuler/gcc 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
LoongArch-Use-bstrins-instruction-for-a-mask-and-a-m.patch 10.59 KB
一键复制 编辑 原始数据 按行查看 历史
ticat_fp 提交于 2024-03-26 09:26 . LoongArch: update from gcc upstream
From 1c63c61f6508e3c718be79dd27dda25db2b291ee Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 5 Sep 2023 19:42:30 +0800
Subject: [PATCH 068/124] LoongArch: Use bstrins instruction for (a & ~mask)
and (a & mask) | (b & ~mask) [PR111252]
If mask is a constant with value ((1 << N) - 1) << M we can perform this
optimization.
gcc/ChangeLog:
PR target/111252
* config/loongarch/loongarch-protos.h
(loongarch_pre_reload_split): Declare new function.
(loongarch_use_bstrins_for_ior_with_mask): Likewise.
* config/loongarch/loongarch.cc
(loongarch_pre_reload_split): Implement.
(loongarch_use_bstrins_for_ior_with_mask): Likewise.
* config/loongarch/predicates.md (ins_zero_bitmask_operand):
New predicate.
* config/loongarch/loongarch.md (bstrins_<mode>_for_mask):
New define_insn_and_split.
(bstrins_<mode>_for_ior_mask): Likewise.
(define_peephole2): Further optimize code sequence produced by
bstrins_<mode>_for_ior_mask if possible.
gcc/testsuite/ChangeLog:
* g++.target/loongarch/bstrins-compile.C: New test.
* g++.target/loongarch/bstrins-run.C: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch-protos.h | 4 +-
gcc/config/loongarch/loongarch.cc | 36 ++++++++
gcc/config/loongarch/loongarch.md | 91 +++++++++++++++++++
gcc/config/loongarch/predicates.md | 8 ++
.../g++.target/loongarch/bstrins-compile.C | 22 +++++
.../g++.target/loongarch/bstrins-run.C | 65 +++++++++++++
6 files changed, 225 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/g++.target/loongarch/bstrins-compile.C
create mode 100644 gcc/testsuite/g++.target/loongarch/bstrins-run.C
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index 133ec9fa8..ea61cf567 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -56,7 +56,7 @@ enum loongarch_symbol_type {
};
#define NUM_SYMBOL_TYPES (SYMBOL_TLSLDM + 1)
-/* Routines implemented in loongarch.c. */
+/* Routines implemented in loongarch.cc. */
extern rtx loongarch_emit_move (rtx, rtx);
extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int);
extern void loongarch_expand_prologue (void);
@@ -163,6 +163,8 @@ extern const char *current_section_name (void);
extern unsigned int current_section_flags (void);
extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
extern bool loongarch_check_zero_div_p (void);
+extern bool loongarch_pre_reload_split (void);
+extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *);
union loongarch_gen_fn_ptrs
{
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index dae35a479..4b0944d56 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -5478,6 +5478,42 @@ loongarch_use_ins_ext_p (rtx op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos)
return true;
}
+/* Predicate for pre-reload splitters with associated instructions,
+ which can match any time before the split1 pass (usually combine),
+ then are unconditionally split in that pass and should not be
+ matched again afterwards.
+
+ Return true only while can_create_pseudo_p () holds and the current
+ function's properties do not yet include PROP_rtl_split_insns. */
+
+bool loongarch_pre_reload_split (void)
+{
+ return (can_create_pseudo_p ()
+ && !(cfun->curr_properties & PROP_rtl_split_insns));
+}
+
+/* Check if we can use bstrins.<d> for
+ op0 = (op1 & op2) | (op3 & op4)
+ where op0, op1, op3 are regs, and op2, op4 are integer constants.
+
+ MODE is handed on to low_bitmask_len; OP is the operand array, of which
+ only OP[2] and OP[4] (the two masks) are read here.
+
+ Return 0 if bstrins cannot be used, a positive value if it can be used
+ with the operands as given, and a negative value if it can be used once
+ the (op1, op2) pair is exchanged with the (op3, op4) pair; the
+ *bstrins_<mode>_for_ior_mask splitter performs that exchange.
+
+ NOTE(review): low_bitmask_len is defined elsewhere; this function only
+ relies on it returning -1 for a value it rejects, presumably one that
+ is not a contiguous run of set bits starting at bit 0. */
+int
+loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
+{
+ unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]);
+ unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]);
+
+ if (mask1 != ~mask2 || !mask1 || !mask2) /* Masks must be exact complements and both nonzero. */
+ return 0;
+
+ /* Try to avoid a right-shift: when mask1 is accepted as it stands, the
+ splitter computes a zero field offset after the exchange and therefore
+ emits no shift. */
+ if (low_bitmask_len (mode, mask1) != -1)
+ return -1;
+
+ if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1) /* mask2, shifted down past its trailing zero bits, is accepted. */
+ return 1;
+
+ if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1) /* Same test for mask1; needs the exchange. */
+ return -1;
+
+ return 0;
+}
+
/* Print the text for PRINT_OPERAND punctation character CH to FILE.
The punctuation characters are:
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 3dde0ceb1..11c18bf15 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1322,6 +1322,97 @@
[(set_attr "move_type" "pick_ins")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "*bstrins_<mode>_for_mask"
+ [(set (match_operand:GPR 0 "register_operand")
+ (and:GPR (match_operand:GPR 1 "register_operand")
+ (match_operand:GPR 2 "ins_zero_bitmask_operand")))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 0) (match_dup 1))
+ (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 3))
+ (const_int 0))]
+ { /* Split "op0 = op1 & mask" into a copy of op1 to op0 followed by a store of zero into the bit field of op0 that the mask clears. */
+ unsigned HOST_WIDE_INT mask = ~UINTVAL (operands[2]); /* Bits the AND clears. */
+ int lo = ffs_hwi (mask) - 1; /* Zero-based index of the lowest set bit of MASK (assuming ffs_hwi is 1-based like ffs). */
+ int len = low_bitmask_len (<MODE>mode, mask >> lo); /* Presumably the length of the run of set bits starting at LO - TODO confirm against low_bitmask_len. */
+
+ len = MIN (len, GET_MODE_BITSIZE (<MODE>mode) - lo); /* Never let the field extend past the top of the mode. */
+ operands[2] = GEN_INT (len); /* Reused as the zero_extract width. */
+ operands[3] = GEN_INT (lo); /* The zero_extract start position. */
+ })
+
+(define_insn_and_split "*bstrins_<mode>_for_ior_mask"
+ [(set (match_operand:GPR 0 "register_operand")
+ (ior:GPR (and:GPR (match_operand:GPR 1 "register_operand")
+ (match_operand:GPR 2 "const_int_operand"))
+ (and:GPR (match_operand:GPR 3 "register_operand")
+ (match_operand:GPR 4 "const_int_operand"))))]
+ "loongarch_pre_reload_split () && \
+ loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)"
+ "#"
+ ""
+ [(set (match_dup 0) (match_dup 1))
+ (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 4))
+ (match_dup 3))]
+ { /* Split "(op1 & op2) | (op3 & op4)" into a copy of one register to op0 followed by the insertion of a bit field taken from the other register. */
+ if (loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands) < 0) /* Negative result: exchange the two (register, mask) pairs first. */
+ {
+ std::swap (operands[1], operands[3]);
+ std::swap (operands[2], operands[4]);
+ }
+
+ unsigned HOST_WIDE_INT mask = ~UINTVAL (operands[2]); /* Bits that are not kept from operand 1, i.e. the field to insert. */
+ int lo = ffs_hwi (mask) - 1; /* Zero-based index of the lowest set bit of MASK (assuming ffs_hwi is 1-based like ffs). */
+ int len = low_bitmask_len (<MODE>mode, mask >> lo); /* Presumably the length of the run of set bits starting at LO - TODO confirm against low_bitmask_len. */
+
+ len = MIN (len, GET_MODE_BITSIZE (<MODE>mode) - lo); /* Never let the field extend past the top of the mode. */
+ operands[2] = GEN_INT (len); /* Reused as the zero_extract width. */
+ operands[4] = GEN_INT (lo); /* Reused as the zero_extract start position. */
+
+ if (lo) /* The field does not start at bit 0. */
+ {
+ rtx tmp = gen_reg_rtx (<MODE>mode); /* Fresh pseudo to hold operand 3 shifted right by LO; it replaces operand 3 as the insertion source. */
+ emit_move_insn (tmp, gen_rtx_ASHIFTRT(<MODE>mode, operands[3],
+ GEN_INT (lo)));
+ operands[3] = tmp;
+ }
+ })
+
+;; We always avoid the shift operation in bstrins_<mode>_for_ior_mask
+;; if possible, but the result may be sub-optimal when one of the masks
+;; is (1 << N) - 1 and one of the src register is the dest register.
+;; For example:
+;; move t0, a0
+;; move a0, a1
+;; bstrins.d a0, t0, 42, 0
+;; ret
+;; using a shift operation would be better:
+;; srai.d t0, a1, 43
+;; bstrins.d a0, t0, 63, 43
+;; ret
+;; unfortunately we cannot figure it out in split1: before reload we cannot
+;; know if the dest register is one of the src register. Fix it up in
+;; peephole2.
+(define_peephole2
+ [(set (match_operand:GPR 0 "register_operand")
+ (match_operand:GPR 1 "register_operand"))
+ (set (match_dup 1) (match_operand:GPR 2 "register_operand"))
+ (set (zero_extract:GPR (match_dup 1)
+ (match_operand:SI 3 "const_int_operand")
+ (const_int 0))
+ (match_dup 0))]
+ "peep2_reg_dead_p (3, operands[0])"
+ [(const_int 0)]
+ { /* Replace the matched move / move / insert-at-bit-0 sequence with one shift and one insert; operand 0 is only a scratch here, as the condition requires it to be dead after the third insn. */
+ int len = GET_MODE_BITSIZE (<MODE>mode) - INTVAL (operands[3]); /* Width of the field running from bit operands[3] to the top of the mode. */
+
+ emit_insn (gen_ashr<mode>3 (operands[0], operands[2], operands[3])); /* op0 = op2 shifted right by op3 bits. */
+ emit_insn (gen_insv<mode> (operands[1], GEN_INT (len), operands[3],
+ operands[0])); /* Insert LEN bits from op0 into op1 starting at bit op3; the bits of op1 below op3 are left as they were. */
+ DONE;
+ })
+
(define_insn "*iorhi3"
[(set (match_operand:HI 0 "register_operand" "=r,r")
(ior:HI (match_operand:HI 1 "register_operand" "%r,r")
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
index cf9361b73..ad6cee5c4 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -408,6 +408,14 @@
(define_predicate "muldiv_target_operand"
(match_operand 0 "register_operand"))
+(define_predicate "ins_zero_bitmask_operand" ; AND mask accepted by *bstrins_<mode>_for_mask.
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) != -1") ; Reject all-ones: ANDing with it would clear nothing.
+ (match_test "INTVAL (op) & 1") ; Bit 0 of the mask must be set.
+ (match_test "low_bitmask_len (mode, \
+ ~UINTVAL (op) | (~UINTVAL(op) - 1)) \
+ > 12"))) ; The argument is ~op with every bit below its lowest set bit also set; presumably this requires the zero bits of op to form one run ending above bit 11 - TODO confirm against low_bitmask_len.
+
(define_predicate "const_call_insn_operand"
(match_code "const,symbol_ref,label_ref")
{
diff --git a/gcc/testsuite/g++.target/loongarch/bstrins-compile.C b/gcc/testsuite/g++.target/loongarch/bstrins-compile.C
new file mode 100644
index 000000000..3c0db1de4
--- /dev/null
+++ b/gcc/testsuite/g++.target/loongarch/bstrins-compile.C
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c++14 -O2 -march=loongarch64 -mabi=lp64d" } */
+/* { dg-final { scan-assembler "bstrins\\.d.*7,4" } } */
+/* { dg-final { scan-assembler "bstrins\\.d.*15,4" } } */
+/* { dg-final { scan-assembler "bstrins\\.d.*31,4" } } */
+/* { dg-final { scan-assembler "bstrins\\.d.*47,4" } } */
+/* { dg-final { scan-assembler "bstrins\\.d.*3,0" } } */
+
+typedef unsigned long u64;
+
+template <u64 mask> /* One instantiation per constant mask under test. */
+u64
+test (u64 a, u64 b)
+{
+ return (a & mask) | (b & ~mask); /* Bits selected by MASK come from a, all other bits from b. */
+}
+
+template u64 test<0x0000'0000'0000'00f0l> (u64, u64); /* MASK covers bits 7..4. */
+template u64 test<0x0000'0000'0000'fff0l> (u64, u64); /* MASK covers bits 15..4. */
+template u64 test<0x0000'0000'ffff'fff0l> (u64, u64); /* MASK covers bits 31..4. */
+template u64 test<0x0000'ffff'ffff'fff0l> (u64, u64); /* MASK covers bits 47..4. */
+template u64 test<0xffff'ffff'ffff'fff0l> (u64, u64); /* MASK covers bits 63..4, so ~MASK covers bits 3..0. */
diff --git a/gcc/testsuite/g++.target/loongarch/bstrins-run.C b/gcc/testsuite/g++.target/loongarch/bstrins-run.C
new file mode 100644
index 000000000..68913d5e0
--- /dev/null
+++ b/gcc/testsuite/g++.target/loongarch/bstrins-run.C
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+typedef unsigned long gr;
+
+template <int l, int r>
+struct mask {
+ enum { value = (1ul << r) - (1ul << l) }; /* Bits l .. r-1 set. */
+};
+
+template <int l>
+struct mask<l, sizeof (gr) * __CHAR_BIT__> { /* r equal to the bit width of gr. */
+ enum { value = -(1ul << l) }; /* Bits l and above set, computed without shifting by the full width. */
+};
+
+__attribute__ ((noipa)) void
+test (gr a, gr b, gr mask, gr out)
+{
+ if (((a & mask) | (b & ~mask)) != out) /* Expression under test, with a run-time mask. */
+ __builtin_abort ();
+}
+
+__attribute__ ((noipa)) gr
+no_optimize (gr x)
+{
+ return x; /* Identity; presumably noipa keeps callers from seeing the value as a constant. */
+}
+
+template <int l, int r>
+struct test1 {
+ static void
+ run (void)
+ {
+ gr m = mask<l, r>::value;
+ gr a = no_optimize (-1ul); /* All bits set. */
+ gr b = no_optimize (0); /* No bits set. */
+
+ test (a, b, m, (a & m) | (b & ~m)); /* Field from a, remainder from b. */
+ test (a, b, ~m, (a & ~m) | (b & m)); /* Same with the mask complemented. */
+ test (a, 0, ~m, a & ~m); /* Second source zero: reduces to a & ~m. */
+
+ test1<l, r + 1>::run (); /* Continue with the next larger r. */
+ }
+};
+
+template <int l>
+struct test1<l, sizeof (gr) * __CHAR_BIT__ + 1> { /* Ends the recursion once r exceeds the bit width. */
+ static void run (void) {}
+};
+
+template <int l>
+void
+test2 (void)
+{
+ test1<l, l + 1>::run (); /* Every r from l + 1 up to the bit width, for this l. */
+ test2<l + 1> (); /* Then the next l. */
+}
+
+template <> void test2<sizeof (gr) * __CHAR_BIT__> (void) {} /* Ends the recursion over l. */
+
+int
+main ()
+{
+ test2<0> (); /* Start from l = 0. */
+}
--
2.33.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/cenhuilin/gcc.git
git@gitee.com:cenhuilin/gcc.git
cenhuilin
gcc
gcc
master

搜索帮助