1 Star 0 Fork 128

Mingtai/gcc

forked from src-openEuler/gcc 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0009-Backport-expand-Simplify-removing-subregs-when-expan.patch 4.98 KB
一键复制 编辑 原始数据 按行查看 历史
eastb233 提交于 2021-07-28 11:42 . [Sync] Sync patch from openeuler/gcc
From 7bc78d0ab13c37e2b11adb385d9916181ec4cc20 Mon Sep 17 00:00:00 2001
From: zhanghaijian <z.zhanghaijian@huawei.com>
Date: Thu, 15 Jul 2021 09:04:55 +0800
Subject: [PATCH 09/13] [Backport]expand: Simplify removing subregs when
expanding a copy [PR95254]
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9a182ef9ee011935d827ab5c6c9a7cd8e22257d8
In rtl expand, if we have a copy that matches one of the following patterns:
(set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
(set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR))
(set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...)))
(set (subreg:M1 (reg:M2 ...)) (constant C))
where mode M1 is equal in size to M2, try to detect whether the mode change
involves an implicit round trip through memory. If so, see if we can avoid
that by removing the subregs and doing the move in mode M2 instead.
diff --git a/gcc/expr.c b/gcc/expr.c
index 991b26f3341..d66fdd4e93d 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -3814,6 +3814,78 @@ emit_move_insn (rtx x, rtx y)
gcc_assert (mode != BLKmode
&& (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
+ /* If we have a copy that looks like one of the following patterns:
+ (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
+ (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR))
+ (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...)))
+ (set (subreg:M1 (reg:M2 ...)) (constant C))
+ where mode M1 is equal in size to M2, try to detect whether the
+ mode change involves an implicit round trip through memory.
+ If so, see if we can avoid that by removing the subregs and
+ doing the move in mode M2 instead. */
+
+ rtx x_inner = NULL_RTX;
+ rtx y_inner = NULL_RTX;
+
+#define CANDIDATE_SUBREG_P(subreg) \
+ (REG_P (SUBREG_REG (subreg)) \
+ && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))), \
+ GET_MODE_SIZE (GET_MODE (subreg))) \
+ && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg))) \
+ != CODE_FOR_nothing)
+
+#define CANDIDATE_MEM_P(innermode, mem) \
+ (!targetm.can_change_mode_class ((innermode), GET_MODE (mem), ALL_REGS) \
+ && !push_operand ((mem), GET_MODE (mem)) \
+ /* Not a candidate if innermode requires too much alignment. */ \
+ && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode) \
+ || targetm.slow_unaligned_access (GET_MODE (mem), \
+ MEM_ALIGN (mem)) \
+ || !targetm.slow_unaligned_access ((innermode), \
+ MEM_ALIGN (mem))))
+
+ if (SUBREG_P (x) && CANDIDATE_SUBREG_P (x))
+ x_inner = SUBREG_REG (x);
+
+ if (SUBREG_P (y) && CANDIDATE_SUBREG_P (y))
+ y_inner = SUBREG_REG (y);
+
+ if (x_inner != NULL_RTX
+ && y_inner != NULL_RTX
+ && GET_MODE (x_inner) == GET_MODE (y_inner)
+ && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS))
+ {
+ x = x_inner;
+ y = y_inner;
+ mode = GET_MODE (x_inner);
+ }
+ else if (x_inner != NULL_RTX
+ && MEM_P (y)
+ && CANDIDATE_MEM_P (GET_MODE (x_inner), y))
+ {
+ x = x_inner;
+ y = adjust_address (y, GET_MODE (x_inner), 0);
+ mode = GET_MODE (x_inner);
+ }
+ else if (y_inner != NULL_RTX
+ && MEM_P (x)
+ && CANDIDATE_MEM_P (GET_MODE (y_inner), x))
+ {
+ x = adjust_address (x, GET_MODE (y_inner), 0);
+ y = y_inner;
+ mode = GET_MODE (y_inner);
+ }
+ else if (x_inner != NULL_RTX
+ && CONSTANT_P (y)
+ && !targetm.can_change_mode_class (GET_MODE (x_inner),
+ mode, ALL_REGS)
+ && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0)))
+ {
+ x = x_inner;
+ y = y_inner;
+ mode = GET_MODE (x_inner);
+ }
+
if (CONSTANT_P (y))
{
if (optimize
diff --git a/gcc/testsuite/gcc.target/aarch64/pr95254.c b/gcc/testsuite/gcc.target/aarch64/pr95254.c
new file mode 100644
index 00000000000..10bfc868197
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr95254.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-slp-vectorize -march=armv8.2-a+sve -msve-vector-bits=256" } */
+
+typedef short __attribute__((vector_size (8))) v4hi;
+
+typedef union U4HI { v4hi v; short a[4]; } u4hi;
+
+short b[4];
+
+void pass_v4hi (v4hi v)
+{
+ int i;
+ u4hi u;
+ u.v = v;
+ for (i = 0; i < 4; i++)
+ b[i] = u.a[i];
+};
+
+/* { dg-final { scan-assembler-not "ptrue" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr67609.c b/gcc/testsuite/gcc.target/i386/pr67609.c
index 518071bdd86..398cdba5d5f 100644
--- a/gcc/testsuite/gcc.target/i386/pr67609.c
+++ b/gcc/testsuite/gcc.target/i386/pr67609.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse2" } */
/* { dg-require-effective-target lp64 } */
-/* { dg-final { scan-assembler "movdqa" } } */
+/* { dg-final { scan-assembler "movq\t%xmm0" } } */
#include <emmintrin.h>
__m128d reg;
--
2.21.0.windows.1
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/yangmingtaip/gcc.git
git@gitee.com:yangmingtaip/gcc.git
yangmingtaip
gcc
gcc
master

搜索帮助