代码拉取完成,页面将自动刷新
同步操作将从 src-openEuler/gcc 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
From a92cf465f10585350f7cd5739457c3f2852cfc86 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Wed, 21 Oct 2020 10:51:33 +0200
Subject: [PATCH 05/35] [Backport] phiopt: Optimize x ? __builtin_clz (x) : 32
in GIMPLE [PR97503]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=5244b4af5e47bc98a2a9cf36f048981583a1b163
While we have at the RTL level noce_try_ifelse_collapse combined with
simplify_cond_clz_ctz, that optimization doesn't always trigger because
e.g. on powerpc there is a define_insn to compare a reg against zero and
copy that register to another one and so we end up with a different pseudo
in the simplify_cond_clz_ctz test and punt.
For targets that define C?Z_DEFINED_VALUE_AT_ZERO to 2 for certain modes,
we can optimize it already in phiopt though, just need to ensure that
we transform the __builtin_c?z* calls into .C?Z ifns because my recent
VRP changes codified that the builtin calls are always undefined at zero,
while ifns honor C?Z_DEFINED_VALUE_AT_ZERO equal to 2.
And, in phiopt we already have popcount handling that does pretty much the
same thing, except for always using a zero value rather than the one set
by C?Z_DEFINED_VALUE_AT_ZERO.
So, this patch extends that function to handle not just popcount, but also
clz and ctz.
2020-10-21 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/97503
* tree-ssa-phiopt.c: Include internal-fn.h.
(cond_removal_in_popcount_pattern): Rename to ...
(cond_removal_in_popcount_clz_ctz_pattern): ... this. Handle not just
popcount, but also clz and ctz if it has C?Z_DEFINED_VALUE_AT_ZERO 2.
* gcc.dg/tree-ssa/pr97503.c: New test.
---
gcc/testsuite/gcc.dg/tree-ssa/pr97503.c | 19 +++++
gcc/tree-ssa-phiopt.c | 100 ++++++++++++++++++------
2 files changed, 95 insertions(+), 24 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
new file mode 100644
index 000000000..3a3dae6c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
@@ -0,0 +1,19 @@
+/* PR tree-optimization/97503 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-additional-options "-mbmi -mlzcnt" { target i?86-*-* x86_64-*-* } } */
+/* { dg-final { scan-tree-dump-times "\.CLZ" 2 "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-* } && lp64 } } } } */
+/* { dg-final { scan-tree-dump-not "__builtin_clz" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
+
+int
+foo (int x)
+{
+ return x ? __builtin_clz (x) : 32;
+}
+
+int
+bar (unsigned long long x)
+{
+ return x ? __builtin_clzll (x) : 64;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 0623d740d..c1e11916e 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "case-cfn-macros.h"
#include "tree-eh.h"
+#include "internal-fn.h"
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
@@ -60,8 +61,9 @@ static bool minmax_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
-static bool cond_removal_in_popcount_pattern (basic_block, basic_block,
- edge, edge, gimple *, tree, tree);
+static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
+ edge, edge, gimple *,
+ tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
@@ -348,8 +350,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
- && cond_removal_in_popcount_pattern (bb, bb1, e1, e2,
- phi, arg0, arg1))
+ && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
+ e2, phi, arg0,
+ arg1))
cfgchanged = true;
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
@@ -1771,16 +1774,20 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb,
<bb 4>
c_12 = PHI <_9(2)>
-*/
+
+ Similarly for __builtin_clz or __builtin_ctz if
+ C?Z_DEFINED_VALUE_AT_ZERO is 2, optab is present and
+ instead of 0 above it uses the value from that macro. */
static bool
-cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
- edge e1, edge e2,
- gimple *phi, tree arg0, tree arg1)
+cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
+ basic_block middle_bb,
+ edge e1, edge e2, gimple *phi,
+ tree arg0, tree arg1)
{
gimple *cond;
gimple_stmt_iterator gsi, gsi_from;
- gimple *popcount;
+ gimple *call;
gimple *cast = NULL;
tree lhs, arg;
@@ -1798,35 +1805,67 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
gsi_next_nondebug (&gsi);
if (!gsi_end_p (gsi))
{
- popcount = gsi_stmt (gsi);
+ call = gsi_stmt (gsi);
gsi_next_nondebug (&gsi);
if (!gsi_end_p (gsi))
return false;
}
else
{
- popcount = cast;
+ call = cast;
cast = NULL;
}
- /* Check that we have a popcount builtin. */
- if (!is_gimple_call (popcount))
+ /* Check that we have a popcount/clz/ctz builtin. */
+ if (!is_gimple_call (call) || gimple_call_num_args (call) != 1)
+ return false;
+
+ arg = gimple_call_arg (call, 0);
+ lhs = gimple_get_lhs (call);
+
+ if (lhs == NULL_TREE)
return false;
- combined_fn cfn = gimple_call_combined_fn (popcount);
+
+ combined_fn cfn = gimple_call_combined_fn (call);
+ internal_fn ifn = IFN_LAST;
+ int val = 0;
switch (cfn)
{
CASE_CFN_POPCOUNT:
break;
+ CASE_CFN_CLZ:
+ if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
+ {
+ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
+ if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg),
+ OPTIMIZE_FOR_BOTH)
+ && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+ {
+ ifn = IFN_CLZ;
+ break;
+ }
+ }
+ return false;
+ CASE_CFN_CTZ:
+ if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
+ {
+ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
+ if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg),
+ OPTIMIZE_FOR_BOTH)
+ && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+ {
+ ifn = IFN_CTZ;
+ break;
+ }
+ }
+ return false;
default:
return false;
}
- arg = gimple_call_arg (popcount, 0);
- lhs = gimple_get_lhs (popcount);
-
if (cast)
{
- /* We have a cast stmt feeding popcount builtin. */
+ /* We have a cast stmt feeding popcount/clz/ctz builtin. */
/* Check that we have a cast prior to that. */
if (gimple_code (cast) != GIMPLE_ASSIGN
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast)))
@@ -1839,7 +1878,7 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
cond = last_stmt (cond_bb);
- /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount
+ /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount/clz/ctz
builtin. */
if (gimple_code (cond) != GIMPLE_COND
|| (gimple_cond_code (cond) != NE_EXPR
@@ -1859,10 +1898,13 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
}
/* Check PHI arguments. */
- if (lhs != arg0 || !integer_zerop (arg1))
+ if (lhs != arg0
+ || TREE_CODE (arg1) != INTEGER_CST
+ || wi::to_wide (arg1) != val)
return false;
- /* And insert the popcount builtin and cast stmt before the cond_bb. */
+ /* And insert the popcount/clz/ctz builtin and cast stmt before the
+ cond_bb. */
gsi = gsi_last_bb (cond_bb);
if (cast)
{
@@ -1870,9 +1912,19 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
gsi_move_before (&gsi_from, &gsi);
reset_flow_sensitive_info (gimple_get_lhs (cast));
}
- gsi_from = gsi_for_stmt (popcount);
- gsi_move_before (&gsi_from, &gsi);
- reset_flow_sensitive_info (gimple_get_lhs (popcount));
+ gsi_from = gsi_for_stmt (call);
+ if (ifn == IFN_LAST || gimple_call_internal_p (call))
+ gsi_move_before (&gsi_from, &gsi);
+ else
+ {
+ /* For __builtin_c[lt]z* force .C[LT]Z ifn, because only
+ the latter is well defined at zero. */
+ call = gimple_build_call_internal (ifn, 1, gimple_call_arg (call, 0));
+ gimple_call_set_lhs (call, lhs);
+ gsi_insert_before (&gsi, call, GSI_SAME_STMT);
+ gsi_remove (&gsi_from, true);
+ }
+ reset_flow_sensitive_info (lhs);
/* Now update the PHI and remove unneeded bbs. */
replace_phi_edge_with_variable (cond_bb, e2, phi, lhs);
--
2.27.0.windows.1
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。