1 Star 0 Fork 128

Mingtai/gcc

forked from src-openEuler/gcc 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0027-Autoprefetch-Support-auto-feedback-prefetch.patch 29.47 KB
一键复制 编辑 原始数据 按行查看 历史
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000
From 6b944bed1158d3454b1db27aeab4ec1f2b8e5866 Mon Sep 17 00:00:00 2001
From: huangxiaoquan <huangxiaoquan1@huawei.com>
Date: Thu, 27 Jan 2022 18:24:53 +0800
Subject: [PATCH 27/28] [Autoprefetch] Support auto feedback prefetch
1.Add option -fprefetch-loop-arrays=[value].
2.A prefetch distance analysis algorithm based on branch weight
is proposed to improve the accuracy of prefetch distance.
3.Propose automatic feedback prefetching:
use the cache-miss profile information to guide the insertion of
prefetching instructions.
---
gcc/auto-profile.c | 5 +-
gcc/common.opt | 5 +
gcc/opts.c | 7 +
gcc/params.opt | 16 +
gcc/tree-ssa-loop-prefetch.c | 735 ++++++++++++++++++++++++++++++++++-
5 files changed, 748 insertions(+), 20 deletions(-)
diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
index e6164b91b..f221978fc 100644
--- a/gcc/auto-profile.c
+++ b/gcc/auto-profile.c
@@ -21,6 +21,8 @@ along with GCC; see the file COPYING3. If not see
#include "config.h"
#define INCLUDE_MAP
#define INCLUDE_SET
+#define INCLUDE_ALGORITHM
+#define INCLUDE_VECTOR
#include "system.h"
#include "coretypes.h"
#include "backend.h"
@@ -49,9 +51,6 @@ along with GCC; see the file COPYING3. If not see
#include "auto-profile.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
-#include <map>
-#include <vector>
-#include <algorithm>
/* The following routines implements AutoFDO optimization.
diff --git a/gcc/common.opt b/gcc/common.opt
index 37cbbd8c0..9488bd90f 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2201,6 +2201,11 @@ fprefetch-loop-arrays
Common Report Var(flag_prefetch_loop_arrays) Init(-1) Optimization
Generate prefetch instructions, if available, for arrays in loops.
+fprefetch-loop-arrays=
+Common Joined RejectNegative UInteger Var(prefetch_level) Init(0) IntegerRange(0, 3)
+Generate prefetch instructions, if available, for arrays in loops.  The
+prefetch level (0-3) selects the array prefetch optimization level.
+
fprofile
Common Report Var(profile_flag)
Enable basic program profiling code.
diff --git a/gcc/opts.c b/gcc/opts.c
index 7a39f618b..f49f5ee58 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1747,6 +1747,8 @@ set_cache_misses_profile_params (struct gcc_options *opts,
struct gcc_options *opts_set)
{
SET_OPTION_IF_UNSET (opts, opts_set, flag_prefetch_loop_arrays, 1);
+ SET_OPTION_IF_UNSET (opts, opts_set, prefetch_level, 2);
+ SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 100);
}
/* -f{,no-}sanitize{,-recover}= suboptions. */
@@ -2645,6 +2647,11 @@ common_handle_option (struct gcc_options *opts,
SET_OPTION_IF_UNSET (opts, opts_set, flag_ipa_bit_cp, value);
break;
+ case OPT_fprefetch_loop_arrays_:
+ opts->x_prefetch_level = value;
+ opts->x_flag_prefetch_loop_arrays = true;
+ break;
+
case OPT_fpatchable_function_entry_:
{
char *patch_area_arg = xstrdup (arg);
diff --git a/gcc/params.opt b/gcc/params.opt
index 2db69cc87..9d1faa7ab 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -968,4 +968,20 @@ Bound on number of runtime checks inserted by the vectorizer's loop versioning f
Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6) Param Optimization
Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check.
+-param=param-prefetch-func-topn=
+Common Joined UInteger Var(param_prefetch_func_topn) Init(3) Param Optimization
+Number of functions, ranked by cache miss count, to be analyzed in prefetching.
+
+-param=param-prefetch-ref-topn=
+Common Joined UInteger Var(param_prefetch_ref_topn) Init(5) Param Optimization
+Number of memory references, ranked by cache miss count, to be analyzed in prefetching.
+
+-param=param-high-loop-execution-rate=
+Common Joined UInteger Var(param_high_loop_execution_rate) Init(95) IntegerRange(0, 100) Param Optimization
+Minimum execution rate (in %) a loop must have to be analyzed in prefetching.
+
+-param=param-prefetch-func-counts-threshold=
+Common Joined UInteger Var(param_prefetch_func_counts_threshold) Init(100) Param Optimization
+Minimum cache miss count a function must have to be analyzed in prefetching.
+
; This comment is to ensure we retain the blank line above.
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index d19ece641..3a5aef0fc 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -18,6 +18,9 @@ along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
+#define INCLUDE_ALGORITHM
+#define INCLUDE_MAP
+#define INCLUDE_VECTOR
#include "system.h"
#include "coretypes.h"
#include "backend.h"
@@ -48,6 +51,11 @@ along with GCC; see the file COPYING3. If not see
#include "tree-data-ref.h"
#include "diagnostic-core.h"
#include "dbgcnt.h"
+#include "gimple-pretty-print.h"
+#include "tree-cfg.h"
+#include "auto-profile.h"
+#include "cgraph.h"
+#include "print-tree.h"
/* This pass inserts prefetch instructions to optimize cache usage during
accesses to arrays in loops. It processes loops sequentially and:
@@ -253,6 +261,22 @@ struct mem_ref_group
#define PREFETCH_MAX_MEM_REFS_PER_LOOP 200
#endif
+#ifndef PREFETCH_FUNC_TOPN
+#define PREFETCH_FUNC_TOPN param_prefetch_func_topn
+#endif
+
+#ifndef PREFETCH_FUNC_COUNTS_THRESHOLD
+#define PREFETCH_FUNC_COUNTS_THRESHOLD param_prefetch_func_counts_threshold
+#endif
+
+#ifndef PREFETCH_REF_TOPN
+#define PREFETCH_REF_TOPN param_prefetch_ref_topn
+#endif
+
+#ifndef LOOP_EXECUTION_RATE
+#define LOOP_EXECUTION_RATE param_high_loop_execution_rate
+#endif
+
/* The memory reference. */
struct mem_ref
@@ -279,6 +303,131 @@ struct mem_ref
nontemporal one. */
};
+/* Probability information of basic blocks and branches.  One record is
+   kept per basic block that belongs directly to the analyzed loop. */
+struct bb_bp
+{
+ /* The basic block this record describes. */
+ basic_block bb;
+ /* Destination of the true edge (or of the fallthru edge), or NULL when
+    no such in-loop successor was recorded. */
+ basic_block true_edge_bb;
+ /* Destination of the false edge, or NULL when no such in-loop successor
+    was recorded. */
+ basic_block false_edge_bb;
+ /* Probability of the edge leading to TRUE_EDGE_BB. */
+ float true_edge_prob;
+ /* Probability of the edge leading to FALSE_EDGE_BB. */
+ float false_edge_prob;
+ /* Accumulated probability of reaching BB; starts at 0 and is summed over
+    the paths walked from the loop header. */
+ float bb_prob;
+};
+
+typedef struct bb_bp bb_bp;
+
+/* Prefetch analysis modes; PREFETCH_LEVEL is compared against these
+   values. */
+enum PREFETCH_MODE
+{
+ /* Original prefetch method. */
+ ORIGINAL_MODE=0,
+ /* Prefetch distance algorithm for removing
+ irrelevant bb. */
+ REFINE_BB_AHEAD,
+ /* Branch weighted prefetch
+ distance algorithm. */
+ BRANCH_WEIGHTED_AHEAD,
+ /* Indirect array prefetch mode. */
+ INDIRECT_MODE
+};
+
+typedef std::map <unsigned int, unsigned int> uid_rank_map;
+typedef std::map <location_t, unsigned int> loc_rank_map;
+typedef std::vector <std::pair<location_t, gcov_type> > loc_gcov_type_vec;
+typedef std::map <location_t, std::vector<gimple *> > loc_gimple_vec_map;
+
+static loc_rank_map ref_rank;
+
+/* Comparison callback for sorting (location, event count) pairs in
+   descending order of event count.  Returns true when A must be placed
+   before B.  The operands are taken by const reference: a sort comparator
+   must not modify its arguments and must be callable on const elements. */
+
+static bool
+event_count_cmp (const std::pair<unsigned int, gcov_type> &a,
+                 const std::pair<unsigned int, gcov_type> &b)
+{
+  return a.second > b.second;
+}
+
+/* Scan every statement of FUN and collect the assignments that carry a
+   virtual use, reference memory on either side, and have a positive EVENT
+   count at their location.  REF_SORTED receives one (location, count) pair
+   per distinct location; LOC_STMT maps each such location to all of the
+   statements found at it. */
+
+static void
+prepare_loc_count_info (function *fun, loc_gcov_type_vec &ref_sorted,
+                        loc_gimple_vec_map &loc_stmt, event_type event)
+{
+  basic_block block = NULL;
+
+  FOR_EACH_BB_FN (block, fun)
+    for (gimple_stmt_iterator it = gsi_start_bb (block); !gsi_end_p (it);
+         gsi_next (&it))
+      {
+        gimple *cur = gsi_stmt (it);
+
+        /* Only assignments with a virtual use are candidates. */
+        if (gimple_code (cur) != GIMPLE_ASSIGN || !gimple_vuse (cur))
+          continue;
+
+        tree dst = gimple_assign_lhs (cur);
+        tree src = gimple_assign_rhs1 (cur);
+        if (!REFERENCE_CLASS_P (src) && !REFERENCE_CLASS_P (dst))
+          continue;
+
+        gcov_type loc_count = event_get_loc_count (gimple_location (cur),
+                                                   event);
+        if (loc_count <= 0)
+          continue;
+
+        /* Several statements may share one location; record the location's
+           count only the first time it is seen. */
+        if (loc_stmt.find (gimple_location (cur)) == loc_stmt.end ())
+          ref_sorted.push_back (std::make_pair (gimple_location (cur),
+                                                loc_count));
+        loc_stmt[gimple_location (cur)].push_back (cur);
+      }
+}
+
+/* Rank the memory references of FUN by their EVENT count (rank 1 has the
+   highest count) and store the ranks in REF_RANK, keyed by location.  When
+   detailed dumping is enabled, also print the statements and the count of
+   the PREFETCH_REF_TOPN highest ranked locations. */
+
+static void
+sort_ref_by_event_count (function *fun, event_type event)
+{
+  loc_gcov_type_vec ref_sorted;
+  loc_gimple_vec_map loc_stmt;
+
+  /* REF_RANK is file-scope state; drop the ranks left over from functions
+     processed earlier so the map only describes FUN. */
+  ref_rank.clear ();
+
+  prepare_loc_count_info (fun, ref_sorted, loc_stmt, event);
+  std::sort (ref_sorted.begin (), ref_sorted.end (), event_count_cmp);
+
+  const bool dump_details = dump_file && (dump_flags & TDF_DETAILS);
+  for (unsigned i = 0; i < ref_sorted.size (); ++i)
+    {
+      const location_t loc = ref_sorted[i].first;
+      const unsigned rank = i + 1;
+      ref_rank[loc] = rank;
+
+      /* Print the stmt and count of the topn ref. */
+      if (!dump_details || i >= (unsigned) PREFETCH_REF_TOPN)
+        continue;
+
+      fprintf (dump_file, "stmt: \n");
+      const std::vector<gimple *> &stmts = loc_stmt[loc];
+      for (unsigned j = 0; j < stmts.size (); ++j)
+        print_gimple_stmt (dump_file, stmts[j], 0);
+
+      /* The sorted pair already holds the count that was ranked, so it is
+         printed directly instead of being looked up a second time.  The
+         count is a 64-bit gcov_type and the rank is unsigned, hence
+         PRId64 and %u rather than %lu and %d. */
+      fprintf (dump_file, "stmt loc %u counts is %" PRId64 ": "
+               "rank %u in top %d, (candidate analysis)\n\n",
+               loc, (int64_t) ref_sorted[i].second, rank,
+               PREFETCH_REF_TOPN);
+    }
+}
+
/* Dumps information about memory reference */
static void
dump_mem_details (FILE *file, tree base, tree step,
@@ -479,6 +628,30 @@ idx_analyze_ref (tree base, tree *index, void *data)
return true;
}
+/* Print REF to FILE followed by the step and delta currently stored in
+   AR_DATA.  The step is printed only when it is non-null (as an integer if
+   it is a constant that fits a HOST_WIDE_INT, otherwise as an expression);
+   the delta is printed only when it is nonzero. */
+
+static void
+dump_ar_data_details (FILE *file, tree ref, struct ar_data &ar_data)
+{
+  print_generic_expr (file, ref, TDF_SLIM);
+  fprintf (file, "\n");
+
+  tree step_expr = *ar_data.step;
+  if (step_expr)
+    {
+      fprintf (file, " step ");
+      if (!cst_and_fits_in_hwi (step_expr))
+        print_generic_expr (file, step_expr, TDF_SLIM);
+      else
+        fprintf (file, HOST_WIDE_INT_PRINT_DEC, int_cst_value (step_expr));
+    }
+  fprintf (file, "\n");
+
+  if (*ar_data.delta)
+    fprintf (file, " delta " HOST_WIDE_INT_PRINT_DEC "\n", *ar_data.delta);
+}
+
/* Tries to express REF_P in shape &BASE + STEP * iter + DELTA, where DELTA and
STEP are integer constants and iter is number of iterations of LOOP. The
reference occurs in statement STMT. Strips nonaddressable component
@@ -526,7 +699,17 @@ analyze_ref (class loop *loop, tree *ref_p, tree *base,
ar_data.stmt = stmt;
ar_data.step = step;
ar_data.delta = delta;
- return for_each_index (base, idx_analyze_ref, &ar_data);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ dump_ar_data_details (dump_file, ref, ar_data);
+ }
+ bool idx_flag = for_each_index (base, idx_analyze_ref, &ar_data);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "idx_flag = %d \n\n", idx_flag);
+ }
+ return idx_flag;
}
/* Record a memory reference REF to the list REFS. The reference occurs in
@@ -601,6 +784,55 @@ gather_memory_references_ref (class loop *loop, struct mem_ref_group **refs,
return true;
}
+/* Decide whether the memory references of STMT should be collected.
+   Without a cache-miss profile every statement is accepted.  With a
+   profile, STMT is accepted only when its location has a rank in REF_RANK
+   that does not exceed PREFETCH_REF_TOPN.  The decision is reported in the
+   dump file when detailed dumping is enabled. */
+
+static bool
+should_gather_memory_references (gimple *stmt)
+{
+  if (!profile_exist (CACHE_MISSES))
+    return true;
+
+  const bool dump_details = dump_file && (dump_flags & TDF_DETAILS);
+  const location_t loc = gimple_location (stmt);
+
+  if (dump_details)
+    {
+      fprintf (dump_file, "stmt:");
+      print_gimple_stmt (dump_file, stmt, 0);
+      fprintf (dump_file, "\n");
+    }
+
+  /* A single lookup serves both the presence test and the rank. */
+  loc_rank_map::const_iterator it = ref_rank.find (loc);
+  if (it == ref_rank.end ())
+    {
+      if (dump_details)
+        fprintf (dump_file, "stmt location no found, skip prefetch "
+                 "analysis\n");
+      return false;
+    }
+
+  const unsigned rank = it->second;
+  gcov_type loc_count = event_get_loc_count (loc, CACHE_MISSES);
+
+  /* The count is a 64-bit gcov_type and the rank is unsigned, so they are
+     printed with PRId64 and %u instead of %lu and %d. */
+  if (rank > (unsigned) PREFETCH_REF_TOPN)
+    {
+      if (dump_details)
+        fprintf (dump_file, "stmt loc %u counts is %" PRId64 ":"
+                 "rank %u exceed topn %d, skip prefetch "
+                 "analysis\n",
+                 loc, (int64_t) loc_count, rank, PREFETCH_REF_TOPN);
+      return false;
+    }
+
+  if (dump_details)
+    fprintf (dump_file, "stmt loc %u counts is %" PRId64 ": rank %u in top %d,"
+             "continue prefetch analysis\n",
+             loc, (int64_t) loc_count, rank, PREFETCH_REF_TOPN);
+  return true;
+}
+
/* Record the suitable memory references in LOOP. NO_OTHER_REFS is set to
true if there are no other memory references inside the loop. */
@@ -626,6 +858,13 @@ gather_memory_references (class loop *loop, bool *no_other_refs, unsigned *ref_c
if (bb->loop_father != loop)
continue;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "==== the %dth loop bb body ====\n", i);
+ gimple_dump_bb (dump_file, bb, 0, dump_flags);
+ fprintf (dump_file, "\n");
+ }
+
for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
{
stmt = gsi_stmt (bsi);
@@ -642,20 +881,31 @@ gather_memory_references (class loop *loop, bool *no_other_refs, unsigned *ref_c
if (! gimple_vuse (stmt))
continue;
+ if (!should_gather_memory_references (stmt))
+ continue;
+
lhs = gimple_assign_lhs (stmt);
rhs = gimple_assign_rhs1 (stmt);
if (REFERENCE_CLASS_P (rhs))
{
- *no_other_refs &= gather_memory_references_ref (loop, &refs,
- rhs, false, stmt);
- *ref_count += 1;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "====> the %dth ref \n", *ref_count);
+ }
+ *no_other_refs &= gather_memory_references_ref (loop, &refs, rhs,
+ false, stmt);
+ *ref_count += 1;
}
if (REFERENCE_CLASS_P (lhs))
{
- *no_other_refs &= gather_memory_references_ref (loop, &refs,
- lhs, true, stmt);
- *ref_count += 1;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "====> the %dth ref \n", *ref_count);
+ }
+ *no_other_refs &= gather_memory_references_ref (loop, &refs, lhs,
+ true, stmt);
+ *ref_count += 1;
}
}
}
@@ -1168,9 +1418,9 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead)
bool nontemporal = ref->reuse_distance >= L2_CACHE_SIZE_BYTES;
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Issued%s prefetch for reference %u:%u.\n",
- nontemporal ? " nontemporal" : "",
- ref->group->uid, ref->uid);
+ fprintf (dump_file, "Issued%s prefetch for reference %u:%u.\n",
+ nontemporal ? " nontemporal" : "",
+ ref->group->uid, ref->uid);
bsi = gsi_for_stmt (ref->stmt);
@@ -1875,6 +2125,306 @@ insn_to_prefetch_ratio_too_small_p (unsigned ninsns, unsigned prefetch_count,
return false;
}
+/* Return the probability of edge E as a fraction of REG_BR_PROB_BASE.
+   An edge without an initialized probability yields 1.  A nonzero
+   probability that would come out below the minimum is raised to that
+   minimum. */
+
+static float
+get_edge_prob (edge e)
+{
+  /* Limit the minimum probability value. */
+  const float prob_floor = 0.00001f;
+
+  profile_probability edge_probability = e->probability;
+  if (!edge_probability.initialized_p ())
+    return 1;
+
+  float ratio
+    = edge_probability.to_reg_br_prob_base () / float (REG_BR_PROB_BASE);
+  if (ratio < prob_floor && edge_probability.to_reg_br_prob_base ())
+    ratio = prob_floor;
+
+  return ratio;
+}
+
+
+/* Print every basic block that belongs directly to LOOP (blocks of nested
+   loops are skipped) to the dump file.  Nothing is printed unless detailed
+   dumping is enabled. */
+
+static void
+dump_loop_bb (struct loop *loop)
+{
+  basic_block *blocks = get_loop_body_in_dom_order (loop);
+  const bool detailed = dump_file && (dump_flags & TDF_DETAILS);
+
+  for (unsigned idx = 0; idx < loop->num_nodes; idx++)
+    {
+      basic_block cur = blocks[idx];
+      if (cur->loop_father == loop && detailed)
+        {
+          fprintf (dump_file, "===== the %dth loop bb body ======= \n", idx);
+          gimple_dump_bb (dump_file, cur, 0, dump_flags);
+          fprintf (dump_file, "\n");
+        }
+    }
+
+  free (blocks);
+}
+
+
+/* Fill BB_BRANCH_PROB with one record for every basic block that belongs
+   directly to LOOP (blocks of nested loops are skipped).  For a block that
+   ends in a GIMPLE_COND, the true and false successors and their edge
+   probabilities are recorded when the successor is inside LOOP.  For a
+   block with a fallthru edge, that edge is recorded as the true edge. */
+
+static void
+get_bb_branch_prob (hash_map <basic_block, bb_bp> &bb_branch_prob,
+                    struct loop *loop)
+{
+  basic_block *body = get_loop_body (loop);
+
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+    {
+      basic_block bb = body[i];
+      if (bb->loop_father != loop)
+        continue;
+
+      bb_bp &branch_prob = bb_branch_prob.get_or_insert (bb);
+      branch_prob.bb = bb;
+      branch_prob.true_edge_bb = NULL;
+      branch_prob.false_edge_bb = NULL;
+      branch_prob.true_edge_prob = 0;
+      branch_prob.false_edge_prob = 0;
+      branch_prob.bb_prob = 0;
+
+      gimple *stmt = last_stmt (bb);
+      if (stmt && gimple_code (stmt) == GIMPLE_COND)
+        {
+          if (EDGE_COUNT (bb->succs) != 2)
+            {
+              if (dump_file && (dump_flags & TDF_DETAILS))
+                fprintf (dump_file, "The number of successful edges of bb"
+                         "is abnormal\n");
+              continue;
+            }
+          edge true_edge = NULL;
+          edge false_edge = NULL;
+          extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
+
+          /* If it is exiting bb, and the destination bb of the edge does not
+             belong to the current loop, the information of the edge is not
+             recorded. */
+          if (true_edge->dest->loop_father == loop)
+            {
+              branch_prob.true_edge_bb = true_edge->dest;
+              branch_prob.true_edge_prob = get_edge_prob (true_edge);
+            }
+          if (false_edge->dest->loop_father == loop)
+            {
+              branch_prob.false_edge_bb = false_edge->dest;
+              branch_prob.false_edge_prob = get_edge_prob (false_edge);
+            }
+        }
+
+      edge e = find_fallthru_edge (bb->succs);
+      if (e)
+        {
+          branch_prob.true_edge_bb = e->dest;
+          branch_prob.true_edge_prob = get_edge_prob (e);
+        }
+    }
+
+  /* The block array returned by get_loop_body is owned by the caller;
+     release it so it is not leaked on every call. */
+  free (body);
+}
+
+/* Traverse each bb in the loop and prune fake loops.
+   Recursively walks the successors recorded in BB_BRANCH_PROB starting at
+   CURRENT_BB until LATCH_BB is reached.  PATH_NODE holds the blocks on the
+   path currently being walked, and MAX_PATH is a remaining budget that is
+   decremented each time a block has been walked successfully.
+   Returns true when every path from CURRENT_BB reaches LATCH_BB; returns
+   false when the budget is exhausted, a block has no recorded in-loop
+   successor, or a successor is already on the current path. */
+
+static bool
+traverse_prune_bb_branch (hash_map <basic_block, bb_bp> &bb_branch_prob,
+ int& max_path, hash_set <basic_block> &path_node,
+ basic_block current_bb, basic_block latch_bb)
+{
+ /* Limit the maximum number of analysis paths. */
+ if (max_path <= 0 || current_bb == NULL)
+ return false;
+
+ /* Do not join edges that do not form a complete loop. */
+ bb_bp *bb_bp_node = bb_branch_prob.get (current_bb);
+ if (bb_bp_node == NULL || (bb_bp_node->true_edge_bb == NULL
+ && bb_bp_node->false_edge_bb == NULL))
+ return false;
+
+ /* Reaching the latch ends this path successfully. */
+ if (current_bb == latch_bb)
+ {
+ max_path--;
+ return true;
+ }
+
+ /* Do not join edges that return to non-dominate nodes. */
+ if (path_node.contains (bb_bp_node->true_edge_bb)
+ || path_node.contains (bb_bp_node->false_edge_bb))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "fake loop: in bb%d\n", current_bb->index);
+ return false;
+ }
+
+ /* CURRENT_BB stays on the path only while its successors are walked; a
+ failure in either successor is propagated without removing it. */
+ path_node.add (current_bb);
+ if (bb_bp_node->true_edge_bb)
+ {
+ if (traverse_prune_bb_branch (bb_branch_prob, max_path,
+ path_node, bb_bp_node->true_edge_bb, latch_bb) == false)
+ return false;
+ }
+ if (bb_bp_node->false_edge_bb)
+ {
+ if (traverse_prune_bb_branch (bb_branch_prob, max_path,
+ path_node, bb_bp_node->false_edge_bb, latch_bb) == false)
+ return false;
+ }
+ path_node.remove (current_bb);
+
+ max_path--;
+ return true;
+}
+
+/* Recursively propagate the reach probability PROB from CURRENT_BB along
+   the successors recorded in BB_BRANCH_PROB, adding PROB to the bb_prob of
+   every block visited.  The walk stops at LATCH_BB, at a block without a
+   record, and at a block without recorded successors.  When a block has
+   both a true and a false successor, PROB is scaled by the respective edge
+   probability; with a single successor PROB is passed on unchanged. */
+
+static void
+traverse_calculate_bb_prob (hash_map <basic_block, bb_bp> &bb_branch_prob,
+                            basic_block current_bb, basic_block latch_bb,
+                            float prob)
+{
+  /* Limit bb block access probability, the probability is
+     less than 100% and include delta. */
+  const float MAX_BB_PROBABILITY = 1.001f;
+
+  if (current_bb == NULL)
+    return;
+
+  /* The record must be checked before it is used: the lookup yields NULL
+     for a block that was never entered into the map. */
+  bb_bp *bb_bp_node = bb_branch_prob.get (current_bb);
+  if (bb_bp_node == NULL)
+    return;
+
+  bb_bp_node->bb_prob += prob;
+  gcc_assert (bb_bp_node->bb_prob <= MAX_BB_PROBABILITY);
+
+  if (bb_bp_node->true_edge_bb == NULL && bb_bp_node->false_edge_bb == NULL)
+    return;
+  if (current_bb == latch_bb)
+    return;
+
+  /* PROB is split between the successors only when both exist. */
+  bool assign = (bb_bp_node->true_edge_bb && bb_bp_node->false_edge_bb);
+  if (bb_bp_node->true_edge_bb)
+    {
+      float assign_prob = assign ? bb_bp_node->true_edge_prob * prob : prob;
+      traverse_calculate_bb_prob (bb_branch_prob,
+                                  bb_bp_node->true_edge_bb, latch_bb,
+                                  assign_prob);
+    }
+  if (bb_bp_node->false_edge_bb)
+    {
+      float assign_prob = assign ? bb_bp_node->false_edge_prob * prob : prob;
+      traverse_calculate_bb_prob (bb_branch_prob,
+                                  bb_bp_node->false_edge_bb, latch_bb,
+                                  assign_prob);
+    }
+}
+
+/* Compute the bb_prob field of every record in BB_BRANCH_PROB for LOOP.
+   Returns false, leaving the probabilities uncomputed, when LOOP lacks a
+   distinct header and latch, when the latch record does not lead back to
+   the header, or when the path check from the header fails.  Returns true
+   once the probabilities have been propagated from the header. */
+
+static bool
+get_bb_prob (hash_map <basic_block, bb_bp> &bb_branch_prob, struct loop *loop)
+{
+  /* The upper limit of the branch path in the loop is 10000. */
+  const int path_budget = 10000;
+  const bool detailed = dump_file && (dump_flags & TDF_DETAILS);
+
+  if (loop->header == NULL || loop->latch == NULL
+      || loop->header == loop->latch)
+    {
+      if (detailed)
+        fprintf (dump_file, "get_bb_prob failed: without the header bb or "
+                 "latch bb\n");
+      return false;
+    }
+
+  bb_bp *latch_info = bb_branch_prob.get (loop->latch);
+  bb_bp *header_info = bb_branch_prob.get (loop->header);
+
+  /* Both records must exist and one successor of the latch must be the
+     header block. */
+  bool consistent = header_info != NULL && latch_info != NULL;
+  if (consistent)
+    consistent = (latch_info->true_edge_bb == header_info->bb
+                  || latch_info->false_edge_bb == header_info->bb);
+  if (!consistent)
+    {
+      if (detailed)
+        fprintf (dump_file, "get_bb_prob failed: loop data exception\n");
+      return false;
+    }
+
+  hash_set <basic_block> visited_path;
+  int remaining_paths = path_budget;
+  if (!traverse_prune_bb_branch (bb_branch_prob, remaining_paths,
+                                 visited_path, header_info->bb, loop->latch))
+    {
+      if (detailed)
+        fprintf (dump_file, "traverse_prune_bb_branch false.\n");
+      return false;
+    }
+
+  traverse_calculate_bb_prob (bb_branch_prob, header_info->bb,
+                              loop->latch, 1);
+  return true;
+}
+
+/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
+   Only blocks that belong directly to LOOP are counted.  When
+   PREFETCH_LEVEL is at least BRANCH_WEIGHTED_AHEAD, the size of each block
+   is additionally scaled by the block's reach probability; if that
+   probability analysis fails, the loop is dumped and 0 is returned. */
+
+static unsigned
+estimate_num_loop_insns (struct loop *loop, eni_weights *weights)
+{
+  const bool weighted = prefetch_level >= BRANCH_WEIGHTED_AHEAD;
+  hash_map <basic_block, bb_bp> bb_branch_prob;
+
+  if (weighted)
+    {
+      get_bb_branch_prob (bb_branch_prob, loop);
+      if (!get_bb_prob (bb_branch_prob, loop))
+        {
+          dump_loop_bb (loop);
+          return 0;
+        }
+    }
+
+  /* The block array is fetched only after the early exit above, so the
+     failure path has nothing to free and cannot leak it. */
+  basic_block *body = get_loop_body_in_dom_order (loop);
+  float size = 0;
+
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+    {
+      basic_block bb = body[i];
+      /* For nested loops, the bb of the inner loop is not calculated. */
+      if (bb->loop_father != loop)
+        continue;
+
+      float size_tmp = 0;
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+           gsi_next (&gsi))
+        size_tmp += estimate_num_insns (gsi_stmt (gsi), weights);
+
+      if (weighted)
+        size += size_tmp * bb_branch_prob.get (bb)->bb_prob;
+      else
+        size += size_tmp;
+    }
+  free (body);
+
+  return unsigned (size);
+}
/* Issue prefetch instructions for array references in LOOP. Returns
true if the LOOP was unrolled. */
@@ -1899,7 +2449,15 @@ loop_prefetch_arrays (class loop *loop)
/* FIXME: the time should be weighted by the probabilities of the blocks in
the loop body. */
- time = tree_num_loop_insns (loop, &eni_time_weights);
+
+ if (prefetch_level >= REFINE_BB_AHEAD)
+ {
+ time = estimate_num_loop_insns (loop, &eni_time_weights);
+ }
+ else
+ {
+ time = tree_num_loop_insns (loop, &eni_time_weights);
+ }
if (time == 0)
return false;
@@ -1913,7 +2471,14 @@ loop_prefetch_arrays (class loop *loop)
if (trip_count_to_ahead_ratio_too_small_p (ahead, est_niter))
return false;
- ninsns = tree_num_loop_insns (loop, &eni_size_weights);
+ if (prefetch_level >= REFINE_BB_AHEAD)
+ {
+ ninsns = estimate_num_loop_insns (loop, &eni_size_weights);
+ }
+ else
+ {
+ ninsns = tree_num_loop_insns (loop, &eni_size_weights);
+ }
/* Step 1: gather the memory references. */
refs = gather_memory_references (loop, &no_other_refs, &mem_ref_count);
@@ -1978,10 +2543,49 @@ fail:
return unrolled;
}
+/* Determine if it is a high execution rate loop.
+   The execution rate is one minus the ratio between the estimated number
+   of loop exits (exit edge probability times the count of the exiting
+   block, summed over the exit edges) and the count of the loop header.
+   Returns true when the rate reaches LOOP_EXECUTION_RATE percent, and
+   false otherwise, including for a loop without exit edges or with a
+   header count that is not positive. */
+
+static bool
+is_high_exec_rate_loop (struct loop *loop)
+{
+  /* The exit edges are held in a plain vec<edge>, which does not free its
+     storage on scope exit; every path below releases it explicitly. */
+  vec<edge> exit_edges = get_loop_exit_edges (loop);
+  if (exit_edges.is_empty ())
+    {
+      exit_edges.release ();
+      return false;
+    }
+
+  gcov_type header_bb_count = loop->header->count.to_gcov_type ();
+  /* Without a positive header count the ratio below would divide by zero
+     and the loop cannot be classified as frequently executed. */
+  if (header_bb_count <= 0)
+    {
+      exit_edges.release ();
+      return false;
+    }
+
+  const float threshold = (float) LOOP_EXECUTION_RATE / 100.0;
+  unsigned i = 0;
+  gcov_type exit_count = 0;
+  edge e = NULL;
+  float loop_exec_rate = 0;
+  bool high_rate = true;
+
+  FOR_EACH_VEC_ELT (exit_edges, i, e)
+    {
+      gcov_type exiting_bb_count = e->src->count.to_gcov_type ();
+      float exit_edge_prob = get_edge_prob (e);
+      exit_count += exit_edge_prob * exiting_bb_count;
+
+      loop_exec_rate = 1.0 - ((double) exit_count / header_bb_count);
+
+      /* Stop at the first exit edge that drops the rate below the
+         threshold. */
+      if (loop_exec_rate < threshold)
+        {
+          high_rate = false;
+          break;
+        }
+    }
+  exit_edges.release ();
+
+  if (!high_rate)
+    return false;
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "loop with high execution rate: %f >= %f\n\n",
+               loop_exec_rate, (float) LOOP_EXECUTION_RATE / 100.0);
+      dump_loop_bb (loop);
+    }
+  return true;
+}
+
/* Issue prefetch instructions for array references in loops. */
unsigned int
-tree_ssa_prefetch_arrays (void)
+tree_ssa_prefetch_arrays (function *fun)
{
class loop *loop;
bool unrolled = false;
@@ -2012,6 +2616,12 @@ tree_ssa_prefetch_arrays (void)
param_min_insn_to_prefetch_ratio);
fprintf (dump_file, " min insn-to-mem ratio: %d \n",
param_prefetch_min_insn_to_mem_ratio);
+ fprintf (dump_file, " prefetch_func_topn: %d \n",
+ param_prefetch_func_topn);
+ fprintf (dump_file, " prefetch_ref_topn: %d \n",
+ param_prefetch_ref_topn);
+ fprintf (dump_file, " high_loop_execution_rate: %d \n",
+ LOOP_EXECUTION_RATE);
fprintf (dump_file, "\n");
}
@@ -2028,13 +2638,42 @@ tree_ssa_prefetch_arrays (void)
set_builtin_decl (BUILT_IN_PREFETCH, decl, false);
}
- FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+ enum li_flags LI = LI_FROM_INNERMOST;
+
+ if (profile_exist (CACHE_MISSES))
+ {
+ LI = LI_ONLY_INNERMOST;
+ }
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Processing model %d:\n", LI);
+ }
+
+ if (profile_exist (CACHE_MISSES))
+ {
+ sort_ref_by_event_count (fun, CACHE_MISSES);
+ }
+
+ FOR_EACH_LOOP (loop, LI)
{
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Processing loop %d:\n", loop->num);
+ {
+ fprintf (dump_file, "======================================\n");
+ fprintf (dump_file, "Processing loop %d:\n", loop->num);
+ fprintf (dump_file, "======================================\n");
+ flow_loop_dump (loop, dump_file, NULL, 1);
+ fprintf (dump_file, "\n\n");
+ }
- unrolled |= loop_prefetch_arrays (loop);
+ if (profile_exist (CACHE_MISSES))
+ {
+ if (!is_high_exec_rate_loop (loop))
+ {
+ continue;
+ }
+ }
+ unrolled |= loop_prefetch_arrays (loop);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\n\n");
}
@@ -2049,6 +2688,56 @@ tree_ssa_prefetch_arrays (void)
return todo_flags;
}
+/* Determine whether to analyze the current function according to the
+   ranking of functions by cache-miss count.  Returns false when no rank
+   information is available (total is 0), when the function has no
+   cache-miss count, when its rank exceeds PREFETCH_FUNC_TOPN, or when its
+   count is below PREFETCH_FUNC_COUNTS_THRESHOLD; returns true otherwise.
+   The decision is reported in the dump file when detailed dumping is
+   enabled. */
+
+static bool
+should_analyze_func_p (void)
+{
+  gcov_type decl_uid = DECL_UID (current_function_decl);
+  struct rank_info func_rank_info =
+    event_get_func_rank (decl_uid, CACHE_MISSES);
+  if (func_rank_info.total == 0)
+    return false;
+
+  const bool dump_details = dump_file && (dump_flags & TDF_DETAILS);
+  gcov_type func_count = event_get_func_count (decl_uid, CACHE_MISSES);
+
+  /* In the messages below the format specifiers match the argument types:
+     the 64-bit uid and count are printed with PRId64, and the int-valued
+     threshold parameter with %d. */
+  if (func_count == 0)
+    {
+      if (dump_details)
+        fprintf (dump_file, "function uid %" PRId64 " cannot find profile "
+                 "data and skip prefetch analysis\n",
+                 (int64_t) decl_uid);
+      return false;
+    }
+
+  if (func_rank_info.rank > PREFETCH_FUNC_TOPN
+      || func_count < PREFETCH_FUNC_COUNTS_THRESHOLD)
+    {
+      if (dump_details)
+        fprintf (dump_file, "function uid %" PRId64 " total counts is %"
+                 PRId64 ": rank %d > topn %d, counts %" PRId64
+                 " < threshold %d skip prefetch analysis\n",
+                 (int64_t) decl_uid, (int64_t) func_count,
+                 func_rank_info.rank, PREFETCH_FUNC_TOPN,
+                 (int64_t) func_count, PREFETCH_FUNC_COUNTS_THRESHOLD);
+      return false;
+    }
+
+  if (dump_details)
+    fprintf (dump_file, "function uid %" PRId64 " total counts is %"
+             PRId64 ": rank %d in topn %d, counts %" PRId64
+             " > threshold %d continue prefetch analysis\n",
+             (int64_t) decl_uid, (int64_t) func_count,
+             func_rank_info.rank, PREFETCH_FUNC_TOPN,
+             (int64_t) func_count, PREFETCH_FUNC_COUNTS_THRESHOLD);
+  return true;
+}
+
/* Prefetching. */
namespace {
@@ -2085,6 +2774,18 @@ pass_loop_prefetch::execute (function *fun)
if (number_of_loops (fun) <= 1)
return 0;
+ /* Filter only when combined with cache-miss. When the should_analyze_func_p
+ analysis fails (for example, the function without cache-miss count),
+ in order to ensure the accuracy of the prefetch analysis, the function
+ does not perform native prefetch processing. */
+ if (profile_exist (CACHE_MISSES))
+ {
+ if (!should_analyze_func_p ())
+ {
+ return 0;
+ }
+ }
+
if ((PREFETCH_BLOCK & (PREFETCH_BLOCK - 1)) != 0)
{
static bool warned = false;
@@ -2099,7 +2800,7 @@ pass_loop_prefetch::execute (function *fun)
return 0;
}
- return tree_ssa_prefetch_arrays ();
+ return tree_ssa_prefetch_arrays (fun);
}
} // anon namespace
--
2.27.0.windows.1
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/yangmingtaip/gcc.git
git@gitee.com:yangmingtaip/gcc.git
yangmingtaip
gcc
gcc
master

搜索帮助