1 Star 0 Fork 33

baozhaoling/gcc

forked from src-anolis-os/gcc 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
HYGON-0002-array-widen-compare.patch 65.88 KB
一键复制 编辑 原始数据 按行查看 历史
baozhaoling 提交于 2024-04-08 11:43 . Add Hygon's supported patches
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092
From 4e00b2ddee6f8c173283fed91273a3cb4e8a3f8a Mon Sep 17 00:00:00 2001
From: He Dian <hedian@hygon.cn>
Date: Tue, 14 Nov 2023 11:10:46 +0800
Subject: [PATCH] [feat][gcc]: add feature array-widen-compare
In the narrow-byte array comparison scenario, the types of pointers
pointing to array are extended so that elements of multiple bytes can
be loaded at a time when a wide type is used to dereference an array,
thereby improving the performance of this comparison scenario. In some
extreme situations this may result in unsafe behavior.
This option may generate better or worse code; results are highly dependent
on the structure of loops within the source code.
We can enable this feature by options: -O3 -farray-widen-compare
Signed-off-by: Dian He <hedian@hygon.cn>
---
gcc/Makefile.in | 1 +
gcc/common.opt | 5 +
gcc/doc/invoke.texi | 13 +-
gcc/opt-functions.awk | 2 +-
gcc/passes.def | 1 +
.../gcc.dg/tree-ssa/awiden-compare-1.c | 19 +
.../gcc.dg/tree-ssa/awiden-compare-2.c | 90 +
.../gcc.dg/tree-ssa/awiden-compare-3.c | 22 +
.../gcc.dg/tree-ssa/awiden-compare-4.c | 22 +
.../gcc.dg/tree-ssa/awiden-compare-5.c | 19 +
.../gcc.dg/tree-ssa/awiden-compare-6.c | 19 +
.../gcc.dg/tree-ssa/awiden-compare-7.c | 22 +
.../gcc.dg/tree-ssa/awiden-compare-8.c | 24 +
gcc/timevar.def | 1 +
gcc/tree-pass.h | 1 +
gcc/tree-ssa-loop-array-widen-compare.c | 1647 +++++++++++++++++
16 files changed, 1906 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c
create mode 100644 gcc/tree-ssa-loop-array-widen-compare.c
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 31ff95500c9..0aabc6ea3f2 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1653,6 +1653,7 @@ OBJS = \
tree-ssa-loop-ivopts.o \
tree-ssa-loop-manip.o \
tree-ssa-loop-niter.o \
+ tree-ssa-loop-array-widen-compare.o \
tree-ssa-loop-prefetch.o \
tree-ssa-loop-split.o \
tree-ssa-loop-unswitch.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index 8a0dafc522d..d3541b4e612 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1116,6 +1116,11 @@ fasynchronous-unwind-tables
Common Var(flag_asynchronous_unwind_tables) Optimization
Generate unwind tables that are exact at each instruction boundary.
+farray-widen-compare
+Common Report Var(flag_array_widen_compare) Optimization
+Extends types for pointers to arrays to improve array comparison performance.
+In some extreme situations this may result in unsafe behavior.
+
fauto-inc-dec
Common Var(flag_auto_inc_dec) Init(1) Optimization
Generate auto-inc/dec instructions.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index cb83dd8a1cc..a76a3a3a877 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -507,7 +507,7 @@ Objective-C and Objective-C++ Dialects}.
-falign-loops[=@var{n}[:@var{m}:[@var{n2}[:@var{m2}]]]] @gol
-fno-allocation-dce -fallow-store-data-races @gol
-fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol
--fauto-inc-dec -fbranch-probabilities @gol
+-farray-widen-compare -fauto-inc-dec -fbranch-probabilities @gol
-fcaller-saves @gol
-fcombine-stack-adjustments -fconserve-stack @gol
-fcompare-elim -fcprop-registers -fcrossjumping @gol
@@ -11386,6 +11386,17 @@ This pass is always skipped on architectures that do not have
instructions to support this. Enabled by default at @option{-O1} and
higher on architectures that support this.
+@item -farray-widen-compare
+@opindex farray-widen-compare
+In the narrow-byte array comparison scenario, the types of pointers
+pointing to array are extended so that elements of multiple bytes can
+be loaded at a time when a wide type is used to dereference an array,
+thereby improving the performance of this comparison scenario. In some
+extreme situations this may result in unsafe behavior.
+
+This option may generate better or worse code; results are highly dependent
+on the structure of loops within the source code.
+
@item -fdce
@opindex fdce
Perform dead code elimination (DCE) on RTL@.
diff --git a/gcc/opt-functions.awk b/gcc/opt-functions.awk
index 2aee0b9f1c3..0288fb68adc 100644
--- a/gcc/opt-functions.awk
+++ b/gcc/opt-functions.awk
@@ -179,7 +179,7 @@ function switch_bit_fields (flags)
flag_init("ToLower", flags) \
byte_size_flag
- if (flag_set_p("Report", flags))
+ if (var_name(flags) != "flag_array_widen_compare" && flag_set_p("Report", flags))
print "#error Report option property is dropped"
sub(", $", "", result)
diff --git a/gcc/passes.def b/gcc/passes.def
index 375d3d62d51..8dbb7983e3e 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_dse);
NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */);
NEXT_PASS (pass_phiopt, true /* early_p */);
+ NEXT_PASS (pass_array_widen_compare);
NEXT_PASS (pass_tail_recursion);
NEXT_PASS (pass_if_to_switch);
NEXT_PASS (pass_convert_switch);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c
new file mode 100644
index 00000000000..c2498b12518
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (++len != len_limit)
+ if (pb[len] != cur[len])
+ break;
+ return len;
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c
new file mode 100644
index 00000000000..e5d6738dbd4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c
@@ -0,0 +1,90 @@
+/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define EMPTY_HASH_VALUE 0
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+#define true 1
+
+typedef struct {
+ uint32_t len;
+ uint32_t dist;
+} lzma_match;
+
+
+lzma_match *
+func (
+ const uint32_t len_limit,
+ const uint32_t pos,
+ const uint8_t *const cur,
+ uint32_t cur_match,
+ uint32_t depth,
+ uint32_t *const son,
+ const uint32_t cyclic_pos,
+ const uint32_t cyclic_size,
+ lzma_match *matches,
+ uint32_t len_best)
+{
+ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1;
+ uint32_t *ptr1 = son + (cyclic_pos << 1);
+
+ uint32_t len0 = 0;
+ uint32_t len1 = 0;
+
+ while (true)
+ {
+ const uint32_t delta = pos - cur_match;
+ if (depth-- == 0 || delta >= cyclic_size)
+ {
+ *ptr0 = EMPTY_HASH_VALUE;
+ *ptr1 = EMPTY_HASH_VALUE;
+ return matches;
+ }
+
+ uint32_t *const pair = son + ((cyclic_pos - delta + (delta > cyclic_pos ? cyclic_size : 0)) << 1);
+
+ const uint8_t *const pb = cur -delta;
+ uint32_t len = my_min(len0, len1);
+
+ if (pb[len] == cur[len])
+ {
+ while (++len != len_limit)
+ if (pb[len] != cur[len])
+ break;
+
+ if (len_best < len)
+ {
+ len_best = len;
+ matches->len = len;
+ matches->dist = delta - 1;
+ ++matches;
+
+ if (len == len_limit)
+ {
+ *ptr1 = pair[0];
+ *ptr0 = pair[1];
+ return matches;
+ }
+ }
+ }
+
+ if (pb[len] < cur[len])
+ {
+ *ptr1 = cur_match;
+ ptr1 = pair + 1;
+ cur_match = *ptr1;
+ len1 = len;
+ }
+ else
+ {
+ *ptr0 = cur_match;
+ ptr0 = pair;
+ cur_match = *ptr0;
+ len0 = len;
+ }
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c
new file mode 100644
index 00000000000..6d0d36f3133
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (len != len_limit)
+ {
+ if (pb[len] != cur[len])
+ break;
+ len = len + 1;
+ }
+ return len;
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c
new file mode 100644
index 00000000000..ee923e3ab21
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (len != len_limit)
+ {
+ if (pb[len] != cur[len])
+ break;
+ len = len + 2;
+ }
+ return len;
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c
new file mode 100644
index 00000000000..ee2340af3f2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (++len != len_limit)
+ if (pb[len] != cur[len-1])
+ break;
+ return len;
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c
new file mode 100644
index 00000000000..57e93695765
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (len++ != len_limit)
+ if (pb[len] != cur[len])
+ break;
+ return len;
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c
new file mode 100644
index 00000000000..07af3edbd24
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (len != len_limit)
+ {
+ len = len + 1;
+ if (pb[len] != cur[len])
+ break;
+ }
+ return len;
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c
new file mode 100644
index 00000000000..4054e77dbbc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */
+/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define my_min(x, y) ((x) < (y) ? (x) : (y))
+
+uint32_t
+func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
+{
+ uint32_t len = my_min(len0, len1);
+ while (++len != len_limit)
+ {
+ if (pb[len] != cur[len])
+ {
+ len = len - 1;
+ break;
+ }
+ }
+ return len;
+}
+
+/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 2dae5e1c760..794b8017d18 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -216,6 +216,7 @@ DEFTIMEVAR (TV_TREE_NRV , "tree NRV optimization")
DEFTIMEVAR (TV_TREE_COPY_RENAME , "tree rename SSA copies")
DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier")
DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier")
+DEFTIMEVAR (TV_TREE_ARRAY_WIDEN_COMPARE, "tree array widen compare")
DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion")
DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering")
DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 606d1d60b85..55ee2fe7f9e 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -453,6 +453,7 @@ extern gimple_opt_pass *make_pass_cselim (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_phiopt (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_forwprop (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_phiprop (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_array_widen_compare (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_ifcombine (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_dse (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_nrv (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-loop-array-widen-compare.c b/gcc/tree-ssa-loop-array-widen-compare.c
new file mode 100644
index 00000000000..83908e385a6
--- /dev/null
+++ b/gcc/tree-ssa-loop-array-widen-compare.c
@@ -0,0 +1,1647 @@
+/* Array widen compare.
+ Copyright (C) 2022-2022 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "gimple-ssa.h"
+#include "tree-pretty-print.h"
+#include "fold-const.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop.h"
+#include "ssa.h"
+#include "tree-into-ssa.h"
+#include "cfganal.h"
+#include "cfgloop.h"
+#include "gimple-pretty-print.h"
+#include "tree-cfg.h"
+#include "cgraph.h"
+#include "print-tree.h"
+#include "cfghooks.h"
+#include "gimple-fold.h"
+
+
+/* This pass handles scenarios similar to the following:
+
+ uint32_t
+ func (uint32_t len0, uint32_t len1, const uint32_t len_limit,
+ const uint8_t *const pb, const uint8_t *const cur)
+ {
+ uint32_t len = my_min (len0, len1);
+ while (++len != len_limit)
+ if (pb[len] != cur[len])
+ break;
+ return len;
+ }
+
+ Features of this type of loop:
+ 1) the loop has two exits;
+ 2) One of the exits comes from the comparison result of the array;
+
+ From the source code point of view, the pass completes the conversion of the
+ above scenario into:
+
+ uint32_t
+ func (uint32_t len0, uint32_t len1, const uint32_t len_limit,
+ const uint8_t *const pb, const uint8_t *const cur)
+ {
+ uint32_t len = my_min (len0, len1);
+ // align_loop
+ for (++len; len + sizeof (uint64_t) <= len_limit; len += sizeof (uint64_t))
+ {
+ uint64_t a = *((uint64_t*)(cur+len));
+ uint64_t b = *((uint64_t*)(pb+len));
+ if (a != b)
+ {
+ int lz = __builtin_ctzll (a ^ b);
+ len += lz / 8;
+ return len;
+ }
+ }
+ // epilogue_loop
+ for (;len != len_limit; ++len)
+ if (pb[len] != cur[len])
+ break;
+ return len;
+ }
+
+ This pass is to complete the conversion of such scenarios from the internal
+ perspective of the compiler:
+ 1) determine_loop_form: The function completes the screening of such
+ scenarios;
+ 2) convert_to_new_loop: The function completes the conversion of
+ origin_loop to new loops, and removes origin_loop;
+ 3) origin_loop_info: The structure is used to record important information
+ of origin_loop: such as loop exit, growth step size
+ of loop induction variable, initial value
+ of induction variable, etc;
+ 4) create_new_loops: The function is used as the key content of the pass
+ to complete the creation of new loops. */
+
+/* The useful information of origin loop. */
+struct origin_loop_info
+{
+ tree base; /* The initial index of the array in the old loop. */
+ tree limit; /* The limit index of the array in the old loop. */
+ tree arr1; /* Array 1 in the old loop. */
+ tree arr2; /* Array 2 in the old loop. */
+ edge entry_edge; /* The edge into the old loop. */
+ basic_block exit_bb1; /* Destination block of exit_e1. */
+ basic_block exit_bb2; /* Destination block of exit_e2. */
+ edge exit_e1; /* Exit edge whose source is the loop header. */
+ edge exit_e2; /* Exit edge whose source is not the loop header. */
+ gimple *cond_stmt1; /* Last stmt of exit_e1's source, if check_cond_stmt accepts it. */
+ gimple *cond_stmt2; /* Last stmt of exit_e2's source, if check_cond_stmt accepts it. */
+ gimple *update_stmt; /* Stmt accepted by check_update_stmt (indvar + constant). */
+ bool exist_prolog_assgin;
+ /* Whether the marker has an initial value assigned
+ to the array index. */
+ unsigned HOST_WIDE_INT step;
+ /* The growth step of the loop induction variable. */
+ tree indvar; /* Induction variable; helpers match it by SSA_NAME_VAR. */
+ tree arr2_off_to_arr1;
+ tree body_indvar;
+ auto_vec<tree> arr2_offs_to_arr1;
+ auto_vec<tree_code> arr2_offs_code_to_arr1;
+ auto_vec<gimple *> arr2_offs_to_arr1_without_indvar;
+ basic_block header;
+ basic_block body;
+ basic_block latch;
+};
+
+typedef struct origin_loop_info origin_loop_info;
+
+static origin_loop_info origin_loop; /* Record read and written by the helpers below. */
+hash_map <basic_block, tree> defs_map;
+
+/* Dump to dump_file every basic block that belongs directly to LOOP (blocks
+   whose loop_father is another loop are skipped), visiting them in the order
+   returned by get_loop_body_in_dom_order.  Nothing is printed unless a dump
+   file is open and TDF_DETAILS is set.  */
+static void
+dump_loop_bb (struct loop *loop)
+{
+  basic_block *bbs = get_loop_body_in_dom_order (loop);
+  const bool verbose = dump_file && (dump_flags & TDF_DETAILS);
+
+  for (unsigned idx = 0; verbose && idx < loop->num_nodes; idx++)
+    {
+      if (bbs[idx]->loop_father != loop)
+        continue;
+
+      fprintf (dump_file, "===== the %dth bb of loop ==========:\n", idx);
+      gimple_dump_bb (dump_file, bbs[idx], 0, dump_flags);
+      fprintf (dump_file, "\n");
+    }
+
+  free (bbs);
+}
+
+/* Return true if the first PHI node in LOOP's header has exactly two
+   arguments (PHI arguments correspond to the header's incoming edges).
+   Return false when the header has no PHI node at all.  */
+
+static bool
+loop_single_backedge_p (class loop *loop)
+{
+  gphi_iterator gsi = gsi_start_phis (loop->header);
+
+  /* Test the iterator before dereferencing it: gphi_iterator::phi () casts
+     with as_a <>, which must not be handed the null pointer of an empty
+     PHI sequence.  */
+  if (gsi_end_p (gsi))
+    return false;
+
+  return gimple_phi_num_args (gsi.phi ()) == 2;
+}
+
+/* Return true if LOOP's header has exactly two incoming edges, one coming
+   from the latch and the other from a block outside LOOP.  */
+
+static bool
+loop_single_preheader_bb (class loop *loop)
+{
+  basic_block hdr = loop->header;
+
+  if (EDGE_COUNT (hdr->preds) != 2)
+    return false;
+
+  basic_block src0 = EDGE_PRED (hdr, 0)->src;
+  basic_block src1 = EDGE_PRED (hdr, 1)->src;
+  bool latch_then_outside = src0 == loop->latch && src1->loop_father != loop;
+  bool outside_then_latch = src1 == loop->latch && src0->loop_father != loop;
+
+  return latch_then_outside || outside_then_latch;
+}
+
+/* Reset every member of the file-static origin_loop record to its empty
+   state so that nothing recorded for a previously analysed loop leaks into
+   the next analysis.  */
+static void
+init_origin_loop_structure ()
+{
+  origin_loop.base = origin_loop.limit = NULL;
+  origin_loop.arr1 = origin_loop.arr2 = NULL;
+  origin_loop.entry_edge = NULL;
+  origin_loop.exit_bb1 = origin_loop.exit_bb2 = NULL;
+  origin_loop.exit_e1 = origin_loop.exit_e2 = NULL;
+  origin_loop.cond_stmt1 = origin_loop.cond_stmt2 = NULL;
+  origin_loop.update_stmt = NULL;
+  origin_loop.exist_prolog_assgin = false;
+  origin_loop.step = 0;
+  origin_loop.indvar = NULL;
+
+  /* Clear these two as well; a stale tree from a preceding loop must not
+     survive in the static record.  */
+  origin_loop.arr2_off_to_arr1 = NULL;
+  origin_loop.body_indvar = NULL;
+  origin_loop.header = origin_loop.body = origin_loop.latch = NULL;
+  origin_loop.arr2_offs_to_arr1.release ();
+  origin_loop.arr2_offs_code_to_arr1.release ();
+  origin_loop.arr2_offs_to_arr1_without_indvar.release ();
+}
+
+/* Return the first edge into LOOP's header whose source is not the latch.
+   If every incoming edge comes from the latch, LOOP is asserted to have no
+   outer loop and the single successor edge of the function's entry block is
+   returned instead.  */
+
+static edge
+get_loop_preheader_edge (class loop *loop)
+{
+  edge_iterator it;
+  edge pred;
+
+  FOR_EACH_EDGE (pred, it, loop->header->preds)
+    if (pred->src != loop->latch)
+      return pred;
+
+  /* No non-latch predecessor was found: fall back to the edge leaving the
+     function's entry block.  */
+  gcc_assert (!loop_outer (loop));
+  return single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+}
+
+/* Return true if STMT is a GIMPLE_COND of the shape this pass accepts as a
+   loop exit test: the comparison code is !=, == or <, and both operands are
+   SSA names.  A null STMT or any other statement kind yields false.  */
+
+static bool
+check_cond_stmt (gimple *stmt)
+{
+  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
+    return false;
+
+  switch (gimple_cond_code (stmt))
+    {
+    case NE_EXPR:
+    case EQ_EXPR:
+    case LT_EXPR:
+      break;
+    default:
+      return false;
+    }
+
+  /* A cond statement with a non-SSA_NAME operand is not supported,
+     eg: if (len_1 != 100).  */
+  return (TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
+          && TREE_CODE (gimple_cond_rhs (stmt)) == SSA_NAME);
+}
+
+/* Record the exits of LOOP in origin_loop.  The exit edge leaving the loop
+   header is stored in exit_e1 with its destination in exit_bb1; an exit
+   edge leaving any other block is stored in exit_e2/exit_bb2.  The last
+   statement of each exit's source block becomes cond_stmt1/cond_stmt2 when
+   it passes check_cond_stmt.
+
+   Return true only if no exit information was recorded beforehand, LOOP
+   has exactly two exit edges and all six fields end up non-null.  */
+
+static bool
+record_origin_loop_exit_info (class loop *loop)
+{
+  /* Refuse to overwrite information recorded by an earlier call.  */
+  if (origin_loop.exit_e1 != NULL || origin_loop.exit_bb1 != NULL
+      || origin_loop.exit_e2 != NULL || origin_loop.exit_bb2 != NULL
+      || origin_loop.cond_stmt1 != NULL || origin_loop.cond_stmt2 != NULL)
+    return false;
+
+  auto_vec<edge> exits = get_loop_exit_edges (loop);
+  if (exits == vNULL)
+    return false;
+
+  if (exits.length () == 2)
+    {
+      unsigned ix;
+      edge exit_edge;
+
+      FOR_EACH_VEC_ELT (exits, ix, exit_edge)
+        {
+          gimple *last = gsi_stmt (gsi_last_bb (exit_edge->src));
+          gimple *cond = check_cond_stmt (last) ? last : NULL;
+
+          if (exit_edge->src == loop->header)
+            {
+              origin_loop.exit_e1 = exit_edge;
+              origin_loop.exit_bb1 = exit_edge->dest;
+              if (cond)
+                origin_loop.cond_stmt1 = cond;
+            }
+          else
+            {
+              origin_loop.exit_e2 = exit_edge;
+              origin_loop.exit_bb2 = exit_edge->dest;
+              if (cond)
+                origin_loop.cond_stmt2 = cond;
+            }
+        }
+    }
+  exits.release ();
+  return (origin_loop.exit_e1 && origin_loop.exit_bb1
+          && origin_loop.exit_e2 && origin_loop.exit_bb2
+          && origin_loop.cond_stmt1 && origin_loop.cond_stmt2);
+}
+
+/* Return true if T is an SSA_NAME that has an underlying variable, i.e.
+   SSA_NAME_VAR (T) is non-null.  Return false for a null T, for any tree
+   that is not an SSA_NAME, and for SSA names without such a variable.  */
+
+static bool
+ssa_name_var_p (tree t)
+{
+  const bool is_ssa_name = t && TREE_CODE (t) == SSA_NAME;
+
+  return is_ssa_name && SSA_NAME_VAR (t) != NULL_TREE;
+}
+
+/* Return true if T1 and T2 are both SSA_NAMEs with an underlying variable
+   and SSA_NAME_VAR is the same for both.  Return false as soon as either
+   one fails ssa_name_var_p.  */
+
+static bool
+same_ssa_name_var_p (tree t1, tree t2)
+{
+  return (ssa_name_var_p (t1)
+          && ssa_name_var_p (t2)
+          && SSA_NAME_VAR (t1) == SSA_NAME_VAR (t2));
+}
+
+/* Record in origin_loop.limit the operand of exit condition STMT that does
+   not share origin_loop.indvar's variable while the other operand does.
+   Return false if a limit is already set, an operand is missing, is not of
+   INTEGER_TYPE or has no defining statement; otherwise return true.
+   NOTE(review): true is returned even when origin_loop.limit stays unset.  */
+
+static bool
+get_iv_upper_bound (gimple *stmt)
+{
+  if (origin_loop.limit != NULL)
+    return false;
+
+  tree op0 = gimple_cond_lhs (stmt);
+  tree op1 = gimple_cond_rhs (stmt);
+  if (!op0 || !op1)
+    return false;
+  if (TREE_CODE (TREE_TYPE (op0)) != INTEGER_TYPE
+      || TREE_CODE (TREE_TYPE (op1)) != INTEGER_TYPE)
+    return false;
+
+  gimple *def0 = SSA_NAME_DEF_STMT (op0);
+  gimple *def1 = SSA_NAME_DEF_STMT (op1);
+  if (!def0 || !def1)
+    return false;
+
+  const bool op0_is_iv = same_ssa_name_var_p (op0, origin_loop.indvar);
+  const bool op1_is_iv = same_ssa_name_var_p (op1, origin_loop.indvar);
+  if (op0_is_iv != op1_is_iv)
+    origin_loop.limit = op0_is_iv ? op1 : op0;
+  return true;
+}
+
+/* Returns true only when STMT is an assignment whose rhs code is PLUS_EXPR,
+   rhs1 is an SSA_NAME with the same var as origin_loop.indvar, and rhs2 is
+   an INTEGER_CST equal to 1.  The constant is stored in origin_loop.step
+   as soon as it is known to fit an unsigned HOST_WIDE_INT, even when the
+   function then returns false because the step is not 1.  */
+
+static bool
+check_update_stmt (gimple *stmt)
+{
+  /* The defining statement may be a PHI, a call or a GIMPLE_NOP; the
+     gimple_assign_* accessors below are only valid on GIMPLE_ASSIGN.  */
+  if (!stmt || !is_gimple_assign (stmt))
+    return false;
+
+  if (gimple_assign_rhs_code (stmt) != PLUS_EXPR)
+    return false;
+
+  tree rhs1 = gimple_assign_rhs1 (stmt);
+  tree rhs2 = gimple_assign_rhs2 (stmt);
+  if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != INTEGER_CST)
+    return false;
+
+  if (!same_ssa_name_var_p (rhs1, origin_loop.indvar))
+    return false;
+
+  if (!tree_fits_uhwi_p (rhs2))
+    return false;
+
+  origin_loop.step = tree_to_uhwi (rhs2);
+  return origin_loop.step == 1;
+}
+
+/* Record the initial value and the update statement of the original loop's
+   induction variable.  STMT is the condition in the loop header; the
+   operand that is not origin_loop.limit is treated as the induction
+   variable side.  For each header PHI whose result belongs to that
+   variable, the argument on origin_loop.entry_edge becomes origin_loop.base
+   and the definition of the argument on every other incoming edge must
+   pass check_update_stmt; it is stored in origin_loop.update_stmt.
+   origin_loop.exist_prolog_assgin is set when that update lives in the
+   header and defines the lhs of STMT.
+   Returns true only when both base and update_stmt were recorded.  */
+
+static bool
+get_iv_base (gimple *stmt)
+{
+  tree lhs = gimple_cond_lhs (stmt);
+  tree rhs = gimple_cond_rhs (stmt);
+  if (origin_loop.base != NULL || origin_loop.update_stmt != NULL)
+    return false;
+
+  basic_block header = gimple_bb (stmt);
+  tree iv_operand = origin_loop.limit == lhs ? rhs : lhs;
+
+  for (gphi_iterator gsi = gsi_start_phis (header); !gsi_end_p (gsi);
+       gsi_next (&gsi))
+    {
+      gphi *phi = gsi.phi ();
+      if (!same_ssa_name_var_p (gimple_phi_result (phi), iv_operand))
+        continue;
+
+      origin_loop.base = PHI_ARG_DEF_FROM_EDGE (phi, origin_loop.entry_edge);
+
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, header->preds)
+        {
+          if (e == origin_loop.entry_edge)
+            continue;
+
+          tree iv_after = PHI_ARG_DEF_FROM_EDGE (phi, e);
+          /* A PHI argument may be a constant; SSA_NAME_DEF_STMT is only
+             valid on an SSA_NAME.  */
+          if (!iv_after || TREE_CODE (iv_after) != SSA_NAME)
+            return false;
+
+          gimple *update = SSA_NAME_DEF_STMT (iv_after);
+          if (!check_update_stmt (update))
+            return false;
+          origin_loop.update_stmt = update;
+          if (gimple_bb (update) == header && iv_after == lhs)
+            origin_loop.exist_prolog_assgin = true;
+        }
+    }
+
+  return origin_loop.base != NULL && origin_loop.update_stmt != NULL;
+}
+
+/* Record the upper bound and initial value of the induction variable in the
+   original loop; when prolog_assign is present, make sure the loop header is
+   in simple form.  The interpretation of prolog_assign is as follows:
+   eg: while (++len != limit)
+         ......
+   For such a loop, ++len will be processed before entering header_bb, and the
+   assign is regarded as the prolog_assign of the loop.
+
+   The result of the first PHI in the header is taken as origin_loop.indvar.
+   The header is then walked backwards: debug statements are skipped, the
+   GIMPLE_COND supplies limit/base/update_stmt, the only assignment allowed
+   is the recorded update statement (and only with prolog_assign), and any
+   other statement makes the function return false.  */
+
+static bool
+record_origin_loop_header (class loop *loop)
+{
+  basic_block header = loop->header;
+  origin_loop.header = header;
+
+  if (origin_loop.entry_edge || origin_loop.base
+      || origin_loop.update_stmt || origin_loop.limit)
+    return false;
+  origin_loop.entry_edge = get_loop_preheader_edge (loop);
+
+  /* A header without any PHI node cannot carry an induction variable;
+     bail out before dereferencing the empty iterator.  */
+  gphi_iterator psi = gsi_start_phis (header);
+  if (gsi_end_p (psi))
+    return false;
+  origin_loop.indvar = gimple_phi_result (psi.phi ());
+  if (!origin_loop.indvar)
+    return false;
+
+  for (gimple_stmt_iterator gsi = gsi_last_bb (header); !gsi_end_p (gsi);
+       gsi_prev (&gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+      if (stmt && is_gimple_debug (stmt))
+        continue;
+
+      if (stmt && gimple_code (stmt) == GIMPLE_COND)
+        {
+          if (!get_iv_upper_bound (stmt))
+            return false;
+          if (!get_iv_base (stmt))
+            return false;
+        }
+      else if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN)
+        {
+          if (stmt != origin_loop.update_stmt
+              || !origin_loop.exist_prolog_assgin)
+            return false;
+        }
+      else
+        return false;
+    }
+
+  return origin_loop.entry_edge && origin_loop.base
+         && origin_loop.update_stmt && origin_loop.limit;
+}
+
+/* Record the loop latch in origin_loop.latch and validate it.
+   With prolog_assign, the latch is accepted when it is the block of
+   cond_stmt2 or when it contains no statement.  Without prolog_assign, the
+   latch must contain origin_loop.update_stmt, which has the form
+   len_2 = len_1 + 1.  */
+
+static bool
+record_origin_loop_latch (class loop *loop)
+{
+  basic_block latch = loop->latch;
+  basic_block body = gimple_bb (origin_loop.cond_stmt2);
+  origin_loop.latch = latch;
+
+  if (origin_loop.exist_prolog_assgin)
+    return latch == body || gsi_end_p (gsi_start_bb (latch));
+
+  for (gimple_stmt_iterator it = gsi_start_bb (latch); !gsi_end_p (it);
+       gsi_next (&it))
+    if (gsi_stmt (it) == origin_loop.update_stmt)
+      return true;
+
+  return false;
+}
+
+/* Returns true when MEM_REF has a zero offset and its address operand is an
+   SSA_NAME of pointer type defined by a POINTER_PLUS_EXPR assignment.  */
+
+static bool
+check_body_mem_ref (tree mem_ref)
+{
+  tree arg0 = TREE_OPERAND (mem_ref, 0);
+  tree arg1 = TREE_OPERAND (mem_ref, 1);
+
+  /* The address operand can be something other than an SSA_NAME (for
+     instance an ADDR_EXPR), in which case it has no defining statement.  */
+  if (TREE_CODE (arg0) != SSA_NAME
+      || TREE_CODE (TREE_TYPE (arg0)) != POINTER_TYPE
+      || !integer_zerop (arg1))
+    return false;
+
+  /* The definition may be a PHI or a GIMPLE_NOP; only an assignment has an
+     rhs code.  */
+  gimple *def = SSA_NAME_DEF_STMT (arg0);
+  return def
+         && is_gimple_assign (def)
+         && gimple_assign_rhs_code (def) == POINTER_PLUS_EXPR;
+}
+
+/* Inspect STMT, an assignment whose rhs1/rhs2 are read as the base and offset of a pointer addition: follow the definitions feeding rhs2 while they stay in origin_loop.body, recording each PLUS/MINUS step, then store rhs1 in origin_loop.arr1 or, if that is already set, in origin_loop.arr2.
+ Returns false for an unsupported statement in the chain or when both arrays are already recorded. */
+
+static bool
+check_body_pointer_plus (gimple *stmt)
+{
+ tree rhs1 = gimple_assign_rhs1 (stmt);
+ tree rhs2 = gimple_assign_rhs2 (stmt);
+ if (TREE_CODE (TREE_TYPE (rhs1)) != POINTER_TYPE
+ && gimple_assign_rhs_code (stmt)!=POINTER_PLUS_EXPR) /* NOTE(review): with '&&' this rejects only when both tests fail; '||' may have been intended -- confirm. */
+ return false;
+
+ tree body_indvar=origin_loop.body_indvar;
+ basic_block body=origin_loop.body;
+ tree tmp_rhs1=NULL, tmp_rhs2=NULL, tmp_lhs=NULL, off_without_indvar;
+ gimple *g=NULL;
+ for (g=SSA_NAME_DEF_STMT (rhs2); /* Start from the statement defining the offset operand. */
+ g && gimple_bb (g)==body; /* Only statements located in the loop body are followed. */
+ g=SSA_NAME_DEF_STMT (tmp_rhs1)) /* Next step is the definition of tmp_rhs1, which the body below may have overwritten. */
+ {
+ tmp_rhs1=gimple_assign_rhs1 (g);
+ off_without_indvar=tmp_rhs2=gimple_assign_rhs2 (g);
+ tmp_lhs=gimple_assign_lhs (g);
+ if (tmp_lhs==body_indvar) /* Reached the statement that defines origin_loop.body_indvar: stop walking. */
+ break;
+ if (gimple_assign_rhs_code (g)!=MINUS_EXPR
+ && gimple_assign_rhs_code (g)!=PLUS_EXPR) /* Any other operation on the way is not supported. */
+ return false;
+ if (!tmp_rhs2)
+ return false;
+ origin_loop.arr2_offs_to_arr1.safe_push (tmp_rhs2); /* Remember the offset operand ... */
+ origin_loop.arr2_offs_code_to_arr1.safe_push (gimple_assign_rhs_code (g)); /* ... and whether it is added or subtracted. */
+
+ g=SSA_NAME_DEF_STMT (tmp_rhs2); /* g is reused for the definition of the offset operand. */
+ if (g && (gimple_bb (g)==origin_loop.header
+ || gimple_bb (g)==origin_loop.body
+ || gimple_bb (g)==origin_loop.latch)) /* The offset itself is computed in one of the recorded loop blocks. */
+ {
+ tmp_rhs1=gimple_assign_rhs1 (g);
+ tmp_rhs2=gimple_assign_rhs2 (g);
+ if (tmp_rhs1 && SSA_NAME_DEF_STMT (tmp_rhs1)
+ && (gimple_bb (SSA_NAME_DEF_STMT (tmp_rhs1))==origin_loop.header
+ || gimple_bb (SSA_NAME_DEF_STMT (tmp_rhs1))==origin_loop.body
+ || gimple_bb (SSA_NAME_DEF_STMT (tmp_rhs1))==origin_loop.latch))
+ return false; /* Its first operand is defined in the header, body or latch as well: give up. */
+ if (tmp_rhs2 && SSA_NAME_DEF_STMT (tmp_rhs2)
+ && (gimple_bb (SSA_NAME_DEF_STMT (tmp_rhs2))==origin_loop.header
+ || gimple_bb (SSA_NAME_DEF_STMT (tmp_rhs2))==origin_loop.body
+ || gimple_bb (SSA_NAME_DEF_STMT (tmp_rhs2))==origin_loop.latch))
+ return false; /* Same restriction for its second operand. */
+ origin_loop.arr2_offs_to_arr1_without_indvar.safe_push (
+ SSA_NAME_DEF_STMT (off_without_indvar)); /* Keep the defining statement of the offset recorded for this step. */
+ } else
+ origin_loop.arr2_offs_to_arr1_without_indvar.safe_push (
+ NULL); /* No defining statement recorded: the offset operand is used as is. */
+ }
+
+ if (!origin_loop.arr1) /* First pointer base seen becomes arr1, the second arr2. */
+ origin_loop.arr1 = rhs1;
+ else if (!origin_loop.arr2)
+ origin_loop.arr2 = rhs1;
+ else
+ return false; /* A third pointer addition is not supported. */
+
+ return true;
+}
+
+/* Record the array comparison information of the original loop.  The body
+   block is the block of origin_loop.cond_stmt2.  The lhs of its first
+   non-debug statement, which must be an assignment, is stored in
+   origin_loop.body_indvar.  Starting from the two integer operands of
+   cond_stmt2, the definitions located in the body are walked backwards and
+   marked as visited: only zero-offset MEM_REF loads, POINTER_PLUS_EXPR
+   address computations and NOP_EXPR conversions of the induction variable
+   are accepted.  Returns true only when two array bases (origin_loop.arr1
+   and origin_loop.arr2) were recorded.  */
+
+static bool
+record_origin_loop_body (class loop *loop)
+{
+  basic_block body = gimple_bb (origin_loop.cond_stmt2);
+  origin_loop.body = body;
+
+  if (origin_loop.arr1 != NULL || origin_loop.arr2 != NULL)
+    return false;
+
+  /* Skip leading debug statements.  The block may contain nothing else, or
+     may start with a statement that is not an assignment; neither has an
+     assignment lhs to read.  */
+  gimple_stmt_iterator gsi = gsi_start_bb (body);
+  while (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
+    gsi_next (&gsi);
+  if (gsi_end_p (gsi) || !is_gimple_assign (gsi_stmt (gsi)))
+    return false;
+
+  tree body_indvar = gimple_assign_lhs (gsi_stmt (gsi));
+  if (!body_indvar)
+    return false;
+  origin_loop.body_indvar = body_indvar;
+
+  /* Clear the visited flag on the remaining statements of the body.  */
+  for (; !gsi_end_p (gsi); gsi_next (&gsi))
+    gimple_set_visited (gsi_stmt (gsi), false);
+
+  tree cond_lhs = gimple_cond_lhs (origin_loop.cond_stmt2);
+  tree cond_rhs = gimple_cond_rhs (origin_loop.cond_stmt2);
+  if (TREE_CODE (TREE_TYPE (cond_lhs)) != INTEGER_TYPE
+      || TREE_CODE (TREE_TYPE (cond_rhs)) != INTEGER_TYPE)
+    return false;
+
+  auto_vec<tree> stack;
+  stack.safe_push (cond_lhs);
+  stack.safe_push (cond_rhs);
+  gimple_set_visited (origin_loop.cond_stmt2, true);
+
+  while (!stack.is_empty ())
+    {
+      tree op = stack.pop ();
+      /* Constants have no defining statement.  */
+      if (TREE_CODE (op) != SSA_NAME)
+        continue;
+
+      gimple *def = SSA_NAME_DEF_STMT (op);
+      if (!def || gimple_bb (def) != body || !is_gimple_assign (def))
+        continue;
+      gimple_set_visited (def, true);
+
+      if (gimple_assign_rhs_code (def) == MEM_REF)
+        {
+          tree mem_ref = gimple_assign_rhs1 (def);
+          if (!check_body_mem_ref (mem_ref))
+            return false;
+          stack.safe_push (TREE_OPERAND (mem_ref, 0));
+        }
+      else if (gimple_assign_rhs_code (def) == POINTER_PLUS_EXPR)
+        {
+          /* The offset operand is analysed by check_body_pointer_plus and
+             is deliberately not pushed on the stack.  */
+          if (!check_body_pointer_plus (def))
+            return false;
+        }
+      else if (gimple_assign_rhs_code (def) == NOP_EXPR)
+        {
+          tree rhs = gimple_assign_rhs1 (def);
+          if (!same_ssa_name_var_p (rhs, origin_loop.indvar))
+            return false;
+          stack.safe_push (rhs);
+        }
+      else
+        return false;
+    }
+
+  return origin_loop.arr1 && origin_loop.arr2;
+}
+
+/* When detailed dumping is enabled, print the recorded limit, base, arr1,
+   arr2, cond_stmt1, cond_stmt2 and update_stmt of origin_loop to the dump
+   file so the matched loop form can be inspected.  */
+
+static void
+dump_origin_loop_info ()
+{
+  if (!dump_file || !(dump_flags & TDF_DETAILS))
+    return;
+
+  FILE *out = dump_file;
+
+  fprintf (out, "\nThe origin loop info:\n");
+
+  fprintf (out, "\n the origin_loop.limit is:\n");
+  print_node (out, "", origin_loop.limit, 0);
+  fprintf (out, "\n");
+
+  fprintf (out, "\n the origin_loop.base is:\n");
+  print_node (out, "", origin_loop.base, 0);
+  fprintf (out, "\n");
+
+  fprintf (out, "\n the origin_loop.arr1 is:\n");
+  print_node (out, "", origin_loop.arr1, 0);
+  fprintf (out, "\n");
+
+  fprintf (out, "\n the origin_loop.arr2 is:\n");
+  print_node (out, "", origin_loop.arr2, 0);
+  fprintf (out, "\n");
+
+  fprintf (out, "\n the origin_loop.cond_stmt1 is:\n");
+  print_gimple_stmt (out, origin_loop.cond_stmt1, 0);
+  fprintf (out, "\n");
+
+  fprintf (out, "\n the origin_loop.cond_stmt2 is:\n");
+  print_gimple_stmt (out, origin_loop.cond_stmt2, 0);
+  fprintf (out, "\n");
+
+  fprintf (out, "\n the origin_loop.update_stmt is:\n");
+  print_gimple_stmt (out, origin_loop.update_stmt, 0);
+  fprintf (out, "\n");
+}
+
+/* Returns true only if both recorded exits lead to the same block, that
+   block lies outside LOOP, and it contains a PHI whose result belongs to
+   the same variable as origin_loop.indvar.  */
+
+static bool
+check_exit_bb (class loop *loop)
+{
+  basic_block exit_bb = origin_loop.exit_bb1;
+
+  if (exit_bb != origin_loop.exit_bb2)
+    return false;
+  if (flow_bb_inside_loop_p (loop, exit_bb))
+    return false;
+
+  for (gphi_iterator it = gsi_start_phis (exit_bb); !gsi_end_p (it);
+       gsi_next (&it))
+    if (same_ssa_name_var_p (gimple_phi_result (it.phi ()),
+                             origin_loop.indvar))
+      return true;
+
+  return false;
+}
+
+/* Return true if BASE appears as an operand in the chain of assignments
+   defining LIMIT.  Both must be SSA_NAMEs.  The first two rhs operands of
+   each defining assignment are followed recursively, and DEPTH bounds the
+   number of levels inspected.  A definition that is not an assignment
+   (PHI, call, default definition) ends the search on that path.  */
+
+static bool
+if_limit_related_to_base_indvar (tree base, tree limit, int depth)
+{
+  if (depth <= 0)
+    return false;
+  if (!limit || !base)
+    return false;
+  if (TREE_CODE (limit) != SSA_NAME || TREE_CODE (base) != SSA_NAME)
+    return false;
+
+  /* gimple_assign_rhs1/2 are only valid on a GIMPLE_ASSIGN.  */
+  gimple *limit_def = SSA_NAME_DEF_STMT (limit);
+  if (!limit_def || !is_gimple_assign (limit_def))
+    return false;
+
+  tree rhs1 = gimple_assign_rhs1 (limit_def);
+  tree rhs2 = gimple_assign_rhs2 (limit_def);
+  if (rhs1 == base || rhs2 == base)
+    return true;
+
+  --depth;
+  return (if_limit_related_to_base_indvar (base, rhs1, depth)
+          || if_limit_related_to_base_indvar (base, rhs2, depth));
+}
+
+
+/* Dump the recorded origin_loop information, then make sure it meets the
+   requirements: the exit block passes check_exit_bb, arr1 and arr2 are
+   pointers to integer types, and the TYPE_SIZE of each pointed-to type is
+   at most 16.
+   NOTE(review): TYPE_SIZE is presumably expressed in bits, so this admits
+   8- and 16-bit elements -- confirm that 16-bit elements are intended.  */
+
+static bool
+check_origin_loop_info (class loop *loop)
+{
+  dump_origin_loop_info ();
+
+  if (!check_exit_bb (loop))
+    return false;
+
+  tree arr1_type = TREE_TYPE (origin_loop.arr1);
+  tree arr2_type = TREE_TYPE (origin_loop.arr2);
+  if (TREE_CODE (arr1_type) != POINTER_TYPE
+      || TREE_CODE (arr2_type) != POINTER_TYPE)
+    return false;
+
+  tree arr1_elem_type = TREE_TYPE (arr1_type);
+  tree arr2_elem_type = TREE_TYPE (arr2_type);
+  if (TREE_CODE (arr1_elem_type) != INTEGER_TYPE
+      || TREE_CODE (arr2_elem_type) != INTEGER_TYPE)
+    return false;
+
+  return tree_to_uhwi (TYPE_SIZE (arr1_elem_type)) <= 16
+         && tree_to_uhwi (TYPE_SIZE (arr2_elem_type)) <= 16;
+}
+
+/* Record the useful information of the original loop and judge whether the
+   information meets the specified conditions.  The recording steps run in
+   order (exit info, header, latch, body, final check) and stop at the first
+   one that fails; the matching message is written to the dump file when
+   detailed dumping is enabled.  */
+
+static bool
+check_record_loop_form (class loop *loop)
+{
+  const char *failure = NULL;
+
+  if (!record_origin_loop_exit_info (loop))
+    failure = "\nFailed to record loop exit information.\n";
+  else if (!record_origin_loop_header (loop))
+    failure = "\nFailed to record loop header information.\n";
+  else if (!record_origin_loop_latch (loop))
+    failure = "\nFailed to record loop latch information.\n";
+  else if (!record_origin_loop_body (loop))
+    failure = "\nFailed to record loop body information.\n";
+  else if (!check_origin_loop_info (loop))
+    failure = "\nFailed to check origin loop information.\n";
+
+  if (!failure)
+    return true;
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "%s", failure);
+  return false;
+}
+
+/* The main entry for judging whether the loop meets some conditions.
+   LOOP must have no inner loop, must not have a single exit, must have a
+   latch, a single back edge and a single preheader block.  Only then is the
+   origin_loop structure reset and filled by check_record_loop_form.  The
+   reason for the first failed structural check is written to the dump file
+   when detailed dumping is enabled.  */
+
+static bool
+determine_loop_form (class loop *loop)
+{
+  const char *reason = NULL;
+
+  if (loop->inner)
+    reason = "\nWrong loop form, there is inner loop or"
+             "redundant bb.\n";
+  else if (single_exit (loop) || !loop->latch)
+    reason = "\nWrong loop form, only one exit or loop_latch"
+             "does not exist.\n";
+  else if (!loop_single_backedge_p (loop))
+    /* Support loop with only one backedge.  */
+    reason = "\nWrong loop form, loop back edges are not"
+             "unique.\n";
+  else if (!loop_single_preheader_bb (loop))
+    /* Support loop with only one preheader BB.  */
+    reason = "\nWrong loop form, loop preheader bb are not"
+             "unique.\n";
+
+  if (reason)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        fprintf (dump_file, "%s", reason);
+      return false;
+    }
+
+  init_origin_loop_structure ();
+  return check_record_loop_form (loop);
+}
+
+/* Create the prolog block of the newly constructed loops and redirect
+   ENTRY_EDGE to it.  When prolog_assign exists in the original loop, the
+   corresponding increment is emitted in the prolog;
+   eg: <bb 7>
+       len_16 = len_10 + 1
+   otherwise a simple copy of the base is emitted;
+   eg: <bb 7>
+       len_16 = len_10
+
+   The new name becomes the current definition of origin_loop.indvar and is
+   registered in defs_map for the prolog block.  */
+
+static void
+create_prolog_bb (basic_block &prolog_bb, basic_block after_bb,
+                  basic_block dominator_bb, class loop *outer, edge entry_edge)
+{
+  prolog_bb = create_empty_bb (after_bb);
+  add_bb_to_loop (prolog_bb, outer);
+  redirect_edge_and_branch (entry_edge, prolog_bb);
+  set_immediate_dominator (CDI_DOMINATORS, prolog_bb, dominator_bb);
+
+  gimple_stmt_iterator insert_pos = gsi_last_bb (prolog_bb);
+  tree new_iv = copy_ssa_name (origin_loop.indvar);
+
+  gimple *init_stmt;
+  if (origin_loop.exist_prolog_assgin)
+    init_stmt
+      = gimple_build_assign (new_iv, PLUS_EXPR, origin_loop.base,
+                             build_int_cst (TREE_TYPE (origin_loop.base),
+                                            origin_loop.step));
+  else
+    init_stmt = gimple_build_assign (new_iv, NOP_EXPR, origin_loop.base);
+
+  gimple_seq seq = NULL;
+  gimple_seq_add_stmt (&seq, init_stmt);
+  gsi_insert_seq_after (&insert_pos, seq, GSI_NEW_STMT);
+
+  set_current_def (origin_loop.indvar, new_iv);
+  defs_map.put (prolog_bb, new_iv);
+}
+
+/* Create an empty preheader block for a new loop so that the loop keeps its
+   standard form.  The block is placed after AFTER_BB, added to OUTER, gets
+   DOMINATOR_BB as immediate dominator, and is registered in defs_map with
+   the current definition of origin_loop.indvar.  */
+
+static void
+create_loop_pred_bb (basic_block &loop_pred_bb, basic_block after_bb,
+                     basic_block dominator_bb, class loop *outer)
+{
+  basic_block pred = create_empty_bb (after_bb);
+  add_bb_to_loop (pred, outer);
+  set_immediate_dominator (CDI_DOMINATORS, pred, dominator_bb);
+
+  tree reaching_def = get_current_def (origin_loop.indvar);
+  defs_map.put (pred, reaching_def);
+
+  loop_pred_bb = pred;
+}
+
+/* Fill in the missing PHI arguments of BB.  For every PHI in BB and every
+   incoming edge that has no argument yet, the definition registered in
+   defs_map for the edge's source block is added, provided it belongs to
+   the same variable as the PHI result.  Edges whose source block has no
+   entry in defs_map are left untouched.  */
+
+static void
+rewrite_add_phi_arg (basic_block bb)
+{
+  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
+       gsi_next (&gsi))
+    {
+      gphi *phi = gsi.phi ();
+      tree res = gimple_phi_result (phi);
+
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, bb->preds)
+        {
+          if (PHI_ARG_DEF_FROM_EDGE (phi, e))
+            continue;
+
+          /* The lookup yields a null pointer when the source block was
+             never registered; do not dereference it in that case.  */
+          tree *slot = defs_map.get (e->src);
+          if (!slot)
+            continue;
+
+          tree var = *slot;
+          if (!same_ssa_name_var_p (var, res))
+            continue;
+
+          location_t loc;
+          if (virtual_operand_p (var))
+            loc = UNKNOWN_LOCATION;
+          else
+            loc = gimple_location (SSA_NAME_DEF_STMT (var));
+          add_phi_arg (phi, var, e, loc);
+        }
+    }
+}
+
+/* Create the header block of align_loop.
+   eg: <bb 9>
+       _18 = (long unsigned int) len_17;
+       _19 = _18 + 8;
+       _20 = (long unsigned int) len_limit_12 (D);
+       if (_19 <= _20)
+
+   A new PHI for the induction variable is created in the block; its result
+   becomes the current definition of origin_loop.indvar and is registered
+   in defs_map for the header.  */
+
+static void
+create_align_loop_header (basic_block &align_loop_header, basic_block after_bb,
+                          basic_block dominator_bb, class loop *outer)
+{
+  tree incoming_iv = get_current_def (origin_loop.indvar);
+
+  align_loop_header = create_empty_bb (after_bb);
+  add_bb_to_loop (align_loop_header, outer);
+  make_single_succ_edge (after_bb, align_loop_header, EDGE_FALLTHRU);
+  set_immediate_dominator (CDI_DOMINATORS, align_loop_header, dominator_bb);
+
+  gimple_stmt_iterator insert_pos = gsi_last_bb (align_loop_header);
+  gphi *iv_phi = create_phi_node (NULL_TREE, align_loop_header);
+  create_new_def_for (incoming_iv, iv_phi, gimple_phi_result_ptr (iv_phi));
+  tree iv = gimple_phi_result (iv_phi);
+
+  gimple_seq seq = NULL;
+  tree iv_wide = gimple_build (&seq, NOP_EXPR, long_unsigned_type_node, iv);
+  tree iv_next = gimple_build (&seq, PLUS_EXPR, TREE_TYPE (iv_wide), iv_wide,
+                               build_int_cst (TREE_TYPE (iv_wide), 8));
+  tree limit_wide = gimple_build (&seq, NOP_EXPR, long_unsigned_type_node,
+                                  origin_loop.limit);
+  gcond *guard = gimple_build_cond (LE_EXPR, iv_next, limit_wide,
+                                    NULL_TREE, NULL_TREE);
+  gimple_seq_add_stmt (&seq, guard);
+  gsi_insert_seq_after (&insert_pos, seq, GSI_NEW_STMT);
+
+  set_current_def (origin_loop.indvar, iv);
+  defs_map.put (align_loop_header, iv);
+}
+
+/* Create the body block of align_loop.
+   eg: <bb 10>
+       _21 = (sizetype) len_17;
+       _22 = cur_15 (D) + _21;
+       _23 = MEM[(long unsigned int *)_22];
+       _24 = pb_13 (D) + _21;
+       _25 = MEM[(long unsigned int *)_24];
+       if (_23 != _25)
+
+   The offset applied to origin_loop.arr2 is the induction variable
+   combined with the offsets recorded in origin_loop.arr2_offs_to_arr1;
+   origin_loop.arr1 is indexed by the induction variable alone.  Both
+   loads use long_unsigned_type_node.  */
+
+static void
+create_align_loop_body_bb (basic_block &align_loop_body_bb,
+                           basic_block after_bb, basic_block dominator_bb,
+                           class loop *outer)
+{
+  align_loop_body_bb = create_empty_bb (after_bb);
+  add_bb_to_loop (align_loop_body_bb, outer);
+  make_edge (after_bb, align_loop_body_bb, EDGE_TRUE_VALUE);
+  set_immediate_dominator (CDI_DOMINATORS, align_loop_body_bb, dominator_bb);
+  gimple_stmt_iterator insert_pos = gsi_last_bb (align_loop_body_bb);
+
+  gimple_seq seq = NULL;
+  tree iv_off = gimple_build (&seq, NOP_EXPR, sizetype,
+                              get_current_def (origin_loop.indvar));
+
+  /* Re-apply every recorded offset step on top of the induction variable
+     to obtain the offset used for arr2.  */
+  tree arr2_off = iv_off;
+  for (uint64_t i = 0; i < origin_loop.arr2_offs_to_arr1.length (); ++i)
+    {
+      tree step = origin_loop.arr2_offs_to_arr1[i];
+      tree_code step_code = origin_loop.arr2_offs_code_to_arr1[i];
+      gimple *step_def = origin_loop.arr2_offs_to_arr1_without_indvar[i];
+      if (step_def)
+        step = gimple_build (&seq, gimple_assign_rhs_code (step_def),
+                             TREE_TYPE (gimple_assign_rhs1 (step_def)),
+                             gimple_assign_rhs1 (step_def),
+                             gimple_assign_rhs2 (step_def));
+      arr2_off = gimple_build (&seq, step_code, TREE_TYPE (arr2_off),
+                               arr2_off, step);
+    }
+
+  /* Wide load from arr2.  */
+  tree addr2 = gimple_build (&seq, POINTER_PLUS_EXPR,
+                             TREE_TYPE (origin_loop.arr2), origin_loop.arr2,
+                             arr2_off);
+  gimple *load2
+    = gimple_build_assign (make_ssa_name (long_unsigned_type_node),
+        fold_build2 (MEM_REF, long_unsigned_type_node, addr2,
+          build_int_cst (build_pointer_type (long_unsigned_type_node), 0)));
+  gimple_seq_add_stmt (&seq, load2);
+  tree val2 = gimple_assign_lhs (load2);
+
+  /* Wide load from arr1.  */
+  tree addr1 = gimple_build (&seq, POINTER_PLUS_EXPR,
+                             TREE_TYPE (origin_loop.arr1), origin_loop.arr1,
+                             iv_off);
+  gimple *load1
+    = gimple_build_assign (make_ssa_name (long_unsigned_type_node),
+        fold_build2 (MEM_REF, long_unsigned_type_node, addr1,
+          build_int_cst (build_pointer_type (long_unsigned_type_node), 0)));
+  gimple_seq_add_stmt (&seq, load1);
+  tree val1 = gimple_assign_lhs (load1);
+
+  gcond *differ = gimple_build_cond (NE_EXPR, val2, val1,
+                                     NULL_TREE, NULL_TREE);
+  gimple_seq_add_stmt (&seq, differ);
+  gsi_insert_seq_after (&insert_pos, seq, GSI_NEW_STMT);
+}
+
+/* Create the latch block of align_loop, reached on the false edge of
+   AFTER_BB.
+   eg: <bb 11>
+       len_26 = len_17 + 8;
+
+   The incremented name is registered in defs_map for the latch.  */
+
+static void
+create_align_loop_latch (basic_block &align_loop_latch, basic_block after_bb,
+                         basic_block dominator_bb, class loop *outer)
+{
+  tree iv = get_current_def (origin_loop.indvar);
+
+  align_loop_latch = create_empty_bb (after_bb);
+  add_bb_to_loop (align_loop_latch, outer);
+  make_edge (after_bb, align_loop_latch, EDGE_FALSE_VALUE);
+  set_immediate_dominator (CDI_DOMINATORS, align_loop_latch, dominator_bb);
+
+  gimple_stmt_iterator insert_pos = gsi_last_bb (align_loop_latch);
+  tree next_iv = copy_ssa_name (iv);
+  gimple *bump = gimple_build_assign (next_iv, PLUS_EXPR, iv,
+                                      build_int_cst (TREE_TYPE (iv), 8));
+
+  gimple_seq seq = NULL;
+  gimple_seq_add_stmt (&seq, bump);
+  gsi_insert_seq_after (&insert_pos, seq, GSI_NEW_STMT);
+
+  defs_map.put (align_loop_latch, next_iv);
+}
+
+/* Allocate a new loop with the given HEADER and LATCH, register it as a
+   child of OUTER_LOOP and return it.  */
+
+static class loop *
+init_new_loop (class loop *outer_loop, basic_block header, basic_block latch)
+{
+  class loop *created = alloc_loop ();
+
+  created->header = header;
+  created->latch = latch;
+  add_loop (created, outer_loop);
+
+  return created;
+}
+
+/* Create the exit block of align_loop, reached on the true edge of
+   AFTER_BB (the wide comparison found a difference).
+   eg: <bb 12>
+       _27 = _23 ^ _25;
+       _28 = __builtin_ctzll (_27);
+       _29 = _28 >> 3;
+       len_30 = _29 + len_17;
+
+   The operands are taken from the condition that ends AFTER_BB.  The final
+   name is registered in defs_map for the exit block.  */
+
+static void
+create_align_loop_exit_bb (basic_block &align_loop_exit_bb,
+                           basic_block after_bb, basic_block dominator_bb,
+                           class loop *outer)
+{
+  tree iv = get_current_def (origin_loop.indvar);
+
+  align_loop_exit_bb = create_empty_bb (after_bb);
+  add_bb_to_loop (align_loop_exit_bb, outer);
+  make_edge (after_bb, align_loop_exit_bb, EDGE_TRUE_VALUE);
+  set_immediate_dominator (CDI_DOMINATORS, align_loop_exit_bb, dominator_bb);
+  gimple_stmt_iterator insert_pos = gsi_last_bb (align_loop_exit_bb);
+
+  gimple *wide_cmp = gsi_stmt (gsi_last_bb (after_bb));
+  tree wide_lhs = gimple_cond_lhs (wide_cmp);
+  tree wide_rhs = gimple_cond_rhs (wide_cmp);
+
+  gimple_seq seq = NULL;
+
+  /* Bits that differ between the two wide values.  */
+  tree diff = gimple_build (&seq, BIT_XOR_EXPR, TREE_TYPE (wide_lhs),
+                            wide_lhs, wide_rhs);
+
+  /* Count of trailing zero bits of the difference.  */
+  gcall *ctz_call
+    = gimple_build_call (builtin_decl_explicit (BUILT_IN_CTZLL), 1, diff);
+  tree zero_bits = make_ssa_name (integer_type_node);
+  gimple_call_set_lhs (ctz_call, zero_bits);
+  gimple_seq_add_stmt (&seq, ctz_call);
+
+  /* Shift right by 3, as in the example above.  */
+  tree shifted = copy_ssa_name (zero_bits);
+  gimple *shift = gimple_build_assign (shifted, RSHIFT_EXPR, zero_bits,
+                                       build_int_cst (TREE_TYPE (zero_bits),
+                                                      3));
+  gimple_seq_add_stmt (&seq, shift);
+
+  /* Add the result to the induction variable.  */
+  tree delta = gimple_build (&seq, NOP_EXPR, TREE_TYPE (iv), shifted);
+  tree exit_iv = copy_ssa_name (iv);
+  gimple *sum = gimple_build_assign (exit_iv, PLUS_EXPR, delta, iv);
+  gimple_seq_add_stmt (&seq, sum);
+
+  gsi_insert_seq_after (&insert_pos, seq, GSI_NEW_STMT);
+  defs_map.put (align_loop_exit_bb, exit_iv);
+}
+
+/* Create the header block of epilogue_loop.
+   eg: <bb 14>
+       # len_31 = PHI <len_17 (13), len_37 (16)>
+       if (len_31 != len_limit_12 (D))
+
+   The comparison reuses the code of origin_loop.cond_stmt1 against
+   origin_loop.limit.  The PHI result becomes the current definition of
+   origin_loop.indvar and is registered in defs_map for the header.  */
+
+static void
+create_epilogue_loop_header (basic_block &epilogue_loop_header,
+                             basic_block after_bb, basic_block dominator_bb,
+                             class loop *outer)
+{
+  tree incoming_iv = get_current_def (origin_loop.indvar);
+
+  epilogue_loop_header = create_empty_bb (after_bb);
+  add_bb_to_loop (epilogue_loop_header, outer);
+  make_single_succ_edge (after_bb, epilogue_loop_header, EDGE_FALLTHRU);
+  set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_header, dominator_bb);
+
+  gimple_stmt_iterator insert_pos = gsi_last_bb (epilogue_loop_header);
+  gphi *iv_phi = create_phi_node (NULL_TREE, epilogue_loop_header);
+  create_new_def_for (incoming_iv, iv_phi, gimple_phi_result_ptr (iv_phi));
+  tree iv = gimple_phi_result (iv_phi);
+
+  gcond *guard = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt1),
+                                    iv, origin_loop.limit,
+                                    NULL_TREE, NULL_TREE);
+  gimple_seq seq = NULL;
+  gimple_seq_add_stmt (&seq, guard);
+  gsi_insert_seq_after (&insert_pos, seq, GSI_NEW_STMT);
+
+  set_current_def (origin_loop.indvar, iv);
+  defs_map.put (epilogue_loop_header, iv);
+}
+
+/* Create the body block of epilogue_loop.
+   eg: <bb 15>
+       _32 = (sizetype) len_31;
+       _33 = pb_13 (D) + _32;
+       _34 = *_33;
+       _35 = cur_15 (D) + _32;
+       _36 = *_35;
+       if (_34 != _36)
+
+   origin_loop.arr1 is indexed by the induction variable; the offset for
+   origin_loop.arr2 additionally applies the steps recorded in
+   origin_loop.arr2_offs_to_arr1.  Both loads use unsigned_char_type_node.
+   The current definition of origin_loop.indvar is registered in defs_map
+   for the new block.  */
+
+static void
+create_epilogue_loop_body_bb (basic_block &epilogue_loop_body_bb,
+                              basic_block after_bb, basic_block dominator_bb,
+                              class loop *outer)
+{
+  tree iv = get_current_def (origin_loop.indvar);
+
+  epilogue_loop_body_bb = create_empty_bb (after_bb);
+  add_bb_to_loop (epilogue_loop_body_bb, outer);
+  make_edge (after_bb, epilogue_loop_body_bb, EDGE_TRUE_VALUE);
+  set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_body_bb, dominator_bb);
+  gimple_stmt_iterator insert_pos = gsi_last_bb (epilogue_loop_body_bb);
+
+  gimple_seq seq = NULL;
+
+  /* Narrow load from arr1.  */
+  tree offset = gimple_build (&seq, NOP_EXPR, sizetype, iv);
+  tree addr1 = gimple_build (&seq, POINTER_PLUS_EXPR,
+                             TREE_TYPE (origin_loop.arr1), origin_loop.arr1,
+                             offset);
+  gimple *load1
+    = gimple_build_assign (make_ssa_name (unsigned_char_type_node),
+        fold_build2 (MEM_REF, unsigned_char_type_node, addr1,
+          build_int_cst (TREE_TYPE (addr1), 0)));
+  gimple_seq_add_stmt (&seq, load1);
+  tree val1 = gimple_assign_lhs (load1);
+
+  /* Re-apply every recorded offset step to obtain the offset for arr2.  */
+  for (uint64_t i = 0; i < origin_loop.arr2_offs_to_arr1.length (); ++i)
+    {
+      tree step = origin_loop.arr2_offs_to_arr1[i];
+      tree_code step_code = origin_loop.arr2_offs_code_to_arr1[i];
+      gimple *step_def = origin_loop.arr2_offs_to_arr1_without_indvar[i];
+      if (step_def)
+        step = gimple_build (&seq, gimple_assign_rhs_code (step_def),
+                             TREE_TYPE (gimple_assign_rhs1 (step_def)),
+                             gimple_assign_rhs1 (step_def),
+                             gimple_assign_rhs2 (step_def));
+      offset = gimple_build (&seq, step_code, TREE_TYPE (offset), offset,
+                             step);
+    }
+
+  /* Narrow load from arr2.  */
+  tree addr2 = gimple_build (&seq, POINTER_PLUS_EXPR,
+                             TREE_TYPE (origin_loop.arr2), origin_loop.arr2,
+                             offset);
+  gimple *load2
+    = gimple_build_assign (make_ssa_name (unsigned_char_type_node),
+        fold_build2 (MEM_REF, unsigned_char_type_node, addr2,
+          build_int_cst (TREE_TYPE (addr2), 0)));
+  gimple_seq_add_stmt (&seq, load2);
+  tree val2 = gimple_assign_lhs (load2);
+
+  gcond *differ = gimple_build_cond (NE_EXPR, val1, val2,
+                                     NULL_TREE, NULL_TREE);
+  gimple_seq_add_stmt (&seq, differ);
+  gsi_insert_seq_after (&insert_pos, seq, GSI_NEW_STMT);
+
+  defs_map.put (epilogue_loop_body_bb, get_current_def (origin_loop.indvar));
+}
+
+/* Create the latch block of epilogue_loop, reached on the false edge of
+   AFTER_BB.
+   eg: <bb 16>
+       len_37 = len_31 + 1;
+
+   The increment is origin_loop.step.  The incremented name is registered
+   in defs_map for the latch.  */
+
+static void
+create_epilogue_loop_latch (basic_block &epilogue_loop_latch,
+                            basic_block after_bb, basic_block dominator_bb,
+                            class loop *outer)
+{
+  tree iv = get_current_def (origin_loop.indvar);
+
+  epilogue_loop_latch = create_empty_bb (after_bb);
+  add_bb_to_loop (epilogue_loop_latch, outer);
+  make_edge (after_bb, epilogue_loop_latch, EDGE_FALSE_VALUE);
+  set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_latch, dominator_bb);
+
+  gimple_stmt_iterator insert_pos = gsi_last_bb (epilogue_loop_latch);
+  tree next_iv = copy_ssa_name (iv);
+  gimple *bump
+    = gimple_build_assign (next_iv, PLUS_EXPR, iv,
+                           build_int_cst (TREE_TYPE (iv), origin_loop.step));
+
+  gimple_seq seq = NULL;
+  gimple_seq_add_stmt (&seq, bump);
+  gsi_insert_seq_after (&insert_pos, seq, GSI_NEW_STMT);
+
+  defs_map.put (epilogue_loop_latch, next_iv);
+}
+
+/* convert_to_new_loop
+ | |
+ | |
+ | | entry_edge
+ | ______ |
+ | / V V
+ | | -----origin_loop_header---
+ | | | |
+ | | -------------------------\
+ | | | \
+ | | V \___ ___ ___ ___ ___ ___ ___
+ | | -----origin_loop_body----- |
+ | | | | |
+ | | -------------------------\ |
+ | | | \___ ___ ___ ___ |
+ | | V V V
+ | | -----origin_loop_latch---- -----exit_bb------
+ | | | | | |
+ | | /-------------------------- ------------------
+ | \ __ /
+ |
+ | |
+ | ====> |entry_edge
+ | V
+ | -------prolog_bb-----
+ | | |
+ | ---------------------
+ | |
+ | V
+ | -----align_loop_header----
+ | /-----------------> | |
+ |/ --------------------------
+ || / \
+ || V V
+ || ---align_loop_body--- ---epilogue_loop_header--
+ || | | -------| |<---|
+ || --------------------\ / ------------------------- |
+ || | \____ | | |
+ || V | | V |
+ || ---align_loop_latch--- | | ---epilogue_loop_body---- |
+ || | | | | ----| | |
+ || ---------------------- | | / ------------------------- |
+ || / __________/ | | | |
+ || / | | | V |
+ | \ __________/ | | | ---epilogue_loop_latch--- |
+ | | | | | | |
+ | | | | ------------------------- /
+ | V | | | /
+ | -align_loop_exit_bb- | | \______________/
+ | | | | |
+ | -------------------- | |
+ | | | |
+ | | V V
+ | | -----exit_bb------
+ | |---->| |
+ | ------------------
+
+ The origin_loop conversion process starts from entry_edge and ends at
+ exit_bb; The execution logic of origin_loop is completely replaced by
+ align_loop + epilogue_loop:
+ 1) align_loop mainly implements the idea of using wide-type dereference
+ and comparison on array elements, so as to achieve the effect of
+ acceleration; For the corresponding source code understanding, please
+ refer to the description of the pass at the beginning;
+ 2) epilogue_loop processes the previous loop remaining array element
+ comparison. */
+
+
+basic_block alh, alb, all, elh, elb, ell; /* File-scope block handles; create_new_loops stores the align and epilogue body blocks in alb and elb, and does not write the other four. */
+
+static void
+create_new_loops (edge entry_edge)
+{ /* Build prolog_bb, align_loop and epilogue_loop starting at ENTRY_EDGE, connect them to the recorded exit blocks and remove the original exit edges. */
+ basic_block prolog_bb;
+ basic_block align_loop_header, align_loop_latch, align_loop_body_bb;
+ basic_block align_pred_bb, align_loop_exit_bb;
+ basic_block epilogue_loop_header, epilogue_loop_latch, epilogue_loop_body_bb;
+ basic_block epilogue_loop_pred_bb;
+ class loop *align_loop;
+ class loop *epilogue_loop;
+
+
+ class loop *outer = entry_edge->src->loop_father; /* All new blocks and loops are added to the loop containing the entry edge's source. */
+
+ create_prolog_bb (prolog_bb, entry_edge->src, entry_edge->src, outer,
+ entry_edge); /* Redirects entry_edge to the new prolog block. */
+
+ create_loop_pred_bb (align_pred_bb, prolog_bb, prolog_bb, outer);
+ make_single_succ_edge (prolog_bb, align_pred_bb, EDGE_FALLTHRU);
+
+ create_align_loop_header (align_loop_header, align_pred_bb,
+ align_pred_bb, outer);
+
+ create_align_loop_body_bb (align_loop_body_bb, align_loop_header,
+ align_loop_header, outer);
+
+ create_align_loop_latch (align_loop_latch, align_loop_body_bb,
+ align_loop_body_bb, outer);
+ make_edge (align_loop_latch, align_loop_header, EDGE_FALLTHRU); /* Back edge of align_loop. */
+ rewrite_add_phi_arg (align_loop_header); /* Both predecessors exist now, so the header PHI arguments can be filled in. */
+
+ align_loop = init_new_loop (outer, align_loop_header, align_loop_latch);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nPrint byte align loop %d:\n", align_loop->num);
+ flow_loop_dump (align_loop, dump_file, NULL, 1);
+ fprintf (dump_file, "\n\n");
+ }
+
+ create_align_loop_exit_bb (align_loop_exit_bb, align_loop_body_bb,
+ align_loop_body_bb, outer);
+
+ create_loop_pred_bb (epilogue_loop_pred_bb, align_loop_header,
+ align_loop_header, outer);
+ make_edge (align_loop_header, epilogue_loop_pred_bb, EDGE_FALSE_VALUE); /* The false edge of the align_loop header leads to the epilogue loop. */
+
+ create_epilogue_loop_header (epilogue_loop_header, epilogue_loop_pred_bb,
+ epilogue_loop_pred_bb, outer);
+
+ create_epilogue_loop_body_bb (epilogue_loop_body_bb, epilogue_loop_header,
+ epilogue_loop_header, outer);
+
+ create_epilogue_loop_latch (epilogue_loop_latch, epilogue_loop_body_bb,
+ epilogue_loop_body_bb, outer);
+ make_single_succ_edge (epilogue_loop_latch, epilogue_loop_header,
+ EDGE_FALLTHRU); /* Back edge of epilogue_loop. */
+ rewrite_add_phi_arg (epilogue_loop_header);
+
+ epilogue_loop = init_new_loop (outer, epilogue_loop_header,
+ epilogue_loop_latch);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nPrint epilogue loop %d:\n", epilogue_loop->num);
+ flow_loop_dump (epilogue_loop, dump_file, NULL, 1);
+ fprintf (dump_file, "\n\n");
+ }
+ make_single_succ_edge (align_loop_exit_bb, origin_loop.exit_bb1,
+ EDGE_FALLTHRU); /* Connect the three new exits to the recorded exit blocks. */
+ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb1,
+ entry_edge->src);
+ make_edge (epilogue_loop_body_bb, origin_loop.exit_bb1, EDGE_TRUE_VALUE);
+
+ make_edge (epilogue_loop_header, origin_loop.exit_bb2, EDGE_FALSE_VALUE);
+ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb2,
+ entry_edge->src);
+
+ rewrite_add_phi_arg (origin_loop.exit_bb1); /* Supply PHI arguments for the newly added incoming edges. */
+ rewrite_add_phi_arg (origin_loop.exit_bb2);
+
+ remove_edge (origin_loop.exit_e1); /* Drop the exit edges of the original loop. */
+ remove_edge (origin_loop.exit_e2);
+
+ alb=align_loop_body_bb;
+ elb=epilogue_loop_body_bb;
+}
+
+/* Make sure that the dominance relationship of the newly inserted cfg
+   is not missing.
+
+   DIR selects the dominance tree to repair; its information must already
+   be available.  For every basic block of the current function that
+   either has no immediate dominator recorded in DIR or is
+   origin_loop.exit_bb1, the immediate dominator is recomputed and stored.
+   All queries and updates use DIR, so the tree that is inspected is also
+   the tree that is updated.  */
+
+static void
+update_loop_dominator (cdi_direction dir)
+{
+  gcc_assert (dom_info_available_p (dir));
+
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      basic_block imm_bb = get_immediate_dominator (dir, bb);
+
+      /* exit_bb1 is always recomputed, even when it already has an
+         immediate dominator recorded.  */
+      if (!imm_bb || bb == origin_loop.exit_bb1)
+        set_immediate_dominator (dir, bb, recompute_dominator (dir, bb));
+    }
+}
+
+/* Clear information about the original loop: delete every basic block
+   returned by get_loop_body_in_dom_order for LOOP, release that array,
+   and finally delete LOOP itself.  */
+
+static void
+remove_origin_loop (class loop *loop)
+{
+  basic_block *blocks = get_loop_body_in_dom_order (loop);
+
+  /* The count is read once, before any block is deleted, so the loop
+     bound does not depend on later changes to LOOP.  */
+  const unsigned count = loop->num_nodes;
+
+  unsigned idx = 0;
+  while (idx < count)
+    {
+      delete_basic_block (blocks[idx]);
+      ++idx;
+    }
+
+  free (blocks);
+  delete_loop (loop);
+}
+
+/* Perform the conversion of origin_loop to new_loop.
+ The four steps run in this order: create the new loops starting at
+ origin_loop.entry_edge, remove the blocks and the loop structure of
+ LOOP, run update_loop_dominator on CDI_DOMINATORS, and call update_ssa
+ with TODO_update_ssa. */
+
+static void
+convert_to_new_loop (class loop *loop)
+{
+ create_new_loops (origin_loop.entry_edge);
+ remove_origin_loop (loop);
+ update_loop_dominator (CDI_DOMINATORS);
+ update_ssa (TODO_update_ssa);
+}
+
+/* The main entry of array-widen-compare optimizes.
+
+   Visits the loops of the current function with LI_FROM_INNERMOST and
+   converts every loop accepted by determine_loop_form.  Progress is
+   written to the dump file when detailed dumping is enabled.  Always
+   returns TODO_update_ssa.  */
+
+static unsigned int
+tree_ssa_array_widen_compare ()
+{
+  /* Evaluated at each use, exactly like the open-coded condition.  */
+  auto details_p = [] () -> bool
+    {
+      return dump_file && (dump_flags & TDF_DETAILS);
+    };
+
+  if (details_p ())
+    {
+      flow_loops_dump (dump_file, NULL, 1);
+      fprintf (dump_file, "\nConfirm which loop can be optimized using"
+                          " array-widen-compare\n");
+    }
+
+  for (auto loop: loops_list (cfun, LI_FROM_INNERMOST))
+    {
+      if (details_p ())
+        {
+          fprintf (dump_file, "======================================\n");
+          fprintf (dump_file, "Processing loop %d:\n", loop->num);
+          fprintf (dump_file, "======================================\n");
+          flow_loop_dump (loop, dump_file, NULL, 1);
+          fprintf (dump_file, "\n\n");
+        }
+
+      /* Loops whose form does not match are left untouched.  */
+      if (!determine_loop_form (loop))
+        continue;
+
+      if (details_p ())
+        {
+          fprintf (dump_file, "The %dth loop form is success matched,"
+                              "and the loop can be optimized.\n",
+                   loop->num);
+          dump_loop_bb (loop);
+        }
+
+      convert_to_new_loop (loop);
+    }
+
+  return TODO_update_ssa;
+}
+
+/* Array widen compare: pass descriptor and pass class.  */
+
+namespace {
+
+const pass_data pass_data_tree_array_widen_compare =
+{
+  GIMPLE_PASS, /* type */
+  "awiden_compare", /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_TREE_ARRAY_WIDEN_COMPARE, /* tv_id */
+  (PROP_cfg | PROP_ssa), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  (TODO_update_ssa | TODO_verify_all) /* todo_flags_finish */
+};
+
+class pass_array_widen_compare : public gimple_opt_pass
+{
+public:
+  pass_array_widen_compare (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_tree_array_widen_compare, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *);
+  virtual unsigned int execute (function *);
+
+}; // class pass_array_widen_compare
+
+/* The pass runs only when flag_array_widen_compare is positive and the
+   optimization level is at least 3.  */
+
+bool
+pass_array_widen_compare::gate (function *)
+{
+  return (flag_array_widen_compare > 0 && optimize >= 3);
+}
+
+/* Run the transformation on FUN.  Returns 0 without doing anything when
+   number_of_loops (FUN) is at most 1 or when the type sizes are not
+   64-bit long, 64-bit pointer and 32-bit int; otherwise returns the TODO
+   flags from tree_ssa_array_widen_compare.  */
+
+unsigned int
+pass_array_widen_compare::execute (function *fun)
+{
+  if (number_of_loops (fun) <= 1)
+    return 0;
+
+  /* Only supports LP64 data mode.  */
+  if (TYPE_PRECISION (long_integer_type_node) != 64
+      || POINTER_SIZE != 64 || TYPE_PRECISION (integer_type_node) != 32)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        fprintf (dump_file, "The current data mode is not supported,"
+                            " only the LP64 data mode is supported.\n");
+      return 0;
+    }
+
+  return tree_ssa_array_widen_compare ();
+}
+
+} // anon namespace
+
+/* Allocate a pass_array_widen_compare bound to CTXT and return it through
+   its gimple_opt_pass base.  The object is created with new and is not
+   released here.  */
+
+gimple_opt_pass *
+make_pass_array_widen_compare (gcc::context *ctxt)
+{
+  gimple_opt_pass *const pass = new pass_array_widen_compare (ctxt);
+  return pass;
+}
--
2.22.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/baozhaoling/gcc.git
git@gitee.com:baozhaoling/gcc.git
baozhaoling
gcc
gcc
a8

搜索帮助

23e8dbc6 1850385 7e0993f3 1850385