1 Star 0 Fork 0

碎月/ppcg1

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
hybrid.c 64.93 KB
一键复制 编辑 原始数据 按行查看 历史
Sven Verdoolaege 提交于 2018-02-17 19:59 . hybrid.c: fix typo in comment
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242
/*
* Copyright 2013 Ecole Normale Superieure
* Copyright 2015 Sven Verdoolaege
*
* Use of this software is governed by the MIT license
*
* Written by Sven Verdoolaege,
* Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
*/
#include <string.h>
#include <isl/space.h>
#include <isl/constraint.h>
#include <isl/val.h>
#include <isl/aff.h>
#include <isl/set.h>
#include <isl/map.h>
#include <isl/union_set.h>
#include <isl/union_map.h>
#include "hybrid.h"
#include "schedule.h"
/* The hybrid tiling implemented in this file is based on
* Grosser et al., "Hybrid Hexagonal/Classical Tiling for GPUs".
*/
/* Bounds on relative dependence distances in input to hybrid tiling.
* upper is an upper bound on the relative dependence distances
* in the first space dimension
* -lower is a lower bound on the relative dependence distances
* in all space dimensions.
*
* In particular,
*
* d_i >= -lower_i d_0
* and
* d_1 <= upper d_0
*
* for each dependence distance vector d, where d_1 is the component
* corresponding to the first space dimension.
*
* upper and lower are always non-negative.
* Some of the values may be NaN if no bound could be found.
*/
struct ppcg_ht_bounds {
isl_val *upper;
isl_multi_val *lower;
};
/* Free "bounds" along with all its fields.
*/
__isl_null ppcg_ht_bounds *ppcg_ht_bounds_free(
__isl_take ppcg_ht_bounds *bounds)
{
if (!bounds)
return NULL;
isl_val_free(bounds->upper);
isl_multi_val_free(bounds->lower);
free(bounds);
return NULL;
}
/* Create a ppcg_ht_bounds object for a band living in "space".
* The bounds are initialized to NaN.
*/
__isl_give ppcg_ht_bounds *ppcg_ht_bounds_alloc(__isl_take isl_space *space)
{
int i, n;
isl_ctx *ctx;
ppcg_ht_bounds *bounds;
if (!space)
return NULL;
ctx = isl_space_get_ctx(space);
bounds = isl_alloc_type(ctx, struct ppcg_ht_bounds);
if (!bounds)
goto error;
bounds->upper = isl_val_nan(ctx);
bounds->lower = isl_multi_val_zero(space);
n = isl_multi_val_dim(bounds->lower, isl_dim_set);
for (i = 0; i < n; ++i) {
isl_val *v = isl_val_copy(bounds->upper);
bounds->lower = isl_multi_val_set_val(bounds->lower, i, v);
}
if (!bounds->lower || !bounds->upper)
return ppcg_ht_bounds_free(bounds);
return bounds;
error:
isl_space_free(space);
return NULL;
}
void ppcg_ht_bounds_dump(__isl_keep ppcg_ht_bounds *bounds)
{
if (!bounds)
return;
fprintf(stderr, "lower: ");
isl_multi_val_dump(bounds->lower);
fprintf(stderr, "upper: ");
isl_val_dump(bounds->upper);
}
/* Return the upper bound on the relative dependence distances
* in the first space dimension.
*/
__isl_give isl_val *ppcg_ht_bounds_get_upper(__isl_keep ppcg_ht_bounds *bounds)
{
if (!bounds)
return NULL;
return isl_val_copy(bounds->upper);
}
/* Replace the upper bound on the relative dependence distances
* in the first space dimension by "upper".
*/
__isl_give ppcg_ht_bounds *ppcg_ht_bounds_set_upper(
__isl_take ppcg_ht_bounds *bounds, __isl_take isl_val *upper)
{
if (!bounds || !upper)
goto error;
isl_val_free(bounds->upper);
bounds->upper = upper;
return bounds;
error:
ppcg_ht_bounds_free(bounds);
isl_val_free(upper);
return NULL;
}
/* Return the lower bound on the relative dependence distances
* in space dimension "pos".
*/
__isl_give isl_val *ppcg_ht_bounds_get_lower(__isl_keep ppcg_ht_bounds *bounds,
int pos)
{
if (!bounds)
return NULL;
return isl_multi_val_get_val(bounds->lower, pos);
}
/* Replace the lower bound on the relative dependence distances
* in space dimension "pos" by "lower".
*/
__isl_give ppcg_ht_bounds *ppcg_ht_bounds_set_lower(
__isl_take ppcg_ht_bounds *bounds, int pos, __isl_take isl_val *lower)
{
if (!bounds || !lower)
goto error;
bounds->lower = isl_multi_val_set_val(bounds->lower, pos, lower);
if (!bounds->lower)
return ppcg_ht_bounds_free(bounds);
return bounds;
error:
ppcg_ht_bounds_free(bounds);
isl_val_free(lower);
return NULL;
}
/* Can the bounds on relative dependence distances recorded in "bounds"
* be used to perform hybrid tiling?
* In particular, have appropriate lower and upper bounds been found?
* Any NaN indicates that no corresponding bound was found.
*/
isl_bool ppcg_ht_bounds_is_valid(__isl_keep ppcg_ht_bounds *bounds)
{
isl_bool is_nan;
int i, n;
if (!bounds)
return isl_bool_error;
is_nan = isl_val_is_nan(bounds->upper);
if (is_nan < 0)
return isl_bool_error;
if (is_nan)
return isl_bool_false;
n = isl_multi_val_dim(bounds->lower, isl_dim_set);
for (i = 0; i < n; ++i) {
isl_val *v;
v = isl_multi_val_get_val(bounds->lower, i);
is_nan = isl_val_is_nan(v);
if (is_nan < 0)
return isl_bool_error;
if (is_nan)
return isl_bool_false;
isl_val_free(v);
}
return isl_bool_true;
}
/* Structure that represents the basic hexagonal tiling,
* along with information that is needed to perform the hybrid tiling.
*
* "bounds" are the bounds on the dependence distances that
* define the hexagonal shape and the required skewing in the remaining
* space dimensions.
*
* "input_node" points to the input pair of band nodes.
* "input_schedule" is the partial schedule of this input pair of band nodes.
* The space of this schedule is [P -> C], where P is the space
* of the parent node and C is the space of the child node.
*
* "space_sizes" represent the total size of a tile for the space
* dimensions, i.e., those corresponding to the child node.
* The space of "space_sizes" is C.
* If S_0 is the original tile size in the first space dimension,
* then the first entry of "space_sizes" is equal to
* W = 2*S_0 + floor(d_l h) + floor(d_u h).
* The remaining entries are the same as in the original tile sizes.
*
* The basic hexagonal tiling "hex" is defined
* in a "ts" (time-space) space and corresponds to the phase-1 tiles.
* "time_tile" maps the "ts" space to outer time tile.
* Is is equal to ts[t, s] -> floor(t/(2 * S_t)), with S_t the original tile
* size corresponding to the parent node.
* "local_time" maps the "ts" space to the time dimension inside each tile.
* It is equal to ts[t, s] -> t mod (2 S_t), with S_t the original tile
* size corresponding to the parent node.
* "shift_space" shifts the tiles at time tile T = floor(t/(2 S_t))
* in the space dimension such that they align to a multiple of W.
* It is equal to ts[t, s] -> s + (-(2 * shift_s)*T) % W,
* with shift_s = S_0 + floor(d_u h).
* "shift_phase" is the shift taken to go from phase 0 to phase 1
* It is equal to ts[t, s] -> ts[t + S_t, s + shift_s],
* with shift_s = S_0 + floor(d_u h).
*
* "project_ts" projects the space of the input schedule to the ts-space.
* It is equal to [P[t] -> C[s_0, ...]] -> ts[t, s_0].
*/
struct ppcg_ht_tiling {
int ref;
ppcg_ht_bounds *bounds;
isl_schedule_node *input_node;
isl_multi_union_pw_aff *input_schedule;
isl_multi_val *space_sizes;
isl_aff *time_tile;
isl_aff *local_time;
isl_aff *shift_space;
isl_multi_aff *shift_phase;
isl_set *hex;
isl_multi_aff *project_ts;
};
typedef struct ppcg_ht_tiling ppcg_ht_tiling;
/* Return the space of the pair of band nodes that form the input
* to the hybrid tiling.
* In particular, return the space [P -> C], where P is the space
* of the parent node and C is the space of the child node.
*/
__isl_give isl_space *ppcg_ht_tiling_get_input_space(
__isl_keep ppcg_ht_tiling *tile)
{
if (!tile)
return NULL;
return isl_multi_union_pw_aff_get_space(tile->input_schedule);
}
/* Remove a reference to "tile" and free "tile" along with all its fields
* as soon as the reference count drops to zero.
*/
static __isl_null ppcg_ht_tiling *ppcg_ht_tiling_free(
__isl_take ppcg_ht_tiling *tiling)
{
if (!tiling)
return NULL;
if (--tiling->ref > 0)
return NULL;
ppcg_ht_bounds_free(tiling->bounds);
isl_schedule_node_free(tiling->input_node);
isl_multi_union_pw_aff_free(tiling->input_schedule);
isl_multi_val_free(tiling->space_sizes);
isl_aff_free(tiling->time_tile);
isl_aff_free(tiling->local_time);
isl_aff_free(tiling->shift_space);
isl_multi_aff_free(tiling->shift_phase);
isl_set_free(tiling->hex);
isl_multi_aff_free(tiling->project_ts);
free(tiling);
return NULL;
}
/* Return a new reference to "tiling".
*/
__isl_give ppcg_ht_tiling *ppcg_ht_tiling_copy(
__isl_keep ppcg_ht_tiling *tiling)
{
if (!tiling)
return NULL;
tiling->ref++;
return tiling;
}
/* Return the isl_ctx to which "tiling" belongs.
*/
isl_ctx *ppcg_ht_tiling_get_ctx(__isl_keep ppcg_ht_tiling *tiling)
{
if (!tiling)
return NULL;
return isl_multi_union_pw_aff_get_ctx(tiling->input_schedule);
}
/* Representation of one of the two phases of hybrid tiling.
*
* "tiling" points to the shared tiling data.
*
* "time_tile", "local_time" and "shift_space" are equal to the corresponding
* fields of "tiling", pulled back to the input space.
* In case of phase 0, these expressions have also been moved
* from phase 1 to phase 0.
*
* "domain" contains the hexagonal tiling of this phase.
*
* "space_shift" is the shift that should be added to the space band
* in order to be able to apply rectangular tiling to the space.
* For phase 1, it is equal to
*
* [P[t] -> C[s_0, s_i]] -> C[(-(2 * shift_s)*T) % W, dl_i * u]
*
* with shift_s = S_0 + floor(d_u h),
* T equal to "time_tile" and u equal to "local_time".
* For phase 0, it is equal to
*
* [P[t] -> C[s_0, s_i]] -> C[shift_s + (-(2 * shift_s)*T) % W, dl_i * u]
*
* "space_tile" is the space tiling. It is equal to
*
* [P[t] -> C[s]] -> C[floor((s + space_shift)/space_size]
*/
struct ppcg_ht_phase {
ppcg_ht_tiling *tiling;
isl_aff *time_tile;
isl_aff *local_time;
isl_aff *shift_space;
isl_set *domain;
isl_multi_aff *space_shift;
isl_multi_aff *space_tile;
};
/* Free "phase" along with all its fields.
*/
static __isl_null ppcg_ht_phase *ppcg_ht_phase_free(
__isl_take ppcg_ht_phase *phase)
{
if (!phase)
return NULL;
ppcg_ht_tiling_free(phase->tiling);
isl_aff_free(phase->time_tile);
isl_aff_free(phase->local_time);
isl_aff_free(phase->shift_space);
isl_set_free(phase->domain);
isl_multi_aff_free(phase->space_shift);
isl_multi_aff_free(phase->space_tile);
free(phase);
return NULL;
}
/* Wrapper around ppcg_ht_phase_free for use as an argument
* to isl_id_set_free_user.
*/
static void ppcg_ht_phase_free_wrap(void *user)
{
ppcg_ht_phase *phase = user;
ppcg_ht_phase_free(phase);
}
/* Return the domain of hybrid tiling phase "phase".
*/
static __isl_give isl_set *ppcg_ht_phase_get_domain(ppcg_ht_phase *phase)
{
if (!phase)
return NULL;
return isl_set_copy(phase->domain);
}
/* Return the space of the pair of band nodes that form the input
* to the hybrid tiling of which "phase" is a phase.
* In particular, return the space [P -> C], where P is the space
* of the parent node and C is the space of the child node.
*/
static __isl_give isl_space *ppcg_ht_phase_get_input_space(
__isl_keep ppcg_ht_phase *phase)
{
if (!phase)
return NULL;
return ppcg_ht_tiling_get_input_space(phase->tiling);
}
/* Construct the lower left constraint of the hexagonal tile, i.e.,
*
* du a - b <= (2h+1) du - duh
* -du a + b + (2h+1) du - duh >= 0
*
* where duh = floor(du * h).
*
* This constraint corresponds to (6) in
* "Hybrid Hexagonal/Classical Tiling for GPUs".
*/
static __isl_give isl_constraint *hex_lower_left(__isl_take isl_local_space *ls,
__isl_keep isl_val *h, __isl_keep isl_val *du, __isl_keep isl_val *duh)
{
isl_val *v;
isl_aff *aff;
v = isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h), 2), 1);
v = isl_val_mul(v, isl_val_copy(du));
v = isl_val_sub(v, isl_val_copy(duh));
aff = isl_aff_val_on_domain(ls, v);
v = isl_val_neg(isl_val_copy(du));
aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, v);
aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, 1);
return isl_inequality_from_aff(aff);
}
/* Construct the lower constraint of the hexagonal tile, i.e.,
*
* a <= 2h+1
* -a + 2h+1 >= 0
*
* This constraint corresponds to (7) in
* "Hybrid Hexagonal/Classical Tiling for GPUs".
*/
static __isl_give isl_constraint *hex_lower(__isl_take isl_local_space *ls,
__isl_keep isl_val *h)
{
isl_val *v;
isl_aff *aff;
v = isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h), 2), 1);
aff = isl_aff_val_on_domain(ls, v);
aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 0, -1);
return isl_inequality_from_aff(aff);
}
/* Construct the lower right constraint of the hexagonal tile, i.e.,
*
* dl a + b <= (2h+1) dl + duh + (s0-1)
* -dl a - b + (2h+1) dl + duh + (s0-1) >= 0
*
* where duh = floor(du * h).
*
* This constraint corresponds to (8) in
* "Hybrid Hexagonal/Classical Tiling for GPUs".
*/
static __isl_give isl_constraint *hex_lower_right(
__isl_take isl_local_space *ls, __isl_keep isl_val *h,
__isl_keep isl_val *s0, __isl_keep isl_val *dl, __isl_keep isl_val *duh)
{
isl_val *v;
isl_aff *aff;
v = isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h), 2), 1);
v = isl_val_mul(v, isl_val_copy(dl));
v = isl_val_add(v, isl_val_copy(duh));
v = isl_val_add(v, isl_val_copy(s0));
v = isl_val_sub_ui(v, 1);
aff = isl_aff_val_on_domain(ls, v);
v = isl_val_neg(isl_val_copy(dl));
aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, v);
aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, -1);
return isl_inequality_from_aff(aff);
}
/* Construct the upper left constraint of the hexagonal tile, i.e.,
*
* dl a + b >= h dl - (d - 1)/d with d = den(dl)
* dl a + b - h dl + (d - 1)/d >= 0
*
* This constraint corresponds to (10) in
* "Hybrid Hexagonal/Classical Tiling for GPUs".
*/
static __isl_give isl_constraint *hex_upper_left(__isl_take isl_local_space *ls,
__isl_keep isl_val *h, __isl_keep isl_val *dl)
{
isl_val *v, *d;
isl_aff *aff;
d = isl_val_get_den_val(dl);
v = isl_val_sub_ui(isl_val_copy(d), 1);
v = isl_val_div(v, d);
v = isl_val_sub(v, isl_val_mul(isl_val_copy(h), isl_val_copy(dl)));
aff = isl_aff_val_on_domain(ls, v);
aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, isl_val_copy(dl));
aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, 1);
return isl_inequality_from_aff(aff);
}
/* Construct the upper right constraint of the hexagonal tile, i.e.,
*
* du a - b >= du h - duh - (s0-1) - dlh - (d - 1)/d with d = den(du)
* du a - b - du h + duh + (s0-1) + dlh + (d - 1)/d >= 0
*
* where dlh = floor(dl * h) and duh = floor(du * h).
*
* This constraint corresponds to (12) in
* "Hybrid Hexagonal/Classical Tiling for GPUs".
*/
static __isl_give isl_constraint *hex_upper_right(
__isl_take isl_local_space *ls, __isl_keep isl_val *h,
__isl_keep isl_val *s0, __isl_keep isl_val *du,
__isl_keep isl_val *dlh, __isl_keep isl_val *duh)
{
isl_val *v, *d;
isl_aff *aff;
d = isl_val_get_den_val(du);
v = isl_val_sub_ui(isl_val_copy(d), 1);
v = isl_val_div(v, d);
v = isl_val_sub(v, isl_val_mul(isl_val_copy(h), isl_val_copy(du)));
v = isl_val_add(v, isl_val_copy(duh));
v = isl_val_add(v, isl_val_copy(dlh));
v = isl_val_add(v, isl_val_copy(s0));
v = isl_val_sub_ui(v, 1);
aff = isl_aff_val_on_domain(ls, v);
aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, isl_val_copy(du));
aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, -1);
return isl_inequality_from_aff(aff);
}
/* Construct the uppper constraint of the hexagonal tile, i.e.,
*
* a >= 0
*
* This constraint corresponds to (13) in
* "Hybrid Hexagonal/Classical Tiling for GPUs".
*/
static __isl_give isl_constraint *hex_upper(__isl_take isl_local_space *ls)
{
isl_aff *aff;
aff = isl_aff_var_on_domain(ls, isl_dim_set, 0);
return isl_inequality_from_aff(aff);
}
/* Construct the basic hexagonal tile shape.
* "space" is the 2D space in which the hexagon should be constructed.
* h is st-1, with st the tile size in the time dimension
* s0 is the tile size in the space dimension
* dl is a bound on the negative relative dependence distances, i.e.,
*
* d_s >= -dl d_t
*
* du is a bound on the positive relative dependence distances, i.e.,
*
* d_s <= du d_t
*
* with (d_t,d_s) any dependence distance vector.
* dlh = floor(dl * h)
* duh = floor(du * h)
*
* The shape of the hexagon is as follows:
*
* 0 dlh dlh+s0-1
* ______ __
* 0 / \_ /
* / \_ /
* h / \ ______ /
* h+1 \_ // \\_
* \_ // \\_
* 2h+1 \______// \\
* 0 duh duh+s0-1
* duh+s0-1+dlh
* duh+s0-1+dlh+1+s0+1
*
* The next hexagon is shifted by duh + dlh + 2 * s0.
*
* The slope of the "/" constraints is dl.
* The slope of the "\_" constraints is du.
*/
static __isl_give isl_set *compute_hexagon(__isl_take isl_space *space,
__isl_keep isl_val *h, __isl_keep isl_val *s0,
__isl_keep isl_val *dl, __isl_keep isl_val *du,
__isl_keep isl_val *dlh, __isl_keep isl_val *duh)
{
isl_local_space *ls;
isl_constraint *c;
isl_basic_set *bset;
ls = isl_local_space_from_space(space);
c = hex_lower_left(isl_local_space_copy(ls), h, du, duh);
bset = isl_basic_set_from_constraint(c);
c = hex_lower(isl_local_space_copy(ls), h);
bset = isl_basic_set_add_constraint(bset, c);
c = hex_lower_right(isl_local_space_copy(ls), h, s0, dl, duh);
bset = isl_basic_set_add_constraint(bset, c);
c = hex_upper_left(isl_local_space_copy(ls), h, dl);
bset = isl_basic_set_add_constraint(bset, c);
c = hex_upper_right(isl_local_space_copy(ls), h, s0, du, dlh, duh);
bset = isl_basic_set_add_constraint(bset, c);
c = hex_upper(ls);
bset = isl_basic_set_add_constraint(bset, c);
return isl_set_from_basic_set(bset);
}
/* Name of the ts-space.
*/
static const char *ts_space_name = "ts";
/* Construct and return the space ts[t, s].
*/
static __isl_give isl_space *construct_ts_space(isl_ctx *ctx)
{
isl_space *s;
s = isl_space_set_alloc(ctx, 0, 2);
s = isl_space_set_tuple_name(s, isl_dim_set, ts_space_name);
return s;
}
/* Name of the local ts-space.
*/
static const char *local_ts_space_name = "local_ts";
/* Construct and return the space local_ts[t, s].
*/
static __isl_give isl_space *construct_local_ts_space(isl_ctx *ctx)
{
isl_space *s;
s = isl_space_set_alloc(ctx, 0, 2);
s = isl_space_set_tuple_name(s, isl_dim_set, local_ts_space_name);
return s;
}
/* Compute the total size of a tile for the space dimensions,
* i.e., those corresponding to the child node
* of the input pattern.
* If S_0 is the original tile size in the first space dimension,
* then the first entry of "space_sizes" is equal to
* W = 2*S_0 + floor(d_l h) + floor(d_u h).
* The remaining entries are the same as in the original tile sizes.
* "tile_sizes" contains the original tile sizes, including
* the tile size corresponding to the parent node.
* "dlh" is equal to floor(d_l h).
* "duh" is equal to floor(d_u h).
*/
static __isl_give isl_multi_val *compute_space_sizes(
__isl_keep isl_multi_val *tile_sizes,
__isl_keep isl_val *dlh, __isl_keep isl_val *duh)
{
isl_val *size;
isl_multi_val *space_sizes;
space_sizes = isl_multi_val_copy(tile_sizes);
space_sizes = isl_multi_val_factor_range(space_sizes);
size = isl_multi_val_get_val(space_sizes, 0);
size = isl_val_mul_ui(size, 2);
size = isl_val_add(size, isl_val_copy(duh));
size = isl_val_add(size, isl_val_copy(dlh));
space_sizes = isl_multi_val_set_val(space_sizes, 0, size);
return space_sizes;
}
/* Compute the offset of phase 1 with respect to phase 0
* in the ts-space ("space").
* In particular, return
*
* ts[st, s0 + duh]
*/
static __isl_give isl_multi_val *compute_phase_shift(
__isl_keep isl_space *space, __isl_keep isl_val *st,
__isl_keep isl_val *s0, __isl_keep isl_val *duh)
{
isl_val *v;
isl_multi_val *phase_shift;
phase_shift = isl_multi_val_zero(isl_space_copy(space));
phase_shift = isl_multi_val_set_val(phase_shift, 0, isl_val_copy(st));
v = isl_val_add(isl_val_copy(duh), isl_val_copy(s0));
phase_shift = isl_multi_val_set_val(phase_shift, 1, v);
return phase_shift;
}
/* Return the function
*
* ts[t, s] -> floor(t/(2 * st))
*
* representing the time tile.
* "space" is the space ts[t, s].
*/
static __isl_give isl_aff *compute_time_tile(__isl_keep isl_space *space,
__isl_keep isl_val *st)
{
isl_val *v;
isl_aff *t;
isl_local_space *ls;
ls = isl_local_space_from_space(isl_space_copy(space));
t = isl_aff_var_on_domain(ls, isl_dim_set, 0);
v = isl_val_mul_ui(isl_val_copy(st), 2);
t = isl_aff_floor(isl_aff_scale_down_val(t, v));
return t;
}
/* Compute a shift in the space dimension for tiles
* at time tile T = floor(t/(2 * S_t))
* such that they align to a multiple of the total space tile dimension W.
* In particular, compute
*
* ts[t, s] -> s + (-(2 * shift_s)*T) % W
*
* where shift_s is the shift of phase 1 with respect to phase 0
* in the space dimension (the first element of "phase_shift").
* W is stored in the first element of "space_sizes".
* "time_tile" is the function
*
* ts[t, s] -> floor(t/(2 * S_T))
*
* Since phase 1 is shifted by shift_s with respect to phase 0,
* the next line of phase 0 (at T+1) is shifted by 2*shift_s
* with respect to the previous line (at T).
* A shift of -(2 * shift_s)*T therefore allows the basic pattern
* (which starts at 0) to be applied.
* However, this shift will be used to obtain the tile coordinate
* in the first space dimension and if the original values
* in the space dimension are non-negative, then the shift should
* not make them negative. Moreover, the shift should be as minimal
* as possible.
* Since the pattern repeats itself with a period of W in the space
* dimension, the shift can be replaced by (-(2 * shift_s)*T) % W.
*/
static __isl_give isl_aff *compute_shift_space(__isl_keep isl_aff *time_tile,
__isl_keep isl_multi_val *space_sizes,
__isl_keep isl_multi_val *phase_shift)
{
isl_val *v;
isl_aff *s, *t;
isl_local_space *ls;
ls = isl_local_space_from_space(isl_aff_get_domain_space(time_tile));
t = isl_aff_copy(time_tile);
v = isl_val_mul_ui(isl_multi_val_get_val(phase_shift, 1), 2);
v = isl_val_neg(v);
t = isl_aff_scale_val(t, v);
v = isl_multi_val_get_val(space_sizes, 0);
t = isl_aff_mod_val(t, v);
s = isl_aff_var_on_domain(ls, isl_dim_set, 1);
s = isl_aff_add(s, t);
return s;
}
/* Give the phase_shift ts[S_t, S_0 + floor(d_u h)],
* compute a function that applies the shift, i.e.,
*
* ts[t, s] -> ts[t + S_t, s + S_0 + floor(d_u h)],
*/
static __isl_give isl_multi_aff *compute_shift_phase(
__isl_keep isl_multi_val *phase_shift)
{
isl_space *space;
isl_multi_aff *shift;
space = isl_multi_val_get_space(phase_shift);
shift = isl_multi_aff_multi_val_on_space(space,
isl_multi_val_copy(phase_shift));
space = isl_multi_aff_get_space(shift);
shift = isl_multi_aff_add(shift, isl_multi_aff_identity(space));
return shift;
}
/* Compute a mapping from the ts-space to the local coordinates
* within each tile. In particular, compute
*
* ts[t, s] -> local_ts[t % (2 S_t), (s + (-(2 * shift_s)*T) % W) % W]
*
* "ts" is the space ts[t, s]
* "local_ts" is the space local_ts[t, s]
* "shift_space" is equal to ts[t, s] -> s + (-(2 * shift_s)*T) % W
* "st" is the tile size in the time dimension S_t.
* The first element of "space_sizes" is equal to W.
*/
static __isl_give isl_multi_aff *compute_localize(
__isl_keep isl_space *local_ts, __isl_keep isl_aff *shift_space,
__isl_keep isl_val *st, __isl_keep isl_multi_val *space_sizes)
{
isl_val *v;
isl_space *space;
isl_aff *s, *t;
isl_multi_aff *localize;
space = isl_aff_get_domain_space(shift_space);
local_ts = isl_space_copy(local_ts);
space = isl_space_map_from_domain_and_range(space, local_ts);
localize = isl_multi_aff_identity(space);
t = isl_multi_aff_get_aff(localize, 0);
v = isl_val_mul_ui(isl_val_copy(st), 2);
t = isl_aff_mod_val(t, v);
localize = isl_multi_aff_set_aff(localize, 0, t);
s = isl_aff_copy(shift_space);
v = isl_multi_val_get_val(space_sizes, 0);
s = isl_aff_mod_val(s, v);
localize = isl_multi_aff_set_aff(localize, 1, s);
return localize;
}
/* Set the project_ts field of "tiling".
*
* This field projects the space of the input schedule to the ts-space.
* It is equal to [P[t] -> C[s_0, ...]] -> ts[t, s_0].
*/
static __isl_give ppcg_ht_tiling *ppcg_ht_tiling_set_project_ts(
__isl_take ppcg_ht_tiling *tiling)
{
int n;
isl_space *space;
isl_multi_aff *project;
if (!tiling)
return NULL;
space = ppcg_ht_tiling_get_input_space(tiling);
n = isl_space_dim(space, isl_dim_set);
project = isl_multi_aff_project_out_map(space, isl_dim_set, 2, n - 2);
project = isl_multi_aff_set_tuple_name(project,
isl_dim_out, ts_space_name);
if (!project)
return ppcg_ht_tiling_free(tiling);
tiling->project_ts = project;
return tiling;
}
/* Construct a hybrid tiling description from bounds on the dependence
* distances "bounds".
* "input_node" points to the original parent node.
* "input_schedule" is the combined schedule of the parent and child
* node in the input.
* "tile_sizes" are the original, user specified tile sizes.
*/
static __isl_give ppcg_ht_tiling *ppcg_ht_bounds_construct_tiling(
__isl_take ppcg_ht_bounds *bounds,
__isl_keep isl_schedule_node *input_node,
__isl_keep isl_multi_union_pw_aff *input_schedule,
__isl_keep isl_multi_val *tile_sizes)
{
isl_ctx *ctx;
ppcg_ht_tiling *tiling;
isl_multi_val *space_sizes, *phase_shift;
isl_aff *time_tile, *shift_space;
isl_multi_aff *localize;
isl_val *h, *duh, *dlh;
isl_val *st, *s0, *du, *dl;
isl_space *ts, *local_ts;
if (!bounds || !input_node || !input_schedule || !tile_sizes)
goto error;
ctx = isl_multi_union_pw_aff_get_ctx(input_schedule);
tiling = isl_calloc_type(ctx, struct ppcg_ht_tiling);
if (!tiling)
goto error;
tiling->ref = 1;
st = isl_multi_val_get_val(tile_sizes, 0);
h = isl_val_sub_ui(isl_val_copy(st), 1);
s0 = isl_multi_val_get_val(tile_sizes, 1);
du = ppcg_ht_bounds_get_upper(bounds);
dl = ppcg_ht_bounds_get_lower(bounds, 0);
duh = isl_val_floor(isl_val_mul(isl_val_copy(du), isl_val_copy(h)));
dlh = isl_val_floor(isl_val_mul(isl_val_copy(dl), isl_val_copy(h)));
ts = construct_ts_space(ctx);
local_ts = construct_local_ts_space(ctx);
space_sizes = compute_space_sizes(tile_sizes, dlh, duh);
phase_shift = compute_phase_shift(ts, st, s0, duh);
time_tile = compute_time_tile(ts, st);
shift_space = compute_shift_space(time_tile, space_sizes, phase_shift);
localize = compute_localize(local_ts, shift_space, st, space_sizes);
isl_space_free(ts);
tiling->input_node = isl_schedule_node_copy(input_node);
tiling->input_schedule = isl_multi_union_pw_aff_copy(input_schedule);
tiling->space_sizes = space_sizes;
tiling->bounds = bounds;
tiling->local_time = isl_multi_aff_get_aff(localize, 0);
tiling->hex = compute_hexagon(local_ts, h, s0, dl, du, dlh, duh);
tiling->hex = isl_set_preimage_multi_aff(tiling->hex, localize);
tiling->time_tile = time_tile;
tiling->shift_space = shift_space;
tiling->shift_phase = compute_shift_phase(phase_shift);
isl_multi_val_free(phase_shift);
isl_val_free(duh);
isl_val_free(dlh);
isl_val_free(du);
isl_val_free(dl);
isl_val_free(s0);
isl_val_free(st);
isl_val_free(h);
if (!tiling->input_schedule || !tiling->local_time || !tiling->hex ||
!tiling->shift_space || !tiling->shift_phase)
return ppcg_ht_tiling_free(tiling);
tiling = ppcg_ht_tiling_set_project_ts(tiling);
return tiling;
error:
ppcg_ht_bounds_free(bounds);
return NULL;
}
/* Are all members of the band node "node" coincident?
*/
static isl_bool all_coincident(__isl_keep isl_schedule_node *node)
{
int i, n;
n = isl_schedule_node_band_n_member(node);
for (i = 0; i < n; ++i) {
isl_bool c;
c = isl_schedule_node_band_member_get_coincident(node, i);
if (c < 0 || !c)
return c;
}
return isl_bool_true;
}
/* Does "node" satisfy the properties of the inner node in the input
* pattern for hybrid tiling?
* That is, is it a band node with only coincident members, of which
* there is at least one?
*/
static isl_bool has_child_properties(__isl_keep isl_schedule_node *node)
{
if (!node)
return isl_bool_error;
if (isl_schedule_node_get_type(node) != isl_schedule_node_band)
return isl_bool_false;
if (isl_schedule_node_band_n_member(node) < 1)
return isl_bool_false;
return all_coincident(node);
}
/* Does "node" satisfy the properties of the outer node in the input
* pattern for hybrid tiling?
* That is, is it a band node with a single member?
*/
static isl_bool has_parent_properties(__isl_keep isl_schedule_node *node)
{
if (!node)
return isl_bool_error;
if (isl_schedule_node_get_type(node) != isl_schedule_node_band)
return isl_bool_false;
if (isl_schedule_node_band_n_member(node) != 1)
return isl_bool_false;
return isl_bool_true;
}
/* Does the parent of "node" satisfy the input patttern for hybrid tiling?
* That is, does "node" satisfy the properties of the inner node and
* does the parent of "node" satisfy the properties of the outer node?
*/
isl_bool ppcg_ht_parent_has_input_pattern(__isl_keep isl_schedule_node *node)
{
isl_bool has_pattern;
has_pattern = has_child_properties(node);
if (has_pattern < 0 || !has_pattern)
return has_pattern;
node = isl_schedule_node_copy(node);
node = isl_schedule_node_parent(node);
has_pattern = has_parent_properties(node);
isl_schedule_node_free(node);
return has_pattern;
}
/* Does "node" satisfy the input patttern for hybrid tiling?
* That is, does "node" satisfy the properties of the outer node and
* does the child of "node" satisfy the properties of the inner node?
*/
isl_bool ppcg_ht_has_input_pattern(__isl_keep isl_schedule_node *node)
{
isl_bool has_pattern;
has_pattern = has_parent_properties(node);
if (has_pattern < 0 || !has_pattern)
return has_pattern;
node = isl_schedule_node_get_child(node, 0);
has_pattern = has_child_properties(node);
isl_schedule_node_free(node);
return has_pattern;
}
/* Check that "node" satisfies the input pattern for hybrid tiling.
* Error out if it does not.
*/
static isl_stat check_input_pattern(__isl_keep isl_schedule_node *node)
{
isl_bool has_pattern;
has_pattern = ppcg_ht_has_input_pattern(node);
if (has_pattern < 0)
return isl_stat_error;
if (!has_pattern)
isl_die(isl_schedule_node_get_ctx(node), isl_error_invalid,
"invalid input pattern for hybrid tiling",
return isl_stat_error);
return isl_stat_ok;
}
/* Extract the input schedule from "node", i.e., the product
* of the partial schedules of the parent and child nodes
* in the input pattern.
*/
static __isl_give isl_multi_union_pw_aff *extract_input_schedule(
__isl_keep isl_schedule_node *node)
{
isl_multi_union_pw_aff *partial, *partial2;
partial = isl_schedule_node_band_get_partial_schedule(node);
node = isl_schedule_node_get_child(node, 0);
partial2 = isl_schedule_node_band_get_partial_schedule(node);
isl_schedule_node_free(node);
return isl_multi_union_pw_aff_range_product(partial, partial2);
}
/* Collect all dependences from "scop" that are relevant for performing
* hybrid tiling on "node" and its child and map them to the schedule
* space of this pair of nodes.
*
* In case live range reordering is not used,
* the flow and the false dependences are collected.
* In case live range reordering is used,
* the flow and the forced dependences are collected, as well
* as the order dependences that are adjacent to non-local
* flow dependences.
*
* In all cases, only dependences that map to the same instance
* of the outer part of the schedule are considered.
*/
static __isl_give isl_map *collect_deps(struct ppcg_scop *scop,
__isl_keep isl_schedule_node *node)
{
isl_space *space;
isl_multi_union_pw_aff *prefix, *partial;
isl_union_map *flow, *other, *dep, *umap;
isl_map *map;
prefix = isl_schedule_node_get_prefix_schedule_multi_union_pw_aff(node);
partial = extract_input_schedule(node);
space = isl_multi_union_pw_aff_get_space(partial);
flow = isl_union_map_copy(scop->dep_flow);
flow = isl_union_map_eq_at_multi_union_pw_aff(flow,
isl_multi_union_pw_aff_copy(prefix));
if (!scop->options->live_range_reordering) {
other = isl_union_map_copy(scop->dep_false);
other = isl_union_map_eq_at_multi_union_pw_aff(other, prefix);
} else {
isl_union_map *local, *non_local, *order, *adj;
isl_union_set *domain, *range;
other = isl_union_map_copy(scop->dep_forced);
other = isl_union_map_eq_at_multi_union_pw_aff(other,
isl_multi_union_pw_aff_copy(prefix));
local = isl_union_map_copy(flow);
local = isl_union_map_eq_at_multi_union_pw_aff(local,
isl_multi_union_pw_aff_copy(partial));
non_local = isl_union_map_copy(flow);
non_local = isl_union_map_subtract(non_local, local);
order = isl_union_map_copy(scop->dep_order);
order = isl_union_map_eq_at_multi_union_pw_aff(order, prefix);
adj = isl_union_map_copy(order);
domain = isl_union_map_domain(isl_union_map_copy(non_local));
domain = isl_union_set_coalesce(domain);
adj = isl_union_map_intersect_range(adj, domain);
other = isl_union_map_union(other, adj);
adj = order;
range = isl_union_map_range(non_local);
range = isl_union_set_coalesce(range);
adj = isl_union_map_intersect_domain(adj, range);
other = isl_union_map_union(other, adj);
}
dep = isl_union_map_union(flow, other);
umap = isl_union_map_from_multi_union_pw_aff(partial);
dep = isl_union_map_apply_domain(dep, isl_union_map_copy(umap));
dep = isl_union_map_apply_range(dep, umap);
space = isl_space_map_from_set(space);
map = isl_union_map_extract_map(dep, space);
isl_union_map_free(dep);
map = isl_map_coalesce(map);
return map;
}
/* Given a constraint of the form
*
* a i_0 + b i_1 >= 0
* or
* a i_0 + b i_1 = 0
*
* use it to update one or both of the non-negative bounds
* in "list" = (min, max) such that
*
* i_1 >= -min i_0
* and
* i_1 <= max i_0
*
* If b = 0, then the constraint cannot be used.
* Otherwise, the constraint is equivalent to
*
* sgn(b) i_1 >= - a/abs(b) i_0
* i.e.,
* i_1 >= - a/abs(b) i_0
* or
* i_1 <= a/abs(b) i_0
*
* Set the first or second element of "list" to max(0, a/abs(b)),
* according to the sign of "b". Or set both in case the constraint
* is an equality, taking into account the sign change.
*/
static __isl_give isl_val_list *list_set_min_max(__isl_take isl_val_list *list,
__isl_keep isl_constraint *c)
{
isl_val *a, *b;
int sign;
int pos;
isl_bool eq, is_zero, is_neg;
eq = isl_constraint_is_equality(c);
if (eq < 0)
return isl_val_list_free(list);
b = isl_constraint_get_coefficient_val(c, isl_dim_set, 1);
is_zero = isl_val_is_zero(b);
if (is_zero == isl_bool_true) {
isl_val_free(b);
return list;
}
a = isl_constraint_get_coefficient_val(c, isl_dim_set, 0);
sign = isl_val_sgn(b);
b = isl_val_abs(b);
a = isl_val_div(a, b);
if (eq)
b = isl_val_copy(a);
pos = sign > 0 ? 0 : 1;
is_neg = isl_val_is_neg(a);
if (is_neg == isl_bool_true)
a = isl_val_set_si(a, 0);
list = isl_val_list_set_val(list, pos, a);
if (!eq)
return is_neg < 0 ? isl_val_list_free(list) : list;
pos = 1 - pos;
a = isl_val_neg(b);
is_neg = isl_val_is_neg(a);
if (is_neg == isl_bool_true)
a = isl_val_set_si(a, 0);
list = isl_val_list_set_val(list, pos, a);
return is_neg < 0 ? isl_val_list_free(list) : list;
}
/* If constraint "c" passes through the origin, then try and use it
* to update the non-negative bounds in "list" = (min, max) such that
*
* i_1 >= -min i_0
* and
* i_1 <= max i_0
*/
static isl_stat set_min_max(__isl_take isl_constraint *c, void *user)
{
isl_val *v;
isl_val_list **list = user;
isl_bool is_zero;
v = isl_constraint_get_constant_val(c);
is_zero = isl_val_is_zero(v);
isl_val_free(v);
if (is_zero == isl_bool_true)
*list = list_set_min_max(*list, c);
isl_constraint_free(c);
return is_zero < 0 ? isl_stat_error : isl_stat_ok;
}
/* Given a set of dependence distance vectors "dist", compute
* pair of non-negative bounds min and max such that
*
* d_pos >= -min d_0
* and
* d_pos <= max d_0
*
* and return the pair (min, max).
* If no bound can be found in either direction, then the bound
* is replaced by NaN.
*
* The dependence distances are first projected onto the (d_0, d_pos).
* Then the zero dependence distance is added and the convex hull is computed.
* Finally, the bounds are extracted from the constraints of the convex hull
* that pass through the origin.
*/
static __isl_give isl_val_list *min_max_dist(__isl_keep isl_set *dist, int pos)
{
isl_space *space;
isl_basic_set *hull;
int dim;
isl_ctx *ctx;
isl_val *nan;
isl_val_list *list;
ctx = isl_set_get_ctx(dist);
nan = isl_val_nan(ctx);
list = isl_val_list_alloc(ctx, 2);
list = isl_val_list_add(list, isl_val_copy(nan));
list = isl_val_list_add(list, nan);
dist = isl_set_copy(dist);
dim = isl_set_dim(dist, isl_dim_set);
if (dist && pos >= dim)
isl_die(ctx, isl_error_internal, "position out of bounds",
dist = isl_set_free(dist));
dist = isl_set_project_out(dist, isl_dim_set, pos + 1, dim - (pos + 1));
dist = isl_set_project_out(dist, isl_dim_set, 1, pos - 1);
space = isl_set_get_space(dist);
dist = isl_set_union(dist, isl_set_from_point(isl_point_zero(space)));
dist = isl_set_remove_divs(dist);
hull = isl_set_convex_hull(dist);
if (isl_basic_set_foreach_constraint(hull, &set_min_max, &list) < 0)
list = isl_val_list_free(list);
isl_basic_set_free(hull);
return list;
}
/* Given a schedule node "node" that, together with its child,
* satisfies the input pattern for hybrid tiling, compute bounds
* on the relative dependence distances of the child node with
* respect to the parent node. These bounds are needed to
* construct a hybrid tiling.
*
* First all relevant dependences are collected and mapped
* to the schedule space of the pair of nodes. Then, the
* dependence distances are computed in this space.
*
* These dependence distances are then projected onto a two-dimensional
* space consisting of the single schedule dimension of the outer node
* and one of the schedule dimensions of the inner node.
* The maximal and minimal relative dependence distances are extracted
* from these projections.
* This process is repeated for each of the schedule dimensions
* of the inner node. For the first dimension, both minimal and
* maximal relative dependence distances are stored in the result.
* For the other dimensions, only the minimal relative dependence
* distance is stored.
*/
__isl_give ppcg_ht_bounds *ppcg_ht_compute_bounds(struct ppcg_scop *scop,
__isl_keep isl_schedule_node *node)
{
ppcg_ht_bounds *bnd;
isl_space *space;
isl_map *map;
isl_set *dist;
isl_val_list *pair;
isl_schedule_node *child;
int n;
int i, dim;
if (!scop || !node || check_input_pattern(node) < 0)
return NULL;
child = isl_schedule_node_get_child(node, 0);
space = isl_schedule_node_band_get_space(child);
dim = isl_schedule_node_band_n_member(child);
isl_schedule_node_free(child);
bnd = ppcg_ht_bounds_alloc(space);
if (!bnd)
return NULL;
map = collect_deps(scop, node);
dist = isl_map_deltas(map);
n = isl_set_dim(dist, isl_dim_param);
dist = isl_set_project_out(dist, isl_dim_param, 0, n);
pair = min_max_dist(dist, 1);
bnd = ppcg_ht_bounds_set_lower(bnd, 0, isl_val_list_get_val(pair, 0));
bnd = ppcg_ht_bounds_set_upper(bnd, isl_val_list_get_val(pair, 1));
isl_val_list_free(pair);
for (i = 1; i < dim; ++i) {
pair = min_max_dist(dist, 1 + i);
bnd = ppcg_ht_bounds_set_lower(bnd, i,
isl_val_list_get_val(pair, 0));
isl_val_list_free(pair);
}
isl_set_free(dist);
return bnd;
}
/* Check if all the fields of "phase" are valid, freeing "phase"
* if they are not.
*/
static __isl_give ppcg_ht_phase *check_phase(__isl_take ppcg_ht_phase *phase)
{
if (!phase)
return NULL;
if (!phase->tiling || !phase->local_time ||
!phase->shift_space || !phase->domain)
return ppcg_ht_phase_free(phase);
return phase;
}
/* Construct a ppcg_ht_phase object, that simply copies
* information from "tiling".
* That is, the result is defined over the "ts" space and
* corresponds to phase 1.
*/
static __isl_give ppcg_ht_phase *construct_phase(
__isl_keep ppcg_ht_tiling *tiling)
{
isl_ctx *ctx;
ppcg_ht_phase *phase;
if (!tiling)
return NULL;
ctx = ppcg_ht_tiling_get_ctx(tiling);
phase = isl_calloc_type(ctx, struct ppcg_ht_phase);
if (!phase)
return NULL;
phase->tiling = ppcg_ht_tiling_copy(tiling);
phase->time_tile = isl_aff_copy(tiling->time_tile);
phase->local_time = isl_aff_copy(tiling->local_time);
phase->shift_space = isl_aff_copy(tiling->shift_space);
phase->domain = isl_set_copy(tiling->hex);
return check_phase(phase);
}
/* Align the parameters of the elements of "phase" to those of "space".
*/
static __isl_give ppcg_ht_phase *phase_align_params(
__isl_take ppcg_ht_phase *phase, __isl_take isl_space *space)
{
if (!phase)
goto error;
phase->time_tile = isl_aff_align_params(phase->time_tile,
isl_space_copy(space));
phase->local_time = isl_aff_align_params(phase->local_time,
isl_space_copy(space));
phase->shift_space = isl_aff_align_params(phase->shift_space,
isl_space_copy(space));
phase->domain = isl_set_align_params(phase->domain, space);
return check_phase(phase);
error:
isl_space_free(space);
return NULL;
}
/* Pull back "phase" over "ma".
* That is, take a phase defined over the range of "ma" and
* turn it into a phase defined over the domain of "ma".
*/
static __isl_give ppcg_ht_phase *pullback_phase(__isl_take ppcg_ht_phase *phase,
__isl_take isl_multi_aff *ma)
{
phase = phase_align_params(phase, isl_multi_aff_get_space(ma));
if (!phase)
goto error;
phase->time_tile = isl_aff_pullback_multi_aff(phase->time_tile,
isl_multi_aff_copy(ma));
phase->local_time = isl_aff_pullback_multi_aff(phase->local_time,
isl_multi_aff_copy(ma));
phase->shift_space = isl_aff_pullback_multi_aff(phase->shift_space,
isl_multi_aff_copy(ma));
phase->domain = isl_set_preimage_multi_aff(phase->domain, ma);
return check_phase(phase);
error:
isl_multi_aff_free(ma);
return NULL;
}
/* Pullback "phase" over phase->tiling->shift_phase, which shifts
* phase 0 to phase 1. The pullback therefore takes a phase 1
* description and turns it into a phase 0 description.
*/
static __isl_give ppcg_ht_phase *shift_phase(__isl_take ppcg_ht_phase *phase)
{
ppcg_ht_tiling *tiling;
if (!phase)
return NULL;
tiling = phase->tiling;
return pullback_phase(phase, isl_multi_aff_copy(tiling->shift_phase));
}
/* Take a "phase" defined over the ts-space and plug in the projection
* from the input schedule space to the ts-space.
* The result is then defined over this input schedule space.
*/
static __isl_give ppcg_ht_phase *lift_phase(__isl_take ppcg_ht_phase *phase)
{
ppcg_ht_tiling *tiling;
if (!phase)
return NULL;
tiling = phase->tiling;
return pullback_phase(phase, isl_multi_aff_copy(tiling->project_ts));
}
/* Compute the shift that should be added to the space band
* in order to be able to apply rectangular tiling to the space.
* Store the shift in phase->space_shift.
*
* In the first dimension, it is equal to shift_space - s.
* For phase 1, this results in
*
* (-(2 * shift_s)*T) % W
*
* In phase 0, the "s" in shift_space has been replaced by "s + shift_s",
* so the result is
*
* shift_s + (-(2 * shift_s)*T) % W
*
* In the other dimensions, the shift is equal to
*
* dl_i * local_time.
*/
static __isl_give ppcg_ht_phase *compute_space_shift(
__isl_take ppcg_ht_phase *phase)
{
int i, n;
isl_space *space;
isl_local_space *ls;
isl_aff *aff, *s;
isl_multi_aff *space_shift;
if (!phase)
return NULL;
space = ppcg_ht_phase_get_input_space(phase);
space = isl_space_unwrap(space);
space = isl_space_range_map(space);
space_shift = isl_multi_aff_zero(space);
aff = isl_aff_copy(phase->shift_space);
ls = isl_local_space_from_space(isl_aff_get_domain_space(aff));
s = isl_aff_var_on_domain(ls, isl_dim_set, 1);
aff = isl_aff_sub(aff, s);
space_shift = isl_multi_aff_set_aff(space_shift, 0, aff);
n = isl_multi_aff_dim(space_shift, isl_dim_out);
for (i = 1; i < n; ++i) {
isl_val *v;
isl_aff *time;
v = ppcg_ht_bounds_get_lower(phase->tiling->bounds, i);
time = isl_aff_copy(phase->local_time);
time = isl_aff_scale_val(time, v);
space_shift = isl_multi_aff_set_aff(space_shift, i, time);
}
if (!space_shift)
return ppcg_ht_phase_free(phase);
phase->space_shift = space_shift;
return phase;
}
/* Compute the space tiling and store the result in phase->space_tile.
* The space tiling is of the form
*
* [P[t] -> C[s]] -> C[floor((s + space_shift)/space_size]
*/
static __isl_give ppcg_ht_phase *compute_space_tile(
__isl_take ppcg_ht_phase *phase)
{
isl_space *space;
isl_multi_val *space_sizes;
isl_multi_aff *space_shift;
isl_multi_aff *tile;
if (!phase)
return NULL;
space = ppcg_ht_phase_get_input_space(phase);
space = isl_space_unwrap(space);
tile = isl_multi_aff_range_map(space);
space_shift = isl_multi_aff_copy(phase->space_shift);
tile = isl_multi_aff_add(space_shift, tile);
space_sizes = isl_multi_val_copy(phase->tiling->space_sizes);
tile = isl_multi_aff_scale_down_multi_val(tile, space_sizes);
tile = isl_multi_aff_floor(tile);
if (!tile)
return ppcg_ht_phase_free(phase);
phase->space_tile = tile;
return phase;
}
/* Construct a representation for one of the two phase for hybrid tiling
* "tiling". If "shift" is not set, then the phase is constructed
* directly from the hexagonal tile shape in "tiling", which represents
* the phase-1 tiles. If "shift" is set, then this tile shape is shifted
* back over tiling->shift_phase to obtain the phase-0 tiles.
*
* First copy data from "tiling", then optionally shift the phase and
* finally move the tiling from the "ts" space of "tiling" to
* the space of the input pattern.
*
* After the basic phase has been computed, also compute
* the corresponding space shift.
*/
static __isl_give ppcg_ht_phase *ppcg_ht_tiling_compute_phase(
__isl_keep ppcg_ht_tiling *tiling, int shift)
{
ppcg_ht_phase *phase;
phase = construct_phase(tiling);
if (shift)
phase = shift_phase(phase);
phase = lift_phase(phase);
phase = compute_space_shift(phase);
phase = compute_space_tile(phase);
return phase;
}
/* Consruct a function that is equal to the time tile of "phase0"
* on the domain of "phase0" and equal to the time tile of "phase1"
* on the domain of "phase1".
* The two domains are assumed to form a partition of the input
* schedule space.
*/
static __isl_give isl_pw_multi_aff *combine_time_tile(
__isl_keep ppcg_ht_phase *phase0, __isl_keep ppcg_ht_phase *phase1)
{
isl_aff *T;
isl_pw_aff *time, *time1;
if (!phase0 || !phase1)
return NULL;
T = isl_aff_copy(phase0->time_tile);
time = isl_pw_aff_alloc(ppcg_ht_phase_get_domain(phase0), T);
T = isl_aff_copy(phase1->time_tile);
time1 = isl_pw_aff_alloc(ppcg_ht_phase_get_domain(phase1), T);
time = isl_pw_aff_union_add(time, time1);
return isl_pw_multi_aff_from_pw_aff(time);
}
/* Name used in mark nodes that contain a pointer to a ppcg_ht_phase.
*/
static char *ppcg_phase_name = "phase";
/* Does "id" contain a pointer to a ppcg_ht_phase?
* That is, is it called "phase"?
*/
static isl_bool is_phase_id(__isl_keep isl_id *id)
{
const char *name;
name = isl_id_get_name(id);
if (!name)
return isl_bool_error;
return !strcmp(name, ppcg_phase_name);
}
/* Given a mark node with an identifier that points to a ppcg_ht_phase,
* extract this ppcg_ht_phase pointer.
*/
__isl_keep ppcg_ht_phase *ppcg_ht_phase_extract_from_mark(
__isl_keep isl_schedule_node *node)
{
isl_bool is_phase;
isl_id *id;
void *p;
if (!node)
return NULL;
if (isl_schedule_node_get_type(node) != isl_schedule_node_mark)
isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
"not a phase mark", return NULL);
id = isl_schedule_node_mark_get_id(node);
is_phase = is_phase_id(id);
p = isl_id_get_user(id);
isl_id_free(id);
if (is_phase < 0)
return NULL;
if (!is_phase)
isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
"not a phase mark", return NULL);
return p;
}
/* Insert a mark node at "node" holding a pointer to "phase".
*/
static __isl_give isl_schedule_node *insert_phase(
__isl_take isl_schedule_node *node, __isl_take ppcg_ht_phase *phase)
{
isl_ctx *ctx;
isl_id *id;
if (!node)
goto error;
ctx = isl_schedule_node_get_ctx(node);
id = isl_id_alloc(ctx, ppcg_phase_name, phase);
if (!id)
goto error;
id = isl_id_set_free_user(id, &ppcg_ht_phase_free_wrap);
node = isl_schedule_node_insert_mark(node, id);
return node;
error:
ppcg_ht_phase_free(phase);
isl_schedule_node_free(node);
return NULL;
}
/* Construct a mapping from the elements of the original pair of bands
* to which tiling was applied that belong to a tile of "phase"
* to that tile, preserving the values for the outer bands.
*
* The mapping is of the form
*
* [[outer] -> [P -> C]] -> [[outer] -> [tile]]
*
* where tile is defined by a concatenation of the time_tile and
* the space_tile.
*/
static __isl_give isl_map *construct_tile_map(__isl_keep ppcg_ht_phase *phase)
{
int depth;
isl_space *space;
isl_multi_aff *ma;
isl_multi_aff *tiling;
isl_map *el2tile;
depth = isl_schedule_node_get_schedule_depth(
phase->tiling->input_node);
space = isl_aff_get_space(phase->time_tile);
space = isl_space_params(space);
space = isl_space_set_from_params(space);
space = isl_space_add_dims(space, isl_dim_set, depth);
space = isl_space_map_from_set(space);
ma = isl_multi_aff_identity(space);
tiling = isl_multi_aff_flat_range_product(
isl_multi_aff_from_aff(isl_aff_copy(phase->time_tile)),
isl_multi_aff_copy(phase->space_tile));
el2tile = isl_map_from_multi_aff(tiling);
el2tile = isl_map_intersect_domain(el2tile,
isl_set_copy(phase->domain));
el2tile = isl_map_product(isl_map_from_multi_aff(ma), el2tile);
return el2tile;
}
/* Return a description of the full tiles of "phase" at the point
* in the original schedule tree where the tiling was applied.
*
* First construct a mapping from the input schedule dimensions
* up to and including the original pair of bands to which hybrid tiling
* was applied to schedule dimensions in which this original pair
* has been replaced by the tiles.
* This mapping is of the form
*
* [[outer] -> [P -> C]] -> [[outer] -> [tile]]
*
* Apply this mapping to the set of all values for the input
* schedule dimensions and then apply its inverse.
* The result is the set of values for the input schedule dimensions
* that would map to any of the tiles. Subtracting from this set
* the set of values that are actually executed produces the set
* of values that belong to a tile but that are not executed.
* Mapping these back to the tiles produces a description of
* the partial tiles. Subtracting these from the set of all tiles
* produces a description of the full tiles in the form
*
* [[outer] -> [tile]]
*/
static __isl_give isl_set *compute_full_tile(__isl_keep ppcg_ht_phase *phase)
{
isl_schedule_node *node;
isl_union_set *domain;
isl_union_map *prefix, *schedule;
isl_set *all, *partial, *all_el;
isl_map *tile2el, *el2tile;
isl_multi_union_pw_aff *mupa;
el2tile = construct_tile_map(phase);
tile2el = isl_map_reverse(isl_map_copy(el2tile));
node = phase->tiling->input_node;
prefix = isl_schedule_node_get_prefix_schedule_union_map(node);
domain = isl_schedule_node_get_domain(node);
mupa = isl_multi_union_pw_aff_copy(phase->tiling->input_schedule);
schedule = isl_union_map_from_multi_union_pw_aff(mupa);
schedule = isl_union_map_range_product(prefix, schedule);
all_el = isl_set_from_union_set(isl_union_set_apply(domain, schedule));
all_el = isl_set_coalesce(all_el);
all = isl_set_apply(isl_set_copy(all_el), isl_map_copy(el2tile));
partial = isl_set_copy(all);
partial = isl_set_apply(partial, tile2el);
partial = isl_set_subtract(partial, all_el);
partial = isl_set_apply(partial, el2tile);
return isl_set_subtract(all, partial);
}
/* Copy the AST loop types of the non-isolated part to those
* of the isolated part.
*/
static __isl_give isl_schedule_node *set_isolate_loop_type(
__isl_take isl_schedule_node *node)
{
int i, n;
n = isl_schedule_node_band_n_member(node);
for (i = 0; i < n; ++i) {
enum isl_ast_loop_type type;
type = isl_schedule_node_band_member_get_ast_loop_type(node, i);
node = isl_schedule_node_band_member_set_isolate_ast_loop_type(
node, i, type);
}
return node;
}
/* If options->isolate_full_tiles is set, then mark the full tiles
* in "node" for isolation. The full tiles are derived from "phase".
* "node" may point to a part of the tiling, e.g., the space tiling.
*
* The full tiles are originally computed in the form
*
* [[outer] -> [tile]]
*
* However, the band that "node" points to may only contain
* subset of the tile dimensions.
* The description above is therefore treated as
*
* [[outer] -> [before; this; after]]
*
* before is of size "pos"; this is of size "dim"; and
* after is of size "out - pos - dim".
* The after part is first project out. Then the range is split
* into a before and this part and finally the before part is moved
* to the domain, resulting in
*
* [[outer; before] -> [this]]
*
* This description is then used as the isolate option.
*
* The AST loop type for the isolated part is set to be the same
* as that of the non-isolated part.
*/
static __isl_give isl_schedule_node *ppcg_ht_phase_isolate_full_tile_node(
__isl_keep ppcg_ht_phase *phase, __isl_take isl_schedule_node *node,
struct ppcg_options *options)
{
int in, out, pos, depth, dim;
isl_space *space;
isl_multi_aff *ma1, *ma2;
isl_set *tile;
isl_map *map;
isl_set *set;
isl_union_set *opt;
if (!options->isolate_full_tiles)
return node;
depth = isl_schedule_node_get_schedule_depth(node);
dim = isl_schedule_node_band_n_member(node);
tile = compute_full_tile(phase);
map = isl_set_unwrap(tile);
in = isl_map_dim(map, isl_dim_in);
out = isl_map_dim(map, isl_dim_out);
pos = depth - in;
map = isl_map_project_out(map, isl_dim_out, pos + dim,
out - (pos + dim));
space = isl_space_range(isl_map_get_space(map));
ma1 = isl_multi_aff_project_out_map(isl_space_copy(space),
isl_dim_set, pos, dim);
ma2 = isl_multi_aff_project_out_map(space, isl_dim_set, 0, pos);
ma1 = isl_multi_aff_range_product(ma1, ma2);
map = isl_map_apply_range(map, isl_map_from_multi_aff(ma1));
map = isl_map_uncurry(map);
map = isl_map_flatten_domain(map);
set = isl_map_wrap(map);
set = isl_set_set_tuple_name(set, "isolate");
opt = isl_schedule_node_band_get_ast_build_options(node);
opt = isl_union_set_add_set(opt, set);
node = isl_schedule_node_band_set_ast_build_options(node, opt);
node = set_isolate_loop_type(node);
return node;
}
/* Insert a band node for performing the space tiling for "phase" at "node".
* In particular, insert a band node with partial schedule
*
* [P[t] -> C[s]] -> C[floor((s + space_shift)/space_size)]
*
* pulled back over the input schedule.
* "options" determines whether full tiles should be separated
* from partial tiles.
*
* The first tile dimension iterates over the hexagons in the same
* phase, which are independent by construction. The first dimension
* is therefore marked coincident.
* All dimensions are also marked for being generated as atomic loops
* because separation is usually not desirable on tile loops.
*/
static __isl_give isl_schedule_node *insert_space_tiling(
__isl_keep ppcg_ht_phase *phase, __isl_take isl_schedule_node *node,
struct ppcg_options *options)
{
isl_multi_aff *space_tile;
isl_multi_union_pw_aff *mupa;
if (!phase)
return isl_schedule_node_free(node);
space_tile = isl_multi_aff_copy(phase->space_tile);
mupa = isl_multi_union_pw_aff_copy(phase->tiling->input_schedule);
mupa = isl_multi_union_pw_aff_apply_multi_aff(mupa, space_tile);
node = isl_schedule_node_insert_partial_schedule(node, mupa);
node = ppcg_set_schedule_node_type(node, isl_ast_loop_atomic);
node = ppcg_ht_phase_isolate_full_tile_node(phase, node, options);
node = isl_schedule_node_band_member_set_coincident(node, 0, 1);
return node;
}
/* Given a pointer "node" to (a copy of) the original child node
* in the input pattern, adjust its partial schedule such that
* it starts at zero within each tile.
*
* That is, replace "s" by (s + space_shift) % space_sizes.
*/
__isl_give isl_schedule_node *ppcg_ht_phase_shift_space_point(
__isl_keep ppcg_ht_phase *phase, __isl_take isl_schedule_node *node)
{
isl_multi_val *space_sizes;
isl_multi_aff *space_shift;
isl_multi_union_pw_aff *mupa;
space_shift = isl_multi_aff_copy(phase->space_shift);
mupa = isl_multi_union_pw_aff_copy(phase->tiling->input_schedule);
mupa = isl_multi_union_pw_aff_apply_multi_aff(mupa, space_shift);
node = isl_schedule_node_band_shift(node, mupa);
space_sizes = isl_multi_val_copy(phase->tiling->space_sizes);
node = isl_schedule_node_band_mod(node, space_sizes);
return node;
}
/* Does
*
* s0 > delta + 2 * {delta * h} - 1
*
* hold?
*/
static isl_bool wide_enough(__isl_keep isl_val *s0, __isl_keep isl_val *delta,
__isl_keep isl_val *h)
{
isl_val *v, *v2;
isl_bool ok;
v = isl_val_mul(isl_val_copy(delta), isl_val_copy(h));
v2 = isl_val_floor(isl_val_copy(v));
v = isl_val_sub(v, v2);
v = isl_val_mul_ui(v, 2);
v = isl_val_add(v, isl_val_copy(delta));
v = isl_val_sub_ui(v, 1);
ok = isl_val_gt(s0, v);
isl_val_free(v);
return ok;
}
/* Is the tile size specified by "sizes" wide enough in the first space
* dimension, i.e., the base of the hexagon? This ensures that,
* after hybrid tiling using "bounds" and these sizes,
* neighboring hexagons in the same phase are far enough apart
* that they do not depend on each other.
* The test is only meaningful if the bounds are valid.
*
* Let st be (half) the size in the time dimension and s0 the base
* size in the first space dimension. Let delta be the dependence
* distance in either positive or negative direction. In principle,
* it should be enough to have s0 + 1 > delta, i.e., s0 >= delta.
* However, in case of fractional delta, the tile is not extended
* with delta * (st - 1), but instead with floor(delta * (st - 1)).
* The condition therefore needs to be adjusted to
*
* s0 + 1 > delta + 2 {delta * (st - 1)}
*
* (with {} the fractional part) to account for the two slanted sides.
* The condition in the paper "Hybrid Hexagonal/Classical Tiling for GPUs"
* translates to
*
* s0 >= delta + {delta * (st - 1)}
*
* Since 1 > frac(delta * (st - 1)), this condition implies
* the condition above.
*
* The condition is checked for both directions.
*/
isl_bool ppcg_ht_bounds_supports_sizes(__isl_keep ppcg_ht_bounds *bounds,
__isl_keep isl_multi_val *sizes)
{
isl_val *s0, *h;
isl_val *delta;
isl_bool ok;
ok = ppcg_ht_bounds_is_valid(bounds);
if (ok < 0 || !ok)
return ok;
h = isl_val_sub_ui(isl_multi_val_get_val(sizes, 0), 1);
s0 = isl_multi_val_get_val(sizes, 1);
delta = ppcg_ht_bounds_get_lower(bounds, 0);
ok = wide_enough(s0, delta, h);
isl_val_free(delta);
delta = ppcg_ht_bounds_get_upper(bounds);
if (ok == isl_bool_true)
ok = wide_enough(s0, delta, h);
isl_val_free(delta);
isl_val_free(s0);
isl_val_free(h);
return ok;
}
/* Check that the tile will be wide enough in the first space
* dimension, i.e., the base of the hexagon. This ensures that
* neighboring hexagons in the same phase are far enough apart
* that they do not depend on each other.
*
* Error out if the condition fails to hold.
*/
static isl_stat check_width(__isl_keep ppcg_ht_bounds *bounds,
__isl_keep isl_multi_val *sizes)
{
isl_bool ok;
ok = ppcg_ht_bounds_supports_sizes(bounds, sizes);
if (ok < 0)
return isl_stat_error;
if (!ok)
isl_die(isl_multi_val_get_ctx(sizes), isl_error_invalid,
"base of hybrid tiling hexagon not sufficiently wide",
return isl_stat_error);
return isl_stat_ok;
}
/* Given valid bounds on the relative dependence distances for
* the pair of nested nodes that "node" point to, as well as sufficiently
* wide tile sizes "sizes", insert the corresponding time and space tiling
* at "node", along with a pair of phase nodes that can be used
* to make further changes.
* The space of "sizes" should be the product of the spaces
* of the schedules of the pair of parent and child nodes.
* "options" determines whether full tiles should be separated
* from partial tiles.
*
* In particular, given an input of the form
*
* P - C - ...
*
* the output has the form
*
* /- F0 - M0 - CT0 - P - C - ...
* PT - seq
* \- F1 - M1 - CT1 - P - C - ...
*
* PT is the global time tiling. Within each of these tiles,
* two phases are executed in order. Within each phase, the schedule
* space is further subdivided into tiles through CT0 and CT1.
* The first dimension of each of these iterates over the hexagons
* within a phase and these are independent by construction.
* The F0 and F1 filters filter the statement instances that belong
* to the corresponding phase. The M0 and M1 marks contain a pointer
* to a ppcg_ht_phase object that can be used to perform further changes.
*
* After checking that input satisfies the requirements,
* a data structure is constructed that represents the tiling and
* two additional data structures are constructed for the two phases
* of the tiling. These are then used to define the filters F0 and F1 and
* combined to construct the time tiling PT.
* Then the time tiling node PT is inserted, followed by
* the sequence with the two filters, the CT space tiling nodes and
* the phase markers M.
*/
__isl_give isl_schedule_node *ppcg_ht_bounds_insert_tiling(
__isl_take ppcg_ht_bounds *bounds, __isl_take isl_multi_val *sizes,
__isl_take isl_schedule_node *node, struct ppcg_options *options)
{
isl_ctx *ctx;
isl_union_set *phase0;
isl_union_set *phase1;
isl_multi_union_pw_aff *input, *dom_time;
isl_union_pw_multi_aff *upma;
isl_pw_multi_aff *time;
isl_union_set_list *phases;
ppcg_ht_tiling *tiling;
ppcg_ht_phase *phase_0;
ppcg_ht_phase *phase_1;
if (!node || !sizes || !bounds)
goto error;
if (check_input_pattern(node) < 0 || check_width(bounds, sizes) < 0)
goto error;
ctx = isl_schedule_node_get_ctx(node);
input = extract_input_schedule(node);
tiling = ppcg_ht_bounds_construct_tiling(bounds, node, input, sizes);
phase_0 = ppcg_ht_tiling_compute_phase(tiling, 1);
phase_1 = ppcg_ht_tiling_compute_phase(tiling, 0);
time = combine_time_tile(phase_0, phase_1);
ppcg_ht_tiling_free(tiling);
upma = isl_union_pw_multi_aff_from_multi_union_pw_aff(
isl_multi_union_pw_aff_copy(input));
phase0 = isl_union_set_from_set(ppcg_ht_phase_get_domain(phase_0));
phase0 = isl_union_set_preimage_union_pw_multi_aff(phase0,
isl_union_pw_multi_aff_copy(upma));
phase1 = isl_union_set_from_set(ppcg_ht_phase_get_domain(phase_1));
phase1 = isl_union_set_preimage_union_pw_multi_aff(phase1, upma);
phases = isl_union_set_list_alloc(ctx, 2);
phases = isl_union_set_list_add(phases, phase0);
phases = isl_union_set_list_add(phases, phase1);
dom_time = isl_multi_union_pw_aff_apply_pw_multi_aff(input, time);
node = isl_schedule_node_insert_partial_schedule(node, dom_time);
node = isl_schedule_node_child(node, 0);
node = isl_schedule_node_insert_sequence(node, phases);
node = isl_schedule_node_child(node, 0);
node = isl_schedule_node_child(node, 0);
node = insert_space_tiling(phase_0, node, options);
node = insert_phase(node, phase_0);
node = isl_schedule_node_parent(node);
node = isl_schedule_node_next_sibling(node);
node = isl_schedule_node_child(node, 0);
node = insert_space_tiling(phase_1, node, options);
node = insert_phase(node, phase_1);
node = isl_schedule_node_parent(node);
node = isl_schedule_node_parent(node);
node = isl_schedule_node_parent(node);
isl_multi_val_free(sizes);
return node;
error:
isl_multi_val_free(sizes);
isl_schedule_node_free(node);
ppcg_ht_bounds_free(bounds);
return NULL;
}
/* Given a branch "node" that contains a sequence node with two phases
* of hybrid tiling as input, call "fn" on each of the two phase marker
* nodes.
*
* That is, the input is as follows
*
* /- F0 - M0 - ...
* ... - seq
* \- F1 - M1 - ...
*
* and "fn" is called on M0 and on M1.
*/
__isl_give isl_schedule_node *hybrid_tile_foreach_phase(
__isl_take isl_schedule_node *node,
__isl_give isl_schedule_node *(*fn)(__isl_take isl_schedule_node *node,
void *user), void *user)
{
int depth0, depth;
depth0 = isl_schedule_node_get_tree_depth(node);
while (node &&
isl_schedule_node_get_type(node) != isl_schedule_node_sequence)
node = isl_schedule_node_child(node, 0);
node = isl_schedule_node_child(node, 0);
node = isl_schedule_node_child(node, 0);
if (!node)
return NULL;
node = fn(node, user);
node = isl_schedule_node_parent(node);
node = isl_schedule_node_next_sibling(node);
node = isl_schedule_node_child(node, 0);
if (!node)
return NULL;
node = fn(node, user);
node = isl_schedule_node_parent(node);
node = isl_schedule_node_parent(node);
depth = isl_schedule_node_get_tree_depth(node);
node = isl_schedule_node_ancestor(node, depth - depth0);
return node;
}
/* This function is called on each of the two phase marks
* in a hybrid tiling tree.
* Drop the phase mark at "node".
*/
static __isl_give isl_schedule_node *drop_phase_mark(
__isl_take isl_schedule_node *node, void *user)
{
isl_id *id;
isl_bool is_phase;
if (isl_schedule_node_get_type(node) != isl_schedule_node_mark)
return node;
id = isl_schedule_node_mark_get_id(node);
is_phase = is_phase_id(id);
isl_id_free(id);
if (is_phase < 0)
return isl_schedule_node_free(node);
if (is_phase)
node = isl_schedule_node_delete(node);
return node;
}
/* Given a branch "node" that contains a sequence node with two phases
* of hybrid tiling as input, remove the two phase marker nodes.
*
* That is, the input is as follows
*
* /- F0 - M0 - ...
* ... - seq
* \- F1 - M1 - ...
*
* and the output is
*
* /- F0 - ...
* ... - seq
* \- F1 - ...
*/
__isl_give isl_schedule_node *hybrid_tile_drop_phase_marks(
__isl_take isl_schedule_node *node)
{
return hybrid_tile_foreach_phase(node, &drop_phase_mark, NULL);
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/zjn2958/ppcg1.git
git@gitee.com:zjn2958/ppcg1.git
zjn2958
ppcg1
ppcg1
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385