1 Star 0 Fork 5

Meredith/luajit

forked from src-anolis-os/luajit 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0010-PPC-Add-soft-float-support-to-interpreter.patch 73.91 KB
一键复制 编辑 原始数据 按行查看 历史
xingwei-liu 提交于 2022-07-28 14:43 . init package version for an8.6
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761
From fd37da0d586c331b0008fbfd653a9659344fe76f Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Wed, 26 Jul 2017 09:52:19 +0200
Subject: [PATCH 10/72] PPC: Add soft-float support to interpreter.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
Sponsored by Cisco Systems, Inc.
---
src/host/buildvm_asm.c | 2 +-
src/lj_arch.h | 29 +-
src/lj_ccall.c | 38 +-
src/lj_ccall.h | 4 +-
src/lj_ccallback.c | 30 +-
src/lj_frame.h | 2 +-
src/lj_ircall.h | 2 +-
src/vm_ppc.dasc | 1249 +++++++++++++++++++++++++++++++++-------
8 files changed, 1101 insertions(+), 255 deletions(-)
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index ffd1490..43595b3 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -338,7 +338,7 @@ void emit_asm(BuildCtx *ctx)
#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
#endif
-#if LJ_TARGET_PPC && !LJ_TARGET_PS3
+#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
/* Hard-float ABI. */
fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
#endif
diff --git a/src/lj_arch.h b/src/lj_arch.h
index b770564..0145a7c 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -254,6 +254,29 @@
#else
#define LJ_ARCH_BITS 32
#define LJ_ARCH_NAME "ppc"
+
+#if !defined(LJ_ARCH_HASFPU)
+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
+#define LJ_ARCH_HASFPU 0
+#else
+#define LJ_ARCH_HASFPU 1
+#endif
+#endif
+
+#if !defined(LJ_ABI_SOFTFP)
+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
+#define LJ_ABI_SOFTFP 1
+#else
+#define LJ_ABI_SOFTFP 0
+#endif
+#endif
+#endif
+
+#if LJ_ABI_SOFTFP
+#define LJ_ARCH_NOJIT 1 /* NYI */
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+#else
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
#endif
#define LJ_TARGET_PPC 1
@@ -262,7 +285,6 @@
#define LJ_TARGET_MASKSHIFT 0
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
-#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
#if LJ_TARGET_CONSOLE
#define LJ_ARCH_PPC32ON64 1
@@ -415,16 +437,13 @@
#error "No support for ILP32 model on ARM64"
#endif
#elif LJ_TARGET_PPC
-#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
-#error "No support for PowerPC CPUs without double-precision FPU"
-#endif
#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
#error "No support for little-endian PPC32"
#endif
#if LJ_ARCH_PPC64
#error "No support for PowerPC 64 bit mode (yet)"
#endif
-#ifdef __NO_FPRS__
+#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
#endif
#elif LJ_TARGET_MIPS32
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 5c252e5..799be48 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -387,6 +387,24 @@
#define CCALL_HANDLE_COMPLEXARG \
/* Pass complex by value in 2 or 4 GPRs. */
+#define CCALL_HANDLE_GPR \
+ /* Try to pass argument in GPRs. */ \
+ if (n > 1) { \
+ lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
+ if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
+ else if (ngpr + n > maxgpr) \
+ ngpr = maxgpr; /* Prevent reordering. */ \
+ } \
+ if (ngpr + n <= maxgpr) { \
+ dp = &cc->gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
+ } \
+
+#if LJ_ABI_SOFTFP
+#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
+#else
#define CCALL_HANDLE_REGARG \
if (isfp) { /* Try to pass argument in FPRs. */ \
if (nfpr + 1 <= CCALL_NARG_FPR) { \
@@ -395,24 +413,16 @@
d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
goto done; \
} \
- } else { /* Try to pass argument in GPRs. */ \
- if (n > 1) { \
- lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
- if (ctype_isinteger(d->info)) \
- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
- else if (ngpr + n > maxgpr) \
- ngpr = maxgpr; /* Prevent reordering. */ \
- } \
- if (ngpr + n <= maxgpr) { \
- dp = &cc->gpr[ngpr]; \
- ngpr += n; \
- goto done; \
- } \
+ } else { \
+ CCALL_HANDLE_GPR \
}
+#endif
+#if !LJ_ABI_SOFTFP
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
+#endif
#elif LJ_TARGET_MIPS32
/* -- MIPS o32 calling conventions ---------------------------------------- */
@@ -1080,7 +1090,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
}
if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
-#if LJ_TARGET_X64 || LJ_TARGET_PPC
+#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
cc->nfpr = nfpr; /* Required for vararg functions. */
#endif
cc->nsp = nsp;
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 59f6648..6efa48c 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -86,9 +86,9 @@ typedef union FPRArg {
#elif LJ_TARGET_PPC
#define CCALL_NARG_GPR 8
-#define CCALL_NARG_FPR 8
+#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
#define CCALL_NRET_GPR 4 /* For complex double. */
-#define CCALL_NRET_FPR 1
+#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
#define CCALL_SPS_EXTRA 4
#define CCALL_SPS_FREE 0
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 846827b..03494a7 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -419,6 +419,23 @@ void lj_ccallback_mcode_free(CTState *cts)
#elif LJ_TARGET_PPC
+#define CALLBACK_HANDLE_GPR \
+ if (n > 1) { \
+ lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \
+ ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \
+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
+ } \
+ if (ngpr + n <= maxgpr) { \
+ sp = &cts->cb.gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
+ }
+
+#if LJ_ABI_SOFTFP
+#define CALLBACK_HANDLE_REGARG \
+ CALLBACK_HANDLE_GPR \
+ UNUSED(isfp);
+#else
#define CALLBACK_HANDLE_REGARG \
if (isfp) { \
if (nfpr + 1 <= CCALL_NARG_FPR) { \
@@ -427,20 +444,15 @@ void lj_ccallback_mcode_free(CTState *cts)
goto done; \
} \
} else { /* Try to pass argument in GPRs. */ \
- if (n > 1) { \
- lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \
- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
- } \
- if (ngpr + n <= maxgpr) { \
- sp = &cts->cb.gpr[ngpr]; \
- ngpr += n; \
- goto done; \
- } \
+ CALLBACK_HANDLE_GPR \
}
+#endif
+#if !LJ_ABI_SOFTFP
#define CALLBACK_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
*(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
+#endif
#elif LJ_TARGET_MIPS32
diff --git a/src/lj_frame.h b/src/lj_frame.h
index 19c49a4..04cb5a3 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -226,7 +226,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_OFS_L 36
#define CFRAME_OFS_PC 32
#define CFRAME_OFS_MULTRES 28
-#define CFRAME_SIZE 272
+#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128)
#define CFRAME_SHIFT_MULTRES 3
#endif
#elif LJ_TARGET_MIPS32
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 7312006..9b3883b 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -287,7 +287,7 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
#define fp64_f2l __aeabi_f2lz
#define fp64_f2ul __aeabi_f2ulz
#endif
-#elif LJ_TARGET_MIPS
+#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
#define softfp_add __adddf3
#define softfp_sub __subdf3
#define softfp_mul __muldf3
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index b4260eb..0839668 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -103,6 +103,18 @@
|// Fixed register assignments for the interpreter.
|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
|
+|.macro .FPU, a, b
+|.if FPU
+| a, b
+|.endif
+|.endmacro
+|
+|.macro .FPU, a, b, c
+|.if FPU
+| a, b, c
+|.endif
+|.endmacro
+|
|// The following must be C callee-save (but BASE is often refetched).
|.define BASE, r14 // Base of current Lua stack frame.
|.define KBASE, r15 // Constants of current Lua function.
@@ -116,8 +128,10 @@
|.define TISNUM, r22
|.define TISNIL, r23
|.define ZERO, r24
+|.if FPU
|.define TOBIT, f30 // 2^52 + 2^51.
|.define TONUM, f31 // 2^52 + 2^51 + 2^31.
+|.endif
|
|// The following temporaries are not saved across C calls, except for RA.
|.define RA, r20 // Callee-save.
@@ -133,6 +147,7 @@
|
|// Saved temporaries.
|.define SAVE0, r21
+|.define SAVE1, r25
|
|// Calling conventions.
|.define CARG1, r3
@@ -141,8 +156,10 @@
|.define CARG4, r6 // Overlaps TMP3.
|.define CARG5, r7 // Overlaps INS.
|
+|.if FPU
|.define FARG1, f1
|.define FARG2, f2
+|.endif
|
|.define CRET1, r3
|.define CRET2, r4
@@ -213,10 +230,16 @@
|.endif
|.else
|
+|.if FPU
|.define SAVE_LR, 276(sp)
|.define CFRAME_SPACE, 272 // Delta for sp.
|// Back chain for sp: 272(sp) <-- sp entering interpreter
|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
+|.else
+|.define SAVE_LR, 132(sp)
+|.define CFRAME_SPACE, 128 // Delta for sp.
+|// Back chain for sp: 128(sp) <-- sp entering interpreter
+|.endif
|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
|.define SAVE_CR, 52(sp) // 32 bit CR save.
|.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
@@ -226,16 +249,25 @@
|.define SAVE_PC, 32(sp)
|.define SAVE_MULTRES, 28(sp)
|.define UNUSED1, 24(sp)
+|.if FPU
|.define TMPD_LO, 20(sp)
|.define TMPD_HI, 16(sp)
|.define TONUM_LO, 12(sp)
|.define TONUM_HI, 8(sp)
+|.else
+|.define SFSAVE_4, 20(sp)
+|.define SFSAVE_3, 16(sp)
+|.define SFSAVE_2, 12(sp)
+|.define SFSAVE_1, 8(sp)
+|.endif
|// Next frame lr: 4(sp)
|// Back chain for sp: 0(sp) <-- sp while in interpreter
|
+|.if FPU
|.define TMPD_BLO, 23(sp)
|.define TMPD, TMPD_HI
|.define TONUM_D, TONUM_HI
+|.endif
|
|.endif
|
@@ -245,7 +277,7 @@
|.else
| stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
|.endif
-| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
+| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|.endmacro
|.macro rest_, reg
|.if GPR64
@@ -253,7 +285,7 @@
|.else
| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
|.endif
-| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
+| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|.endmacro
|
|.macro saveregs
@@ -323,6 +355,7 @@
|// Trap for not-yet-implemented parts.
|.macro NYI; tw 4, sp, sp; .endmacro
|
+|.if FPU
|// int/FP conversions.
|.macro tonum_i, freg, reg
| xoris reg, reg, 0x8000
@@ -346,6 +379,7 @@
|.macro toint, reg, freg
| toint reg, freg, freg
|.endmacro
+|.endif
|
|//-----------------------------------------------------------------------
|
@@ -533,9 +567,19 @@ static void build_subroutines(BuildCtx *ctx)
| beq >2
|1:
| addic. TMP1, TMP1, -8
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz CARG1, 0(RA)
+ | lwz CARG2, 4(RA)
+ |.endif
| addi RA, RA, 8
+ |.if FPU
| stfd f0, 0(BASE)
+ |.else
+ | stw CARG1, 0(BASE)
+ | stw CARG2, 4(BASE)
+ |.endif
| addi BASE, BASE, 8
| bney <1
|
@@ -613,23 +657,23 @@ static void build_subroutines(BuildCtx *ctx)
| .toc ld TOCREG, SAVE_TOC
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lp BASE, L->base
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| lwz DISPATCH, L->glref // Setup pointer to dispatch table.
| li ZERO, 0
- | stw TMP3, TMPD
+ | .FPU stw TMP3, TMPD
| li TMP1, LJ_TFALSE
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
| li TISNIL, LJ_TNIL
| li_vmstate INTERP
- | lfs TOBIT, TMPD
+ | .FPU lfs TOBIT, TMPD
| lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
| la RA, -8(BASE) // Results start at BASE-8.
- | stw TMP3, TMPD
+ | .FPU stw TMP3, TMPD
| addi DISPATCH, DISPATCH, GG_G2DISP
| stw TMP1, 0(RA) // Prepend false to error message.
| li RD, 16 // 2 results: false + error message.
| st_vmstate
- | lfs TONUM, TMPD
+ | .FPU lfs TONUM, TMPD
| b ->vm_returnc
|
|//-----------------------------------------------------------------------
@@ -690,22 +734,22 @@ static void build_subroutines(BuildCtx *ctx)
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lp TMP1, L->top
| lwz PC, FRAME_PC(BASE)
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| stb CARG3, L->status
- | stw TMP3, TMPD
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | lfs TOBIT, TMPD
+ | .FPU stw TMP3, TMPD
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU lfs TOBIT, TMPD
| sub RD, TMP1, BASE
- | stw TMP3, TMPD
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | .FPU stw TMP3, TMPD
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
| addi RD, RD, 8
- | stw TMP0, TONUM_HI
+ | .FPU stw TMP0, TONUM_HI
| li_vmstate INTERP
| li ZERO, 0
| st_vmstate
| andix. TMP0, PC, FRAME_TYPE
| mr MULTRES, RD
- | lfs TONUM, TMPD
+ | .FPU lfs TONUM, TMPD
| li TISNIL, LJ_TNIL
| beq ->BC_RET_Z
| b ->vm_return
@@ -739,19 +783,19 @@ static void build_subroutines(BuildCtx *ctx)
| lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lp TMP1, L->top
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| add PC, PC, BASE
- | stw TMP3, TMPD
+ | .FPU stw TMP3, TMPD
| li ZERO, 0
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | lfs TOBIT, TMPD
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU lfs TOBIT, TMPD
| sub PC, PC, TMP2 // PC = frame delta + frame type
- | stw TMP3, TMPD
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | .FPU stw TMP3, TMPD
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
| sub NARGS8:RC, TMP1, BASE
- | stw TMP0, TONUM_HI
+ | .FPU stw TMP0, TONUM_HI
| li_vmstate INTERP
- | lfs TONUM, TMPD
+ | .FPU lfs TONUM, TMPD
| li TISNIL, LJ_TNIL
| st_vmstate
|
@@ -839,15 +883,30 @@ static void build_subroutines(BuildCtx *ctx)
| lwz INS, -4(PC)
| subi CARG2, RB, 16
| decode_RB8 SAVE0, INS
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz TMP2, 0(RA)
+ | lwz TMP3, 4(RA)
+ |.endif
| add TMP1, BASE, SAVE0
| stp BASE, L->base
| cmplw TMP1, CARG2
| sub CARG3, CARG2, TMP1
| decode_RA8 RA, INS
+ |.if FPU
| stfd f0, 0(CARG2)
+ |.else
+ | stw TMP2, 0(CARG2)
+ | stw TMP3, 4(CARG2)
+ |.endif
| bney ->BC_CAT_Z
+ |.if FPU
| stfdx f0, BASE, RA
+ |.else
+ | stwux TMP2, RA, BASE
+ | stw TMP3, 4(RA)
+ |.endif
| b ->cont_nop
|
|//-- Table indexing metamethods -----------------------------------------
@@ -900,9 +959,19 @@ static void build_subroutines(BuildCtx *ctx)
| // Returns TValue * (finished) or NULL (metamethod).
| cmplwi CRET1, 0
| beq >3
+ |.if FPU
| lfd f0, 0(CRET1)
+ |.else
+ | lwz TMP0, 0(CRET1)
+ | lwz TMP1, 4(CRET1)
+ |.endif
| ins_next1
+ |.if FPU
| stfdx f0, BASE, RA
+ |.else
+ | stwux TMP0, RA, BASE
+ | stw TMP1, 4(RA)
+ |.endif
| ins_next2
|
|3: // Call __index metamethod.
@@ -920,7 +989,12 @@ static void build_subroutines(BuildCtx *ctx)
| // Returns cTValue * or NULL.
| cmplwi CRET1, 0
| beq >1
+ |.if FPU
| lfd f14, 0(CRET1)
+ |.else
+ | lwz SAVE0, 0(CRET1)
+ | lwz SAVE1, 4(CRET1)
+ |.endif
| b ->BC_TGETR_Z
|1:
| stwx TISNIL, BASE, RA
@@ -975,11 +1049,21 @@ static void build_subroutines(BuildCtx *ctx)
| bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
| // Returns TValue * (finished) or NULL (metamethod).
| cmplwi CRET1, 0
+ |.if FPU
| lfdx f0, BASE, RA
+ |.else
+ | lwzux TMP2, RA, BASE
+ | lwz TMP3, 4(RA)
+ |.endif
| beq >3
| // NOBARRIER: lj_meta_tset ensures the table is not black.
| ins_next1
+ |.if FPU
| stfd f0, 0(CRET1)
+ |.else
+ | stw TMP2, 0(CRET1)
+ | stw TMP3, 4(CRET1)
+ |.endif
| ins_next2
|
|3: // Call __newindex metamethod.
@@ -990,7 +1074,12 @@ static void build_subroutines(BuildCtx *ctx)
| add PC, TMP1, BASE
| lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
| li NARGS8:RC, 24 // 3 args for func(t, k, v)
+ |.if FPU
| stfd f0, 16(BASE) // Copy value to third argument.
+ |.else
+ | stw TMP2, 16(BASE)
+ | stw TMP3, 20(BASE)
+ |.endif
| b ->vm_call_dispatch_f
|
|->vmeta_tsetr:
@@ -998,7 +1087,12 @@ static void build_subroutines(BuildCtx *ctx)
| stw PC, SAVE_PC
| bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
| // Returns TValue *.
+ |.if FPU
| stfd f14, 0(CRET1)
+ |.else
+ | stw SAVE0, 0(CRET1)
+ | stw SAVE1, 4(CRET1)
+ |.endif
| b ->cont_nop
|
|//-- Comparison metamethods ---------------------------------------------
@@ -1037,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx)
|
|->cont_ra: // RA = resultptr
| lwz INS, -4(PC)
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz CARG1, 0(RA)
+ | lwz CARG2, 4(RA)
+ |.endif
| decode_RA8 TMP1, INS
+ |.if FPU
| stfdx f0, BASE, TMP1
+ |.else
+ | stwux CARG1, TMP1, BASE
+ | stw CARG2, 4(TMP1)
+ |.endif
| b ->cont_nop
|
|->cont_condt: // RA = resultptr
@@ -1245,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx)
|.macro .ffunc_n, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
+ | lwz CARG1, 0(BASE)
+ |.if FPU
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG2, 4(BASE)
+ |.endif
| blt ->fff_fallback
- | checknum CARG3; bge ->fff_fallback
+ | checknum CARG1; bge ->fff_fallback
|.endmacro
|
|.macro .ffunc_nn, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 0(BASE)
+ | lwz CARG1, 0(BASE)
+ |.if FPU
| lfd FARG1, 0(BASE)
- | lwz CARG4, 8(BASE)
+ | lwz CARG3, 8(BASE)
| lfd FARG2, 8(BASE)
+ |.else
+ | lwz CARG2, 4(BASE)
+ | lwz CARG3, 8(BASE)
+ | lwz CARG4, 12(BASE)
+ |.endif
| blt ->fff_fallback
+ | checknum CARG1; bge ->fff_fallback
| checknum CARG3; bge ->fff_fallback
- | checknum CARG4; bge ->fff_fallback
|.endmacro
|
|// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
@@ -1281,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx)
| bge cr1, ->fff_fallback
| stw CARG3, 0(RA)
| addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
+ | addi TMP1, BASE, 8
+ | add TMP2, RA, NARGS8:RC
| stw CARG1, 4(RA)
| beq ->fff_res // Done if exactly 1 argument.
- | li TMP1, 8
- | subi RC, RC, 8
|1:
- | cmplw TMP1, RC
- | lfdx f0, BASE, TMP1
- | stfdx f0, RA, TMP1
+ | cmplw TMP1, TMP2
+ |.if FPU
+ | lfd f0, 0(TMP1)
+ | stfd f0, -8(TMP1) // Store one slot below the source, same as the soft-float path.
+ |.else
+ | lwz CARG1, 0(TMP1)
+ | lwz CARG2, 4(TMP1)
+ | stw CARG1, -8(TMP1)
+ | stw CARG2, -4(TMP1)
+ |.endif
| addi TMP1, TMP1, 8
| bney <1
| b ->fff_res
@@ -1303,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx)
| orc TMP1, TMP2, TMP0
| addi TMP1, TMP1, ~LJ_TISNUM+1
| slwi TMP1, TMP1, 3
+ |.if FPU
| la TMP2, CFUNC:RB->upvalue
| lfdx FARG1, TMP2, TMP1
+ |.else
+ | add TMP1, CFUNC:RB, TMP1
+ | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
+ | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
+ |.endif
| b ->fff_resn
|
|//-- Base library: getters and setters ---------------------------------
@@ -1382,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx)
| mr CARG1, L
| bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
| // Returns cTValue *.
+ |.if FPU
| lfd FARG1, 0(CRET1)
+ |.else
+ | lwz CARG2, 4(CRET1)
+ | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
+ |.endif
| b ->fff_resn
|
|//-- Base library: conversions ------------------------------------------
@@ -1391,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx)
| // Only handles the number case inline (without a base argument).
| cmplwi NARGS8:RC, 8
| lwz CARG1, 0(BASE)
+ |.if FPU
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG2, 4(BASE)
+ |.endif
| bne ->fff_fallback // Exactly one argument.
| checknum CARG1; bgt ->fff_fallback
| b ->fff_resn
@@ -1442,12 +1578,23 @@ static void build_subroutines(BuildCtx *ctx)
| cmplwi CRET1, 0
| li CARG3, LJ_TNIL
| beq ->fff_restv // End of traversal: return nil.
- | lfd f0, 8(BASE) // Copy key and value to results.
| la RA, -8(BASE)
+ |.if FPU
+ | lfd f0, 8(BASE) // Copy key and value to results.
| lfd f1, 16(BASE)
| stfd f0, 0(RA)
- | li RD, (2+1)*8
| stfd f1, 8(RA)
+ |.else
+ | lwz CARG1, 8(BASE)
+ | lwz CARG2, 12(BASE)
+ | lwz CARG3, 16(BASE)
+ | lwz CARG4, 20(BASE)
+ | stw CARG1, 0(RA)
+ | stw CARG2, 4(RA)
+ | stw CARG3, 8(RA)
+ | stw CARG4, 12(RA)
+ |.endif
+ | li RD, (2+1)*8
| b ->fff_res
|
|.ffunc_1 pairs
@@ -1456,17 +1603,32 @@ static void build_subroutines(BuildCtx *ctx)
| bne ->fff_fallback
#if LJ_52
| lwz TAB:TMP2, TAB:CARG1->metatable
+ |.if FPU
| lfd f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| cmplwi TAB:TMP2, 0
| la RA, -8(BASE)
| bne ->fff_fallback
#else
+ |.if FPU
| lfd f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| la RA, -8(BASE)
#endif
| stw TISNIL, 8(BASE)
| li RD, (3+1)*8
+ |.if FPU
| stfd f0, 0(RA)
+ |.else
+ | stw TMP0, 0(RA)
+ | stw TMP1, 4(RA)
+ |.endif
| b ->fff_res
|
|.ffunc ipairs_aux
@@ -1512,14 +1674,24 @@ static void build_subroutines(BuildCtx *ctx)
| stfd FARG2, 0(RA)
|.endif
| ble >2 // Not in array part?
+ |.if FPU
| lwzx TMP2, TMP1, TMP3
| lfdx f0, TMP1, TMP3
+ |.else
+ | lwzux TMP2, TMP1, TMP3
+ | lwz TMP3, 4(TMP1)
+ |.endif
|1:
| checknil TMP2
| li RD, (0+1)*8
| beq ->fff_res // End of iteration, return 0 results.
| li RD, (2+1)*8
+ |.if FPU
| stfd f0, 8(RA)
+ |.else
+ | stw TMP2, 8(RA)
+ | stw TMP3, 12(RA)
+ |.endif
| b ->fff_res
|2: // Check for empty hash part first. Otherwise call C function.
| lwz TMP0, TAB:CARG1->hmask
@@ -1533,7 +1705,11 @@ static void build_subroutines(BuildCtx *ctx)
| li RD, (0+1)*8
| beq ->fff_res
| lwz TMP2, 0(CRET1)
+ |.if FPU
| lfd f0, 0(CRET1)
+ |.else
+ | lwz TMP3, 4(CRET1)
+ |.endif
| b <1
|
|.ffunc_1 ipairs
@@ -1542,12 +1718,22 @@ static void build_subroutines(BuildCtx *ctx)
| bne ->fff_fallback
#if LJ_52
| lwz TAB:TMP2, TAB:CARG1->metatable
+ |.if FPU
| lfd f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| cmplwi TAB:TMP2, 0
| la RA, -8(BASE)
| bne ->fff_fallback
#else
+ |.if FPU
| lfd f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| la RA, -8(BASE)
#endif
|.if DUALNUM
@@ -1557,7 +1743,12 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| stw ZERO, 12(BASE)
| li RD, (3+1)*8
+ |.if FPU
| stfd f0, 0(RA)
+ |.else
+ | stw TMP0, 0(RA)
+ | stw TMP1, 4(RA)
+ |.endif
| b ->fff_res
|
|//-- Base library: catch errors ----------------------------------------
@@ -1576,19 +1767,32 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc xpcall
| cmplwi NARGS8:RC, 16
- | lwz CARG4, 8(BASE)
+ | lwz CARG3, 8(BASE)
+ |.if FPU
| lfd FARG2, 8(BASE)
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG1, 0(BASE)
+ | lwz CARG2, 4(BASE)
+ | lwz CARG4, 12(BASE)
+ |.endif
| blt ->fff_fallback
| lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
| mr TMP2, BASE
- | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function.
+ | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
| la BASE, 16(BASE)
| // Remember active hook before pcall.
| rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
+ |.if FPU
| stfd FARG2, 0(TMP2) // Swap function and traceback.
- | subi NARGS8:RC, NARGS8:RC, 16
| stfd FARG1, 8(TMP2)
+ |.else
+ | stw CARG3, 0(TMP2)
+ | stw CARG4, 4(TMP2)
+ | stw CARG1, 8(TMP2)
+ | stw CARG2, 12(TMP2)
+ |.endif
+ | subi NARGS8:RC, NARGS8:RC, 16
| addi PC, TMP1, 16+FRAME_PCALL
| b ->vm_call_dispatch
|
@@ -1631,9 +1835,21 @@ static void build_subroutines(BuildCtx *ctx)
| stp BASE, L->top
|2: // Move args to coroutine.
| cmpw TMP1, NARGS8:RC
+ |.if FPU
| lfdx f0, BASE, TMP1
+ |.else
+ | add CARG3, BASE, TMP1
+ | lwz TMP2, 0(CARG3)
+ | lwz TMP3, 4(CARG3)
+ |.endif
| beq >3
+ |.if FPU
| stfdx f0, CARG2, TMP1
+ |.else
+ | add CARG3, CARG2, TMP1
+ | stw TMP2, 0(CARG3)
+ | stw TMP3, 4(CARG3)
+ |.endif
| addi TMP1, TMP1, 8
| b <2
|3:
@@ -1664,8 +1880,17 @@ static void build_subroutines(BuildCtx *ctx)
| stp TMP2, L:SAVE0->top // Clear coroutine stack.
|5: // Move results from coroutine.
| cmplw TMP1, TMP3
+ |.if FPU
| lfdx f0, TMP2, TMP1
| stfdx f0, BASE, TMP1
+ |.else
+ | add CARG3, TMP2, TMP1
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ | add CARG3, BASE, TMP1
+ | stw CARG1, 0(CARG3)
+ | stw CARG2, 4(CARG3)
+ |.endif
| addi TMP1, TMP1, 8
| bne <5
|6:
@@ -1690,12 +1915,22 @@ static void build_subroutines(BuildCtx *ctx)
| andix. TMP0, PC, FRAME_TYPE
| la TMP3, -8(TMP3)
| li TMP1, LJ_TFALSE
+ |.if FPU
| lfd f0, 0(TMP3)
+ |.else
+ | lwz CARG1, 0(TMP3)
+ | lwz CARG2, 4(TMP3)
+ |.endif
| stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
| li RD, (2+1)*8
| stw TMP1, -8(BASE) // Prepend false to results.
| la RA, -8(BASE)
+ |.if FPU
| stfd f0, 0(BASE) // Copy error message.
+ |.else
+ | stw CARG1, 0(BASE) // Copy error message.
+ | stw CARG2, 4(BASE)
+ |.endif
| b <7
|.else
| mr CARG1, L
@@ -1874,7 +2109,12 @@ static void build_subroutines(BuildCtx *ctx)
| lus CARG1, 0x8000 // -(2^31).
| beqy ->fff_resi
|5:
+ |.if FPU
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG1, 0(BASE)
+ | lwz CARG2, 4(BASE)
+ |.endif
| blex func
| b ->fff_resn
|.endmacro
@@ -1898,10 +2138,14 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc math_log
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
- | lfd FARG1, 0(BASE)
+ | lwz CARG1, 0(BASE)
| bne ->fff_fallback // Need exactly 1 argument.
- | checknum CARG3; bge ->fff_fallback
+ | checknum CARG1; bge ->fff_fallback
+ |.if FPU
+ | lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG2, 4(BASE)
+ |.endif
| blex log
| b ->fff_resn
|
@@ -1923,17 +2167,24 @@ static void build_subroutines(BuildCtx *ctx)
|.if DUALNUM
|.ffunc math_ldexp
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 0(BASE)
+ | lwz TMP0, 0(BASE)
+ |.if FPU
| lfd FARG1, 0(BASE)
- | lwz CARG4, 8(BASE)
+ |.else
+ | lwz CARG1, 0(BASE)
+ | lwz CARG2, 4(BASE)
+ |.endif
+ | lwz TMP1, 8(BASE)
|.if GPR64
| lwz CARG2, 12(BASE)
- |.else
+ |.elif FPU
| lwz CARG1, 12(BASE)
+ |.else
+ | lwz CARG3, 12(BASE)
|.endif
| blt ->fff_fallback
- | checknum CARG3; bge ->fff_fallback
- | checknum CARG4; bne ->fff_fallback
+ | checknum TMP0; bge ->fff_fallback
+ | checknum TMP1; bne ->fff_fallback
|.else
|.ffunc_nn math_ldexp
|.if GPR64
@@ -1948,8 +2199,10 @@ static void build_subroutines(BuildCtx *ctx)
|.ffunc_n math_frexp
|.if GPR64
| la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
- |.else
+ |.elif FPU
| la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
+ |.else
+ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
|.endif
| lwz PC, FRAME_PC(BASE)
| blex frexp
@@ -1958,7 +2211,12 @@ static void build_subroutines(BuildCtx *ctx)
|.if not DUALNUM
| tonum_i FARG2, TMP1
|.endif
+ |.if FPU
| stfd FARG1, 0(RA)
+ |.else
+ | stw CRET1, 0(RA)
+ | stw CRET2, 4(RA)
+ |.endif
| li RD, (2+1)*8
|.if DUALNUM
| stw TISNUM, 8(RA)
@@ -1971,13 +2229,20 @@ static void build_subroutines(BuildCtx *ctx)
|.ffunc_n math_modf
|.if GPR64
| la CARG2, -8(BASE)
- |.else
+ |.elif FPU
| la CARG1, -8(BASE)
+ |.else
+ | la CARG3, -8(BASE)
|.endif
| lwz PC, FRAME_PC(BASE)
| blex modf
| la RA, -8(BASE)
+ |.if FPU
| stfd FARG1, 0(BASE)
+ |.else
+ | stw CRET1, 0(BASE)
+ | stw CRET2, 4(BASE)
+ |.endif
| li RD, (2+1)*8
| b ->fff_res
|
@@ -1985,13 +2250,13 @@ static void build_subroutines(BuildCtx *ctx)
|.if DUALNUM
| .ffunc_1 name
| checknum CARG3
- | addi TMP1, BASE, 8
- | add TMP2, BASE, NARGS8:RC
+ | addi SAVE0, BASE, 8
+ | add SAVE1, BASE, NARGS8:RC
| bne >4
|1: // Handle integers.
- | lwz CARG4, 0(TMP1)
- | cmplw cr1, TMP1, TMP2
- | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 0(SAVE0)
+ | cmplw cr1, SAVE0, SAVE1
+ | lwz CARG2, 4(SAVE0)
| bge cr1, ->fff_resi
| checknum CARG4
| xoris TMP0, CARG1, 0x8000
@@ -2008,36 +2273,76 @@ static void build_subroutines(BuildCtx *ctx)
|.if GPR64
| rldicl CARG1, CARG1, 0, 32
|.endif
- | addi TMP1, TMP1, 8
+ | addi SAVE0, SAVE0, 8
| b <1
|3:
| bge ->fff_fallback
| // Convert intermediate result to number and continue below.
+ |.if FPU
| tonum_i FARG1, CARG1
- | lfd FARG2, 0(TMP1)
+ | lfd FARG2, 0(SAVE0)
+ |.else
+ | mr CARG2, CARG1
+ | bl ->vm_sfi2d_1
+ | lwz CARG3, 0(SAVE0)
+ | lwz CARG4, 4(SAVE0)
+ |.endif
| b >6
|4:
+ |.if FPU
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG1, 0(BASE)
+ | lwz CARG2, 4(BASE)
+ |.endif
| bge ->fff_fallback
|5: // Handle numbers.
- | lwz CARG4, 0(TMP1)
- | cmplw cr1, TMP1, TMP2
- | lfd FARG2, 0(TMP1)
+ | lwz CARG3, 0(SAVE0)
+ | cmplw cr1, SAVE0, SAVE1
+ |.if FPU
+ | lfd FARG2, 0(SAVE0)
+ |.else
+ | lwz CARG4, 4(SAVE0)
+ |.endif
| bge cr1, ->fff_resn
- | checknum CARG4; bge >7
+ | checknum CARG3; bge >7
|6:
+ | addi SAVE0, SAVE0, 8
+ |.if FPU
| fsub f0, FARG1, FARG2
- | addi TMP1, TMP1, 8
|.if ismax
| fsel FARG1, f0, FARG1, FARG2
|.else
| fsel FARG1, f0, FARG2, FARG1
|.endif
+ |.else
+ | stw CARG1, SFSAVE_1
+ | stw CARG2, SFSAVE_2
+ | stw CARG3, SFSAVE_3
+ | stw CARG4, SFSAVE_4
+ | blex __ledf2
+ | cmpwi CRET1, 0
+ |.if ismax
+ | blt >8
+ |.else
+ | bge >8
+ |.endif
+ | lwz CARG1, SFSAVE_1
+ | lwz CARG2, SFSAVE_2
+ | b <5
+ |8:
+ | lwz CARG1, SFSAVE_3
+ | lwz CARG2, SFSAVE_4
+ |.endif
| b <5
|7: // Convert integer to number and continue above.
- | lwz CARG2, 4(TMP1)
+ | lwz CARG3, 4(SAVE0)
| bne ->fff_fallback
- | tonum_i FARG2, CARG2
+ |.if FPU
+ | tonum_i FARG2, CARG3
+ |.else
+ | bl ->vm_sfi2d_2
+ |.endif
| b <6
|.else
| .ffunc_n name
@@ -2237,28 +2542,37 @@ static void build_subroutines(BuildCtx *ctx)
|
|.macro .ffunc_bit_op, name, ins
| .ffunc_bit name
- | addi TMP1, BASE, 8
- | add TMP2, BASE, NARGS8:RC
+ | addi SAVE0, BASE, 8
+ | add SAVE1, BASE, NARGS8:RC
|1:
- | lwz CARG4, 0(TMP1)
- | cmplw cr1, TMP1, TMP2
+ | lwz CARG4, 0(SAVE0)
+ | cmplw cr1, SAVE0, SAVE1
|.if DUALNUM
- | lwz CARG2, 4(TMP1)
+ | lwz CARG2, 4(SAVE0)
|.else
- | lfd FARG1, 0(TMP1)
+ | lfd FARG1, 0(SAVE0)
|.endif
| bgey cr1, ->fff_resi
| checknum CARG4
|.if DUALNUM
+ |.if FPU
| bnel ->fff_bitop_fb
|.else
+ | beq >3
+ | stw CARG1, SFSAVE_1
+ | bl ->fff_bitop_fb
+ | mr CARG2, CARG1
+ | lwz CARG1, SFSAVE_1
+ |3:
+ |.endif
+ |.else
| fadd FARG1, FARG1, TOBIT
| bge ->fff_fallback
| stfd FARG1, TMPD
| lwz CARG2, TMPD_LO
|.endif
| ins CARG1, CARG1, CARG2
- | addi TMP1, TMP1, 8
+ | addi SAVE0, SAVE0, 8
| b <1
|.endmacro
|
@@ -2280,7 +2594,14 @@ static void build_subroutines(BuildCtx *ctx)
|.macro .ffunc_bit_sh, name, ins, shmod
|.if DUALNUM
| .ffunc_2 bit_..name
+ |.if FPU
| checknum CARG3; bnel ->fff_tobit_fb
+ |.else
+ | checknum CARG3; beq >1
+ | bl ->fff_tobit_fb
+ | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
+ |1:
+ |.endif
| // Note: no inline conversion from number for 2nd argument!
| checknum CARG4; bne ->fff_fallback
|.else
@@ -2317,27 +2638,77 @@ static void build_subroutines(BuildCtx *ctx)
|->fff_resn:
| lwz PC, FRAME_PC(BASE)
| la RA, -8(BASE)
+ |.if FPU
| stfd FARG1, -8(BASE)
+ |.else
+ | stw CARG1, -8(BASE)
+ | stw CARG2, -4(BASE)
+ |.endif
| b ->fff_res1
|
|// Fallback FP number to bit conversion.
|->fff_tobit_fb:
|.if DUALNUM
+ |.if FPU
| lfd FARG1, 0(BASE)
| bgt ->fff_fallback
| fadd FARG1, FARG1, TOBIT
| stfd FARG1, TMPD
| lwz CARG1, TMPD_LO
| blr
+ |.else
+ | bgt ->fff_fallback
+ | mr CARG2, CARG1
+ | mr CARG1, CARG3
+ |// Soft-float number to 32 bit integer. In: CARG1 = hi word, CARG2 = lo word. Out: CARG1. Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
+ |->vm_tobit:
+ | slwi TMP2, CARG1, 1 // Shift out the sign bit.
+ | addis TMP2, TMP2, 0x0020 // Add 1 to the exponent field (now at bits 21..31).
+ | cmpwi TMP2, 0
+ | bge >2 // Top bit of exponent+1 is clear: result is 0.
+ | li TMP1, 0x3e0
+ | srawi TMP2, TMP2, 21 // Sign-extended exponent+1.
+ | not TMP1, TMP1 // TMP1 = ~0x3e0.
+ | sub. TMP2, TMP1, TMP2 // TMP2 = 0x41e - exponent, i.e. 31 - unbiased exponent (assumes IEEE-754 bias 0x3ff). Sets cr0.
+ | cmpwi cr7, CARG1, 0 // cr7[lt] = hi word is negative (sign bit set).
+ | blt >1 // cr0 from sub.: shift count is negative, take the other path.
+ | slwi TMP1, CARG1, 11 // Low 21 bits of the hi word moved to the top.
+ | srwi TMP0, CARG2, 21 // Top 11 bits of the lo word.
+ | oris TMP1, TMP1, 0x8000 // Force bit 31 (presumably the implicit leading 1).
+ | or TMP1, TMP1, TMP0
+ | srw CARG1, TMP1, TMP2 // Shift right by the computed count.
+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
+ | neg CARG1, CARG1 // Negative input: negate the result.
+ | blr
+ |1:
+ | addi TMP2, TMP2, 21 // Right-shift count for the lo word.
+ | srw TMP1, CARG2, TMP2
+ | slwi CARG2, CARG1, 12 // Drop the top 12 bits (sign + exponent field) of the hi word.
+ | subfic TMP2, TMP2, 20 // TMP2 = 20 - TMP2: left-shift count for the hi word bits.
+ | slw TMP0, CARG2, TMP2
+ | or CARG1, TMP1, TMP0 // Combine the bits taken from both words.
+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
+ | neg CARG1, CARG1 // Negative input: negate the result.
+ | blr
+ |2:
+ | li CARG1, 0 // Result is 0.
+ | blr
+ |.endif
|.endif
|->fff_bitop_fb:
|.if DUALNUM
- | lfd FARG1, 0(TMP1)
+ |.if FPU
+ | lfd FARG1, 0(SAVE0)
| bgt ->fff_fallback
| fadd FARG1, FARG1, TOBIT
| stfd FARG1, TMPD
| lwz CARG2, TMPD_LO
| blr
+ |.else
+ | bgt ->fff_fallback
+ | mr CARG1, CARG4
+ | b ->vm_tobit
+ |.endif
|.endif
|
|//-----------------------------------------------------------------------
@@ -2530,10 +2901,21 @@ static void build_subroutines(BuildCtx *ctx)
| decode_RA8 RC, INS // Call base.
| beq >2
|1: // Move results down.
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz CARG1, 0(RA)
+ | lwz CARG2, 4(RA)
+ |.endif
| addic. TMP1, TMP1, -8
| addi RA, RA, 8
+ |.if FPU
| stfdx f0, BASE, RC
+ |.else
+ | add CARG3, BASE, RC
+ | stw CARG1, 0(CARG3)
+ | stw CARG2, 4(CARG3)
+ |.endif
| addi RC, RC, 8
| bne <1
|2:
@@ -2586,10 +2968,12 @@ static void build_subroutines(BuildCtx *ctx)
|//-----------------------------------------------------------------------
|
|.macro savex_, a, b, c, d
+ |.if FPU
| stfd f..a, 16+a*8(sp)
| stfd f..b, 16+b*8(sp)
| stfd f..c, 16+c*8(sp)
| stfd f..d, 16+d*8(sp)
+ |.endif
|.endmacro
|
|->vm_exit_handler:
@@ -2661,16 +3045,16 @@ static void build_subroutines(BuildCtx *ctx)
| lwz KBASE, PC2PROTO(k)(TMP1)
| // Setup type comparison constants.
| li TISNUM, LJ_TISNUM
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | stw TMP3, TMPD
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU stw TMP3, TMPD
| li ZERO, 0
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | lfs TOBIT, TMPD
- | stw TMP3, TMPD
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU lfs TOBIT, TMPD
+ | .FPU stw TMP3, TMPD
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
| li TISNIL, LJ_TNIL
- | stw TMP0, TONUM_HI
- | lfs TONUM, TMPD
+ | .FPU stw TMP0, TONUM_HI
+ | .FPU lfs TONUM, TMPD
| // Modified copy of ins_next which handles function header dispatch, too.
| lwz INS, 0(PC)
| addi PC, PC, 4
@@ -2715,7 +3099,35 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Math helper functions ----------------------------------------------
|//-----------------------------------------------------------------------
|
- |// NYI: Use internal implementations of floor, ceil, trunc.
+ |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
+ |
+ |.macro sfi2d, AHI, ALO // Soft-float: convert the signed 32 bit integer in ALO to a double in AHI:ALO, then return via blr.
+ |.if not FPU // The macro expands to nothing when FPU is set.
+ | mr. AHI, ALO // AHI = ALO; sets cr0 for the zero test below.
+ | bclr 12, 2 // Handle zero first: return if cr0[eq], leaving AHI = ALO = 0.
+ | srawi TMP0, ALO, 31 // TMP0 = 0 or -1 (sign mask).
+ | xor TMP1, ALO, TMP0
+ | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
+ | cntlzw AHI, TMP1 // Number of leading zero bits of the absolute value.
+ | andix. TMP0, TMP0, 0x800 // Mask sign bit (becomes bit 31 after the shift by 20 below).
+ | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
+ | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
+ | slwi ALO, TMP1, 21 // Lo word: the low 11 bits of the aligned mantissa, moved to the top.
+ | or AHI, AHI, TMP0 // Sign | Exponent.
+ | srwi TMP1, TMP1, 11 // Top 21 bits of the aligned mantissa, leading 1 now at bit 20.
+ | slwi AHI, AHI, 20 // Align left.
+ | add AHI, AHI, TMP1 // Add mantissa, increment exponent (the leading 1 at bit 20 carries into it).
+ | blr // Return to the caller of the enclosing subroutine.
+ |.endif
+ |.endmacro
+ |
+ |// Integer to double, first operand. Input: CARG2 (integer). Output: CARG1 (hi), CARG2 (lo). Temporaries: TMP0, TMP1.
+ |->vm_sfi2d_1:
+ | sfi2d CARG1, CARG2 // Soft-float only: the macro body is empty when FPU is set, otherwise it ends in blr.
+ |
+ |// Integer to double, second operand. Input: CARG4 (integer). Output: CARG3 (hi), CARG4 (lo). Temporaries: TMP0, TMP1.
+ |->vm_sfi2d_2:
+ | sfi2d CARG3, CARG4 // Soft-float only: the macro body is empty when FPU is set, otherwise it ends in blr.
|
|->vm_modi:
| divwo. TMP0, CARG1, CARG2
@@ -2783,21 +3195,21 @@ static void build_subroutines(BuildCtx *ctx)
| addi DISPATCH, r12, GG_G2DISP
| stw r11, CTSTATE->cb.slot
| stw r3, CTSTATE->cb.gpr[0]
- | stfd f1, CTSTATE->cb.fpr[0]
+ | .FPU stfd f1, CTSTATE->cb.fpr[0]
| stw r4, CTSTATE->cb.gpr[1]
- | stfd f2, CTSTATE->cb.fpr[1]
+ | .FPU stfd f2, CTSTATE->cb.fpr[1]
| stw r5, CTSTATE->cb.gpr[2]
- | stfd f3, CTSTATE->cb.fpr[2]
+ | .FPU stfd f3, CTSTATE->cb.fpr[2]
| stw r6, CTSTATE->cb.gpr[3]
- | stfd f4, CTSTATE->cb.fpr[3]
+ | .FPU stfd f4, CTSTATE->cb.fpr[3]
| stw r7, CTSTATE->cb.gpr[4]
- | stfd f5, CTSTATE->cb.fpr[4]
+ | .FPU stfd f5, CTSTATE->cb.fpr[4]
| stw r8, CTSTATE->cb.gpr[5]
- | stfd f6, CTSTATE->cb.fpr[5]
+ | .FPU stfd f6, CTSTATE->cb.fpr[5]
| stw r9, CTSTATE->cb.gpr[6]
- | stfd f7, CTSTATE->cb.fpr[6]
+ | .FPU stfd f7, CTSTATE->cb.fpr[6]
| stw r10, CTSTATE->cb.gpr[7]
- | stfd f8, CTSTATE->cb.fpr[7]
+ | .FPU stfd f8, CTSTATE->cb.fpr[7]
| addi TMP0, sp, CFRAME_SPACE+8
| stw TMP0, CTSTATE->cb.stack
| mr CARG1, CTSTATE
@@ -2808,21 +3220,21 @@ static void build_subroutines(BuildCtx *ctx)
| lp BASE, L:CRET1->base
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lp RC, L:CRET1->top
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| li ZERO, 0
| mr L, CRET1
- | stw TMP3, TMPD
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | .FPU stw TMP3, TMPD
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
| lwz LFUNC:RB, FRAME_FUNC(BASE)
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | stw TMP0, TONUM_HI
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU stw TMP0, TONUM_HI
| li TISNIL, LJ_TNIL
| li_vmstate INTERP
- | lfs TOBIT, TMPD
- | stw TMP3, TMPD
+ | .FPU lfs TOBIT, TMPD
+ | .FPU stw TMP3, TMPD
| sub RC, RC, BASE
| st_vmstate
- | lfs TONUM, TMPD
+ | .FPU lfs TONUM, TMPD
| ins_callt
|.endif
|
@@ -2836,7 +3248,7 @@ static void build_subroutines(BuildCtx *ctx)
| mr CARG2, RA
| bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
| lwz CRET1, CTSTATE->cb.gpr[0]
- | lfd FARG1, CTSTATE->cb.fpr[0]
+ | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
| lwz CRET2, CTSTATE->cb.gpr[1]
| b ->vm_leave_unw
|.endif
@@ -2870,14 +3282,14 @@ static void build_subroutines(BuildCtx *ctx)
| bge <1
|2:
| bney cr1, >3
- | lfd f1, CCSTATE->fpr[0]
- | lfd f2, CCSTATE->fpr[1]
- | lfd f3, CCSTATE->fpr[2]
- | lfd f4, CCSTATE->fpr[3]
- | lfd f5, CCSTATE->fpr[4]
- | lfd f6, CCSTATE->fpr[5]
- | lfd f7, CCSTATE->fpr[6]
- | lfd f8, CCSTATE->fpr[7]
+ | .FPU lfd f1, CCSTATE->fpr[0]
+ | .FPU lfd f2, CCSTATE->fpr[1]
+ | .FPU lfd f3, CCSTATE->fpr[2]
+ | .FPU lfd f4, CCSTATE->fpr[3]
+ | .FPU lfd f5, CCSTATE->fpr[4]
+ | .FPU lfd f6, CCSTATE->fpr[5]
+ | .FPU lfd f7, CCSTATE->fpr[6]
+ | .FPU lfd f8, CCSTATE->fpr[7]
|3:
| lp TMP0, CCSTATE->func
| lwz CARG2, CCSTATE->gpr[1]
@@ -2894,7 +3306,7 @@ static void build_subroutines(BuildCtx *ctx)
| lwz TMP2, -4(r14)
| lwz TMP0, 4(r14)
| stw CARG1, CCSTATE:TMP1->gpr[0]
- | stfd FARG1, CCSTATE:TMP1->fpr[0]
+ | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
| stw CARG2, CCSTATE:TMP1->gpr[1]
| mtlr TMP0
| stw CARG3, CCSTATE:TMP1->gpr[2]
@@ -2923,19 +3335,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
| // RA = src1*8, RD = src2*8, JMP with RD = target
|.if DUALNUM
- | lwzux TMP0, RA, BASE
+ | lwzux CARG1, RA, BASE
| addi PC, PC, 4
| lwz CARG2, 4(RA)
- | lwzux TMP1, RD, BASE
+ | lwzux CARG3, RD, BASE
| lwz TMP2, -4(PC)
- | checknum cr0, TMP0
- | lwz CARG3, 4(RD)
+ | checknum cr0, CARG1
+ | lwz CARG4, 4(RD)
| decode_RD4 TMP2, TMP2
- | checknum cr1, TMP1
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | checknum cr1, CARG3
+ | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
| bne cr0, >7
| bne cr1, >8
- | cmpw CARG2, CARG3
+ | cmpw CARG2, CARG4
if (op == BC_ISLT) {
| bge >2
} else if (op == BC_ISGE) {
@@ -2946,28 +3358,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ble >2
}
|1:
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|2:
| ins_next
|
|7: // RA is not an integer.
| bgt cr0, ->vmeta_comp
| // RA is a number.
- | lfd f0, 0(RA)
+ | .FPU lfd f0, 0(RA)
| bgt cr1, ->vmeta_comp
| blt cr1, >4
| // RA is a number, RD is an integer.
- | tonum_i f1, CARG3
+ |.if FPU
+ | tonum_i f1, CARG4
+ |.else
+ | bl ->vm_sfi2d_2
+ |.endif
| b >5
|
|8: // RA is an integer, RD is not an integer.
| bgt cr1, ->vmeta_comp
| // RA is an integer, RD is a number.
+ |.if FPU
| tonum_i f0, CARG2
+ |.else
+ | bl ->vm_sfi2d_1
+ |.endif
|4:
- | lfd f1, 0(RD)
+ | .FPU lfd f1, 0(RD)
|5:
+ |.if FPU
| fcmpu cr0, f0, f1
+ |.else
+ | blex __ledf2
+ | cmpwi CRET1, 0
+ |.endif
if (op == BC_ISLT) {
| bge <2
} else if (op == BC_ISGE) {
@@ -3015,42 +3440,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
vk = op == BC_ISEQV;
| // RA = src1*8, RD = src2*8, JMP with RD = target
|.if DUALNUM
- | lwzux TMP0, RA, BASE
+ | lwzux CARG1, RA, BASE
| addi PC, PC, 4
| lwz CARG2, 4(RA)
- | lwzux TMP1, RD, BASE
- | checknum cr0, TMP0
- | lwz TMP2, -4(PC)
- | checknum cr1, TMP1
- | decode_RD4 TMP2, TMP2
- | lwz CARG3, 4(RD)
+ | lwzux CARG3, RD, BASE
+ | checknum cr0, CARG1
+ | lwz SAVE0, -4(PC)
+ | checknum cr1, CARG3
+ | decode_RD4 SAVE0, SAVE0
+ | lwz CARG4, 4(RD)
| cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
if (vk) {
| ble cr7, ->BC_ISEQN_Z
} else {
| ble cr7, ->BC_ISNEN_Z
}
|.else
- | lwzux TMP0, RA, BASE
- | lwz TMP2, 0(PC)
+ | lwzux CARG1, RA, BASE
+ | lwz SAVE0, 0(PC)
| lfd f0, 0(RA)
| addi PC, PC, 4
- | lwzux TMP1, RD, BASE
- | checknum cr0, TMP0
- | decode_RD4 TMP2, TMP2
+ | lwzux CARG3, RD, BASE
+ | checknum cr0, CARG1
+ | decode_RD4 SAVE0, SAVE0
| lfd f1, 0(RD)
- | checknum cr1, TMP1
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | checknum cr1, CARG3
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
| bge cr0, >5
| bge cr1, >5
| fcmpu cr0, f0, f1
if (vk) {
| bne >1
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
} else {
| beq >1
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
}
|1:
| ins_next
@@ -3058,36 +3483,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|5: // Either or both types are not numbers.
|.if not DUALNUM
| lwz CARG2, 4(RA)
- | lwz CARG3, 4(RD)
+ | lwz CARG4, 4(RD)
|.endif
|.if FFI
- | cmpwi cr7, TMP0, LJ_TCDATA
- | cmpwi cr5, TMP1, LJ_TCDATA
+ | cmpwi cr7, CARG1, LJ_TCDATA
+ | cmpwi cr5, CARG3, LJ_TCDATA
|.endif
- | not TMP3, TMP0
- | cmplw TMP0, TMP1
- | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
+ | not TMP2, CARG1
+ | cmplw CARG1, CARG3
+ | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
|.if FFI
| cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
|.endif
- | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
+ | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
|.if FFI
| beq cr7, ->vmeta_equal_cd
|.endif
- | cmplw cr5, CARG2, CARG3
+ | cmplw cr5, CARG2, CARG4
| crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
| crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
| crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
- | mr SAVE0, PC
+ | mr SAVE1, PC
| cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
| cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
if (vk) {
| bne cr0, >6
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|6:
} else {
| beq cr0, >6
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|6:
}
|.if DUALNUM
@@ -3102,6 +3527,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
| // Different tables or userdatas. Need to check __eq metamethod.
| // Field metatable must be at same offset for GCtab and GCudata!
+ | mr CARG3, CARG4
| lwz TAB:TMP2, TAB:CARG2->metatable
| li CARG4, 1-vk // ne = 0 or 1.
| cmplwi TAB:TMP2, 0
@@ -3109,7 +3535,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lbz TMP2, TAB:TMP2->nomm
| andix. TMP2, TMP2, 1<<MM_eq
| bne <1 // Or 'no __eq' flag set?
- | mr PC, SAVE0 // Restore old PC.
+ | mr PC, SAVE1 // Restore old PC.
| b ->vmeta_equal // Handle __eq metamethod.
break;
@@ -3150,16 +3576,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
vk = op == BC_ISEQN;
| // RA = src*8, RD = num_const*8, JMP with RD = target
|.if DUALNUM
- | lwzux TMP0, RA, BASE
+ | lwzux CARG1, RA, BASE
| addi PC, PC, 4
| lwz CARG2, 4(RA)
- | lwzux TMP1, RD, KBASE
- | checknum cr0, TMP0
- | lwz TMP2, -4(PC)
- | checknum cr1, TMP1
- | decode_RD4 TMP2, TMP2
- | lwz CARG3, 4(RD)
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | lwzux CARG3, RD, KBASE
+ | checknum cr0, CARG1
+ | lwz SAVE0, -4(PC)
+ | checknum cr1, CARG3
+ | decode_RD4 SAVE0, SAVE0
+ | lwz CARG4, 4(RD)
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
if (vk) {
|->BC_ISEQN_Z:
} else {
@@ -3167,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
}
| bne cr0, >7
| bne cr1, >8
- | cmpw CARG2, CARG3
+ | cmpw CARG2, CARG4
|4:
|.else
if (vk) {
@@ -3175,20 +3601,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
} else {
|->BC_ISNEN_Z: // Dummy label.
}
- | lwzx TMP0, BASE, RA
+ | lwzx CARG1, BASE, RA
| addi PC, PC, 4
| lfdx f0, BASE, RA
- | lwz TMP2, -4(PC)
+ | lwz SAVE0, -4(PC)
| lfdx f1, KBASE, RD
- | decode_RD4 TMP2, TMP2
- | checknum TMP0
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | decode_RD4 SAVE0, SAVE0
+ | checknum CARG1
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
| bge >3
| fcmpu cr0, f0, f1
|.endif
if (vk) {
| bne >1
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|1:
|.if not FFI
|3:
@@ -3199,13 +3625,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.if not FFI
|3:
|.endif
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|2:
}
| ins_next
|.if FFI
|3:
- | cmpwi TMP0, LJ_TCDATA
+ | cmpwi CARG1, LJ_TCDATA
| beq ->vmeta_equal_cd
| b <1
|.endif
@@ -3213,18 +3639,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|7: // RA is not an integer.
| bge cr0, <3
| // RA is a number.
- | lfd f0, 0(RA)
+ | .FPU lfd f0, 0(RA)
| blt cr1, >1
| // RA is a number, RD is an integer.
- | tonum_i f1, CARG3
+ |.if FPU
+ | tonum_i f1, CARG4
+ |.else
+ | bl ->vm_sfi2d_2
+ |.endif
| b >2
|
|8: // RA is an integer, RD is a number.
+ |.if FPU
| tonum_i f0, CARG2
+ |.else
+ | bl ->vm_sfi2d_1
+ |.endif
|1:
- | lfd f1, 0(RD)
+ | .FPU lfd f1, 0(RD)
|2:
+ |.if FPU
| fcmpu cr0, f0, f1
+ |.else
+ | blex __ledf2
+ | cmpwi CRET1, 0
+ |.endif
| b <4
|.endif
break;
@@ -3279,7 +3718,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add PC, PC, TMP2
} else {
| li TMP1, LJ_TFALSE
+ |.if FPU
| lfdx f0, BASE, RD
+ |.else
+ | lwzux CARG1, RD, BASE
+ | lwz CARG2, 4(RD)
+ |.endif
| cmplw TMP0, TMP1
if (op == BC_ISTC) {
| bge >1
@@ -3288,7 +3732,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
}
| addis PC, PC, -(BCBIAS_J*4 >> 16)
| decode_RD4 TMP2, INS
+ |.if FPU
| stfdx f0, BASE, RA
+ |.else
+ | stwux CARG1, RA, BASE
+ | stw CARG2, 4(RA)
+ |.endif
| add PC, PC, TMP2
|1:
}
@@ -3323,8 +3772,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_MOV:
| // RA = dst*8, RD = src*8
| ins_next1
+ |.if FPU
| lfdx f0, BASE, RD
| stfdx f0, BASE, RA
+ |.else
+ | lwzux TMP0, RD, BASE
+ | lwz TMP1, 4(RD)
+ | stwux TMP0, RA, BASE
+ | stw TMP1, 4(RA)
+ |.endif
| ins_next2
break;
case BC_NOT:
@@ -3426,44 +3882,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
- | lwzx TMP1, BASE, RB
+ | lwzx CARG1, BASE, RB
| .if DUALNUM
- | lwzx TMP2, KBASE, RC
+ | lwzx CARG3, KBASE, RC
| .endif
+ | .if FPU
| lfdx f14, BASE, RB
| lfdx f15, KBASE, RC
+ | .else
+ | add TMP1, BASE, RB
+ | add TMP2, KBASE, RC
+ | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 4(TMP2)
+ | .endif
| .if DUALNUM
- | checknum cr0, TMP1
- | checknum cr1, TMP2
+ | checknum cr0, CARG1
+ | checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_vn
| .else
- | checknum TMP1; bge ->vmeta_arith_vn
+ | checknum CARG1; bge ->vmeta_arith_vn
| .endif
|| break;
||case 1:
- | lwzx TMP1, BASE, RB
+ | lwzx CARG1, BASE, RB
| .if DUALNUM
- | lwzx TMP2, KBASE, RC
+ | lwzx CARG3, KBASE, RC
| .endif
+ | .if FPU
| lfdx f15, BASE, RB
| lfdx f14, KBASE, RC
+ | .else
+ | add TMP1, BASE, RB
+ | add TMP2, KBASE, RC
+ | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 4(TMP2)
+ | .endif
| .if DUALNUM
- | checknum cr0, TMP1
- | checknum cr1, TMP2
+ | checknum cr0, CARG1
+ | checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_nv
| .else
- | checknum TMP1; bge ->vmeta_arith_nv
+ | checknum CARG1; bge ->vmeta_arith_nv
| .endif
|| break;
||default:
- | lwzx TMP1, BASE, RB
- | lwzx TMP2, BASE, RC
+ | lwzx CARG1, BASE, RB
+ | lwzx CARG3, BASE, RC
+ | .if FPU
| lfdx f14, BASE, RB
| lfdx f15, BASE, RC
- | checknum cr0, TMP1
- | checknum cr1, TMP2
+ | .else
+ | add TMP1, BASE, RB
+ | add TMP2, BASE, RC
+ | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 4(TMP2)
+ | .endif
+ | checknum cr0, CARG1
+ | checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_vv
|| break;
@@ -3497,48 +3974,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| fsub a, b, a // b - floor(b/c)*c
|.endmacro
|
+ |.macro sfpmod // Soft-float modulo via library calls: a - floor(a/b)*b, with a in CARG1:CARG2 and b in CARG3:CARG4.
+ |->BC_MODVN_Z: // Shared entry point that other code reaches with 'b ->BC_MODVN_Z'.
+ | stw CARG1, SFSAVE_1 // Save a (both words); reloaded for the final subtraction.
+ | stw CARG2, SFSAVE_2
+ | mr SAVE0, CARG3 // Keep b in SAVE0:SAVE1; restored after the next two calls.
+ | mr SAVE1, CARG4
+ | blex __divdf3 // a / b
+ | blex floor // floor(a / b); NOTE(review): relies on the previous result already being in CARG1:CARG2 -- presumably CRET1/CRET2 alias them.
+ | mr CARG3, SAVE0 // Second operand = b.
+ | mr CARG4, SAVE1
+ | blex __muldf3 // floor(a / b) * b
+ | mr CARG3, CRET1 // The product becomes the second operand of the subtraction.
+ | mr CARG4, CRET2
+ | lwz CARG1, SFSAVE_1 // First operand = original a.
+ | lwz CARG2, SFSAVE_2
+ | blex __subdf3 // a - floor(a / b) * b; the result is left wherever __subdf3 returns it.
+ |.endmacro
+ |
|.macro ins_arithfp, fpins
| ins_arithpre
|.if "fpins" == "fpmod_"
| b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
- |.else
+ |.elif FPU
| fpins f0, f14, f15
| ins_next1
| stfdx f0, BASE, RA
| ins_next2
+ |.else
+ | blex __divdf3 // Only soft-float div uses this macro.
+ | ins_next1
+ | stwux CRET1, RA, BASE
+ | stw CRET2, 4(RA)
+ | ins_next2
|.endif
|.endmacro
|
- |.macro ins_arithdn, intins, fpins
+ |.macro ins_arithdn, intins, fpins, fpcall
| // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
- | lwzux TMP1, RB, BASE
- | lwzux TMP2, RC, KBASE
- | lwz CARG1, 4(RB)
- | checknum cr0, TMP1
- | lwz CARG2, 4(RC)
+ | lwzux CARG1, RB, BASE
+ | lwzux CARG3, RC, KBASE
+ | lwz CARG2, 4(RB)
+ | checknum cr0, CARG1
+ | lwz CARG4, 4(RC)
+ | checknum cr1, CARG3
|| break;
||case 1:
- | lwzux TMP1, RB, BASE
- | lwzux TMP2, RC, KBASE
- | lwz CARG2, 4(RB)
- | checknum cr0, TMP1
- | lwz CARG1, 4(RC)
+ | lwzux CARG3, RB, BASE
+ | lwzux CARG1, RC, KBASE
+ | lwz CARG4, 4(RB)
+ | checknum cr0, CARG3
+ | lwz CARG2, 4(RC)
+ | checknum cr1, CARG1
|| break;
||default:
- | lwzux TMP1, RB, BASE
- | lwzux TMP2, RC, BASE
- | lwz CARG1, 4(RB)
- | checknum cr0, TMP1
- | lwz CARG2, 4(RC)
+ | lwzux CARG1, RB, BASE
+ | lwzux CARG3, RC, BASE
+ | lwz CARG2, 4(RB)
+ | checknum cr0, CARG1
+ | lwz CARG4, 4(RC)
+ | checknum cr1, CARG3
|| break;
||}
- | checknum cr1, TMP2
| bne >5
| bne cr1, >5
- | intins CARG1, CARG1, CARG2
+ |.if "intins" == "intmod"
+ | mr CARG1, CARG2
+ | mr CARG2, CARG4
+ |.endif
+ | intins CARG1, CARG2, CARG4
| bso >4
|1:
| ins_next1
@@ -3550,29 +4057,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| checkov TMP0, <1 // Ignore unrelated overflow.
| ins_arithfallback b
|5: // FP variant.
+ |.if FPU
||if (vk == 1) {
| lfd f15, 0(RB)
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| lfd f14, 0(RC)
||} else {
| lfd f14, 0(RB)
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| lfd f15, 0(RC)
||}
+ |.endif
+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| ins_arithfallback bge
|.if "fpins" == "fpmod_"
| b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
|.else
+ |.if FPU
| fpins f0, f14, f15
- | ins_next1
| stfdx f0, BASE, RA
+ |.else
+ |.if "fpcall" == "sfpmod"
+ | sfpmod
+ |.else
+ | blex fpcall
+ |.endif
+ | stwux CRET1, RA, BASE
+ | stw CRET2, 4(RA)
+ |.endif
+ | ins_next1
| b <2
|.endif
|.endmacro
|
- |.macro ins_arith, intins, fpins
+ |.macro ins_arith, intins, fpins, fpcall
|.if DUALNUM
- | ins_arithdn intins, fpins
+ | ins_arithdn intins, fpins, fpcall
|.else
| ins_arithfp fpins
|.endif
@@ -3587,9 +4105,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addo. TMP0, TMP0, TMP3
| add y, a, b
|.endmacro
- | ins_arith addo32., fadd
+ | ins_arith addo32., fadd, __adddf3
|.else
- | ins_arith addo., fadd
+ | ins_arith addo., fadd, __adddf3
|.endif
break;
case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
@@ -3601,36 +4119,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| subo. TMP0, TMP0, TMP3
| sub y, a, b
|.endmacro
- | ins_arith subo32., fsub
+ | ins_arith subo32., fsub, __subdf3
|.else
- | ins_arith subo., fsub
+ | ins_arith subo., fsub, __subdf3
|.endif
break;
case BC_MULVN: case BC_MULNV: case BC_MULVV:
- | ins_arith mullwo., fmul
+ | ins_arith mullwo., fmul, __muldf3
break;
case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
| ins_arithfp fdiv
break;
case BC_MODVN:
- | ins_arith intmod, fpmod
+ | ins_arith intmod, fpmod, sfpmod
break;
case BC_MODNV: case BC_MODVV:
- | ins_arith intmod, fpmod_
+ | ins_arith intmod, fpmod_, sfpmod
break;
case BC_POW:
| // NYI: (partial) integer arithmetic.
- | lwzx TMP1, BASE, RB
+ | lwzx CARG1, BASE, RB
+ | lwzx CARG3, BASE, RC
+ |.if FPU
| lfdx FARG1, BASE, RB
- | lwzx TMP2, BASE, RC
| lfdx FARG2, BASE, RC
- | checknum cr0, TMP1
- | checknum cr1, TMP2
+ |.else
+ | add TMP1, BASE, RB
+ | add TMP2, BASE, RC
+ | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 4(TMP2)
+ |.endif
+ | checknum cr0, CARG1
+ | checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_vv
| blex pow
| ins_next1
+ |.if FPU
| stfdx FARG1, BASE, RA
+ |.else
+ | stwux CARG1, RA, BASE
+ | stw CARG2, 4(RA)
+ |.endif
| ins_next2
break;
@@ -3650,8 +4180,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lp BASE, L->base
| bne ->vmeta_binop
| ins_next1
+ |.if FPU
| lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
| stfdx f0, BASE, RA
+ |.else
+ | lwzux TMP0, SAVE0, BASE
+ | lwz TMP1, 4(SAVE0)
+ | stwux TMP0, RA, BASE
+ | stw TMP1, 4(RA)
+ |.endif
| ins_next2
break;
@@ -3714,8 +4251,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_KNUM:
| // RA = dst*8, RD = num_const*8
| ins_next1
+ |.if FPU
| lfdx f0, KBASE, RD
| stfdx f0, BASE, RA
+ |.else
+ | lwzux TMP0, RD, KBASE
+ | lwz TMP1, 4(RD)
+ | stwux TMP0, RA, BASE
+ | stw TMP1, 4(RA)
+ |.endif
| ins_next2
break;
case BC_KPRI:
@@ -3748,8 +4292,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwzx UPVAL:RB, LFUNC:RB, RD
| ins_next1
| lwz TMP1, UPVAL:RB->v
+ |.if FPU
| lfd f0, 0(TMP1)
| stfdx f0, BASE, RA
+ |.else
+ | lwz TMP2, 0(TMP1)
+ | lwz TMP3, 4(TMP1)
+ | stwux TMP2, RA, BASE
+ | stw TMP3, 4(RA)
+ |.endif
| ins_next2
break;
case BC_USETV:
@@ -3757,14 +4308,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz LFUNC:RB, FRAME_FUNC(BASE)
| srwi RA, RA, 1
| addi RA, RA, offsetof(GCfuncL, uvptr)
+ |.if FPU
| lfdux f0, RD, BASE
+ |.else
+ | lwzux CARG1, RD, BASE
+ | lwz CARG3, 4(RD)
+ |.endif
| lwzx UPVAL:RB, LFUNC:RB, RA
| lbz TMP3, UPVAL:RB->marked
| lwz CARG2, UPVAL:RB->v
| andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
| lbz TMP0, UPVAL:RB->closed
| lwz TMP2, 0(RD)
+ |.if FPU
| stfd f0, 0(CARG2)
+ |.else
+ | stw CARG1, 0(CARG2)
+ | stw CARG3, 4(CARG2)
+ |.endif
| cmplwi cr1, TMP0, 0
| lwz TMP1, 4(RD)
| cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -3820,11 +4381,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz LFUNC:RB, FRAME_FUNC(BASE)
| srwi RA, RA, 1
| addi RA, RA, offsetof(GCfuncL, uvptr)
+ |.if FPU
| lfdx f0, KBASE, RD
+ |.else
+ | lwzux TMP2, RD, KBASE
+ | lwz TMP3, 4(RD)
+ |.endif
| lwzx UPVAL:RB, LFUNC:RB, RA
| ins_next1
| lwz TMP1, UPVAL:RB->v
+ |.if FPU
| stfd f0, 0(TMP1)
+ |.else
+ | stw TMP2, 0(TMP1)
+ | stw TMP3, 4(TMP1)
+ |.endif
| ins_next2
break;
case BC_USETP:
@@ -3972,11 +4543,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif
| ble ->vmeta_tgetv // Integer key and in array part?
| lwzx TMP0, TMP1, TMP2
+ |.if FPU
| lfdx f14, TMP1, TMP2
+ |.else
+ | lwzux SAVE0, TMP1, TMP2
+ | lwz SAVE1, 4(TMP1)
+ |.endif
| checknil TMP0; beq >2
|1:
| ins_next1
+ |.if FPU
| stfdx f14, BASE, RA
+ |.else
+ | stwux SAVE0, RA, BASE
+ | stw SAVE1, 4(RA)
+ |.endif
| ins_next2
|
|2: // Check for __index if table value is nil.
@@ -4052,12 +4633,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz TMP1, TAB:RB->asize
| lwz TMP2, TAB:RB->array
| cmplw TMP0, TMP1; bge ->vmeta_tgetb
+ |.if FPU
| lwzx TMP1, TMP2, RC
| lfdx f0, TMP2, RC
+ |.else
+ | lwzux TMP1, TMP2, RC
+ | lwz TMP3, 4(TMP2)
+ |.endif
| checknil TMP1; beq >5
|1:
| ins_next1
+ |.if FPU
| stfdx f0, BASE, RA
+ |.else
+ | stwux TMP1, RA, BASE
+ | stw TMP3, 4(RA)
+ |.endif
| ins_next2
|
|5: // Check for __index if table value is nil.
@@ -4087,10 +4678,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| cmplw TMP0, CARG2
| slwi TMP2, CARG2, 3
| ble ->vmeta_tgetr // In array part?
+ |.if FPU
| lfdx f14, TMP1, TMP2
+ |.else
+ | lwzux SAVE0, TMP2, TMP1
+ | lwz SAVE1, 4(TMP2)
+ |.endif
|->BC_TGETR_Z:
| ins_next1
+ |.if FPU
| stfdx f14, BASE, RA
+ |.else
+ | stwux SAVE0, RA, BASE
+ | stw SAVE1, 4(RA)
+ |.endif
| ins_next2
break;
@@ -4131,11 +4732,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ble ->vmeta_tsetv // Integer key and in array part?
| lwzx TMP2, TMP1, TMP0
| lbz TMP3, TAB:RB->marked
+ |.if FPU
| lfdx f14, BASE, RA
+ |.else
+ | add SAVE1, BASE, RA
+ | lwz SAVE0, 0(SAVE1)
+ | lwz SAVE1, 4(SAVE1)
+ |.endif
| checknil TMP2; beq >3
|1:
| andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
+ |.if FPU
| stfdx f14, TMP1, TMP0
+ |.else
+ | stwux SAVE0, TMP1, TMP0
+ | stw SAVE1, 4(TMP1)
+ |.endif
| bne >7
|2:
| ins_next
@@ -4176,7 +4788,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz NODE:TMP2, TAB:RB->node
| stb ZERO, TAB:RB->nomm // Clear metamethod cache.
| and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
+ |.if FPU
| lfdx f14, BASE, RA
+ |.else
+ | add CARG2, BASE, RA
+ | lwz SAVE0, 0(CARG2)
+ | lwz SAVE1, 4(CARG2)
+ |.endif
| slwi TMP0, TMP1, 5
| slwi TMP1, TMP1, 3
| sub TMP1, TMP0, TMP1
@@ -4192,7 +4810,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| checknil CARG2; beq >4 // Key found, but nil value?
|2:
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
+ |.if FPU
| stfd f14, NODE:TMP2->val
+ |.else
+ | stw SAVE0, NODE:TMP2->val.u32.hi
+ | stw SAVE1, NODE:TMP2->val.u32.lo
+ |.endif
| bne >7
|3:
| ins_next
@@ -4231,7 +4854,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
| // Returns TValue *.
| lp BASE, L->base
+ |.if FPU
| stfd f14, 0(CRET1)
+ |.else
+ | stw SAVE0, 0(CRET1)
+ | stw SAVE1, 4(CRET1)
+ |.endif
| b <3 // No 2nd write barrier needed.
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4248,13 +4876,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz TMP2, TAB:RB->array
| lbz TMP3, TAB:RB->marked
| cmplw TMP0, TMP1
+ |.if FPU
| lfdx f14, BASE, RA
+ |.else
+ | add CARG2, BASE, RA
+ | lwz SAVE0, 0(CARG2)
+ | lwz SAVE1, 4(CARG2)
+ |.endif
| bge ->vmeta_tsetb
| lwzx TMP1, TMP2, RC
| checknil TMP1; beq >5
|1:
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
+ |.if FPU
| stfdx f14, TMP2, RC
+ |.else
+ | stwux SAVE0, RC, TMP2
+ | stw SAVE1, 4(RC)
+ |.endif
| bne >7
|2:
| ins_next
@@ -4294,10 +4933,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|2:
| cmplw TMP0, CARG3
| slwi TMP2, CARG3, 3
+ |.if FPU
| lfdx f14, BASE, RA
+ |.else
+ | lwzux SAVE0, RA, BASE
+ | lwz SAVE1, 4(RA)
+ |.endif
| ble ->vmeta_tsetr // In array part?
| ins_next1
+ |.if FPU
| stfdx f14, TMP1, TMP2
+ |.else
+ | stwux SAVE0, TMP1, TMP2
+ | stw SAVE1, 4(TMP1)
+ |.endif
| ins_next2
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4327,10 +4976,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add TMP1, TMP1, TMP0
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
|3: // Copy result slots to table.
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz SAVE0, 0(RA)
+ | lwz SAVE1, 4(RA)
+ |.endif
| addi RA, RA, 8
| cmpw cr1, RA, TMP2
+ |.if FPU
| stfd f0, 0(TMP1)
+ |.else
+ | stw SAVE0, 0(TMP1)
+ | stw SAVE1, 4(TMP1)
+ |.endif
| addi TMP1, TMP1, 8
| blt cr1, <3
| bne >7
@@ -4397,9 +5056,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beq cr1, >3
|2:
| addi TMP3, TMP2, 8
+ |.if FPU
| lfdx f0, RA, TMP2
+ |.else
+ | add CARG3, RA, TMP2
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ |.endif
| cmplw cr1, TMP3, NARGS8:RC
+ |.if FPU
| stfdx f0, BASE, TMP2
+ |.else
+ | stwux CARG1, TMP2, BASE
+ | stw CARG2, 4(TMP2)
+ |.endif
| mr TMP2, TMP3
| bne cr1, <2
|3:
@@ -4432,14 +5102,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add BASE, BASE, RA
| lwz TMP1, -24(BASE)
| lwz LFUNC:RB, -20(BASE)
+ |.if FPU
| lfd f1, -8(BASE)
| lfd f0, -16(BASE)
+ |.else
+ | lwz CARG1, -8(BASE)
+ | lwz CARG2, -4(BASE)
+ | lwz CARG3, -16(BASE)
+ | lwz CARG4, -12(BASE)
+ |.endif
| stw TMP1, 0(BASE) // Copy callable.
| stw LFUNC:RB, 4(BASE)
| checkfunc TMP1
- | stfd f1, 16(BASE) // Copy control var.
| li NARGS8:RC, 16 // Iterators get 2 arguments.
+ |.if FPU
+ | stfd f1, 16(BASE) // Copy control var.
| stfdu f0, 8(BASE) // Copy state.
+ |.else
+ | stw CARG1, 16(BASE) // Copy control var.
+ | stw CARG2, 20(BASE)
+ | stwu CARG3, 8(BASE) // Copy state.
+ | stw CARG4, 4(BASE)
+ |.endif
| bne ->vmeta_call
| ins_call
break;
@@ -4460,7 +5144,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| slwi TMP3, RC, 3
| bge >5 // Index points after array part?
| lwzx TMP2, TMP1, TMP3
+ |.if FPU
| lfdx f0, TMP1, TMP3
+ |.else
+ | lwzux CARG1, TMP3, TMP1
+ | lwz CARG2, 4(TMP3)
+ |.endif
| checknil TMP2
| lwz INS, -4(PC)
| beq >4
@@ -4472,7 +5161,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif
| addi RC, RC, 1
| addis TMP3, PC, -(BCBIAS_J*4 >> 16)
+ |.if FPU
| stfd f0, 8(RA)
+ |.else
+ | stw CARG1, 8(RA)
+ | stw CARG2, 12(RA)
+ |.endif
| decode_RD4 TMP1, INS
| stw RC, -4(RA) // Update control var.
| add PC, TMP1, TMP3
@@ -4497,17 +5191,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| slwi RB, RC, 3
| sub TMP3, TMP3, RB
| lwzx RB, TMP2, TMP3
+ |.if FPU
| lfdx f0, TMP2, TMP3
+ |.else
+ | add CARG3, TMP2, TMP3
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ |.endif
| add NODE:TMP3, TMP2, TMP3
| checknil RB
| lwz INS, -4(PC)
| beq >7
+ |.if FPU
| lfd f1, NODE:TMP3->key
+ |.else
+ | lwz CARG3, NODE:TMP3->key.u32.hi
+ | lwz CARG4, NODE:TMP3->key.u32.lo
+ |.endif
| addis TMP2, PC, -(BCBIAS_J*4 >> 16)
+ |.if FPU
| stfd f0, 8(RA)
+ |.else
+ | stw CARG1, 8(RA)
+ | stw CARG2, 12(RA)
+ |.endif
| add RC, RC, TMP0
| decode_RD4 TMP1, INS
+ |.if FPU
| stfd f1, 0(RA)
+ |.else
+ | stw CARG3, 0(RA)
+ | stw CARG4, 4(RA)
+ |.endif
| addi RC, RC, 1
| add PC, TMP1, TMP2
| stw RC, -4(RA) // Update control var.
@@ -4573,9 +5288,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| subi TMP2, TMP2, 16
| ble >2 // No vararg slots?
|1: // Copy vararg slots to destination slots.
+ |.if FPU
| lfd f0, 0(RC)
+ |.else
+ | lwz CARG1, 0(RC)
+ | lwz CARG2, 4(RC)
+ |.endif
| addi RC, RC, 8
+ |.if FPU
| stfd f0, 0(RA)
+ |.else
+ | stw CARG1, 0(RA)
+ | stw CARG2, 4(RA)
+ |.endif
| cmplw RA, TMP2
| cmplw cr1, RC, TMP3
| bge >3 // All destination slots filled?
@@ -4598,9 +5323,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addi MULTRES, TMP1, 8
| bgt >7
|6:
+ |.if FPU
| lfd f0, 0(RC)
+ |.else
+ | lwz CARG1, 0(RC)
+ | lwz CARG2, 4(RC)
+ |.endif
| addi RC, RC, 8
+ |.if FPU
| stfd f0, 0(RA)
+ |.else
+ | stw CARG1, 0(RA)
+ | stw CARG2, 4(RA)
+ |.endif
| cmplw RC, TMP3
| addi RA, RA, 8
| blt <6 // More vararg slots?
@@ -4651,14 +5386,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| li TMP1, 0
|2:
| addi TMP3, TMP1, 8
+ |.if FPU
| lfdx f0, RA, TMP1
+ |.else
+ | add CARG3, RA, TMP1
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ |.endif
| cmpw TMP3, RC
+ |.if FPU
| stfdx f0, TMP2, TMP1
+ |.else
+ | add CARG3, TMP2, TMP1
+ | stw CARG1, 0(CARG3)
+ | stw CARG2, 4(CARG3)
+ |.endif
| beq >3
| addi TMP1, TMP3, 8
+ |.if FPU
| lfdx f1, RA, TMP3
+ |.else
+ | add CARG3, RA, TMP3
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ |.endif
| cmpw TMP1, RC
+ |.if FPU
| stfdx f1, TMP2, TMP3
+ |.else
+ | add CARG3, TMP2, TMP3
+ | stw CARG1, 0(CARG3)
+ | stw CARG2, 4(CARG3)
+ |.endif
| bne <2
|3:
|5:
@@ -4700,8 +5459,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| subi TMP2, BASE, 8
| decode_RB8 RB, INS
if (op == BC_RET1) {
+ |.if FPU
| lfd f0, 0(RA)
| stfd f0, 0(TMP2)
+ |.else
+ | lwz CARG1, 0(RA)
+ | lwz CARG2, 4(RA)
+ | stw CARG1, 0(TMP2)
+ | stw CARG2, 4(TMP2)
+ |.endif
}
|5:
| cmplw RB, RD
@@ -4762,11 +5528,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|4:
| stw CARG1, FORL_IDX*8+4(RA)
} else {
- | lwz TMP3, FORL_STEP*8(RA)
+ | lwz SAVE0, FORL_STEP*8(RA)
| lwz CARG3, FORL_STEP*8+4(RA)
| lwz TMP2, FORL_STOP*8(RA)
| lwz CARG2, FORL_STOP*8+4(RA)
- | cmplw cr7, TMP3, TISNUM
+ | cmplw cr7, SAVE0, TISNUM
| cmplw cr1, TMP2, TISNUM
| crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
| crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -4809,41 +5575,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
if (vk) {
|.if DUALNUM
|9: // FP loop.
+ |.if FPU
| lfd f1, FORL_IDX*8(RA)
|.else
+ | lwz CARG1, FORL_IDX*8(RA)
+ | lwz CARG2, FORL_IDX*8+4(RA)
+ |.endif
+ |.else
| lfdux f1, RA, BASE
|.endif
+ |.if FPU
| lfd f3, FORL_STEP*8(RA)
| lfd f2, FORL_STOP*8(RA)
- | lwz TMP3, FORL_STEP*8(RA)
| fadd f1, f1, f3
| stfd f1, FORL_IDX*8(RA)
+ |.else
+ | lwz CARG3, FORL_STEP*8(RA)
+ | lwz CARG4, FORL_STEP*8+4(RA)
+ | mr SAVE1, RD
+ | blex __adddf3
+ | mr RD, SAVE1
+ | stw CRET1, FORL_IDX*8(RA)
+ | stw CRET2, FORL_IDX*8+4(RA)
+ | lwz CARG3, FORL_STOP*8(RA)
+ | lwz CARG4, FORL_STOP*8+4(RA)
+ |.endif
+ | lwz SAVE0, FORL_STEP*8(RA)
} else {
|.if DUALNUM
|9: // FP loop.
|.else
| lwzux TMP1, RA, BASE
- | lwz TMP3, FORL_STEP*8(RA)
+ | lwz SAVE0, FORL_STEP*8(RA)
| lwz TMP2, FORL_STOP*8(RA)
| cmplw cr0, TMP1, TISNUM
- | cmplw cr7, TMP3, TISNUM
+ | cmplw cr7, SAVE0, TISNUM
| cmplw cr1, TMP2, TISNUM
|.endif
+ |.if FPU
| lfd f1, FORL_IDX*8(RA)
+ |.else
+ | lwz CARG1, FORL_IDX*8(RA)
+ | lwz CARG2, FORL_IDX*8+4(RA)
+ |.endif
| crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
+ |.if FPU
| lfd f2, FORL_STOP*8(RA)
+ |.else
+ | lwz CARG3, FORL_STOP*8(RA)
+ | lwz CARG4, FORL_STOP*8+4(RA)
+ |.endif
| bge ->vmeta_for
}
- | cmpwi cr6, TMP3, 0
+ | cmpwi cr6, SAVE0, 0
if (op != BC_JFORL) {
| srwi RD, RD, 1
}
+ |.if FPU
| stfd f1, FORL_EXT*8(RA)
+ |.else
+ | stw CARG1, FORL_EXT*8(RA)
+ | stw CARG2, FORL_EXT*8+4(RA)
+ |.endif
if (op != BC_JFORL) {
| add RD, PC, RD
}
+ |.if FPU
| fcmpu cr0, f1, f2
+ |.else
+ | mr SAVE1, RD
+ | blex __ledf2
+ | cmpwi CRET1, 0
+ | mr RD, SAVE1
+ |.endif
if (op == BC_JFORI) {
| addis PC, RD, -(BCBIAS_J*4 >> 16)
}
--
2.20.1
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/yueeranna/luajit.git
git@gitee.com:yueeranna/luajit.git
yueeranna
luajit
luajit
a8

搜索帮助

0d507c66 1850385 C8b1a773 1850385