aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLeo <thinkabit.ukim@gmail.com>2020-02-29 08:29:59 -0300
committerLeo <thinkabit.ukim@gmail.com>2020-02-29 08:55:23 -0300
commita7d057d98859940cc3347dba011957f99291b1fa (patch)
treed94da562e788dfb0ee30d584e9281bfbfb72fded
parentaf7f776d08cf7c12c3dd62347829fe33e66dceaa (diff)
downloadaports-a7d057d98859940cc3347dba011957f99291b1fa.tar.gz
aports-a7d057d98859940cc3347dba011957f99291b1fa.tar.bz2
aports-a7d057d98859940cc3347dba011957f99291b1fa.tar.xz
main/luajit: modernize, cleanup old patches
-rw-r--r--main/luajit/0004-Add-ppc64-support-based-on-koriakin-GitHub-patchset.patch3522
-rw-r--r--main/luajit/APKBUILD15
-rw-r--r--main/luajit/s390x.patch43692
3 files changed, 2 insertions, 47227 deletions
diff --git a/main/luajit/0004-Add-ppc64-support-based-on-koriakin-GitHub-patchset.patch b/main/luajit/0004-Add-ppc64-support-based-on-koriakin-GitHub-patchset.patch
deleted file mode 100644
index a879f3fc9d..0000000000
--- a/main/luajit/0004-Add-ppc64-support-based-on-koriakin-GitHub-patchset.patch
+++ /dev/null
@@ -1,3522 +0,0 @@
-From: "Rodrigo R. Galvao" <rosattig@br.ibm.com>
-Date: Wed, 11 Oct 2017 08:41:47 +0000
-Subject: New patch proposal for PPC64 support
-
- Create a patch for PPC64 support based on
-https://github.com/LuaJIT/LuaJIT/pull/140.
- It replaces the old patch since this new one is more likely to be merged
-with luajit upstream.
-
-
-Author: Rodrigo R. Galvao <rosattig@br.ibm.com>
----
- dynasm/dasm_ppc.lua | 5 +
- src/Makefile | 11 +-
- src/host/buildvm_asm.c | 16 +-
- src/lj_arch.h | 18 +-
- src/lj_ccall.c | 166 ++++++-
- src/lj_ccall.h | 13 +
- src/lj_ccallback.c | 68 ++-
- src/lj_ctype.h | 2 +-
- src/lj_def.h | 4 +
- src/lj_frame.h | 9 +
- src/lj_target_ppc.h | 14 +
- src/vm_ppc.dasc | 1290 ++++++++++++++++++++++++++++++++----------------
- 12 files changed, 1162 insertions(+), 454 deletions(-)
-
-diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
-index f73974d..a4ad70b 100644
---- a/dynasm/dasm_ppc.lua
-+++ b/dynasm/dasm_ppc.lua
-@@ -257,9 +257,11 @@ map_op = {
- addic_3 = "30000000RRI",
- ["addic._3"] = "34000000RRI",
- addi_3 = "38000000RR0I",
-+ addil_3 = "38000000RR0J",
- li_2 = "38000000RI",
- la_2 = "38000000RD",
- addis_3 = "3c000000RR0I",
-+ addisl_3 = "3c000000RR0J",
- lis_2 = "3c000000RI",
- lus_2 = "3c000000RU",
- bc_3 = "40000000AAK",
-@@ -842,6 +844,9 @@ map_op = {
- srdi_3 = op_alias("rldicl_4", function(p)
- p[4] = p[3]; p[3] = "64-("..p[3]..")"
- end),
-+ ["srdi._3"] = op_alias("rldicl._4", function(p)
-+ p[4] = p[3]; p[3] = "64-("..p[3]..")"
-+ end),
- clrldi_3 = op_alias("rldicl_4", function(p)
- p[4] = p[3]; p[3] = "0"
- end),
-diff --git a/src/Makefile b/src/Makefile
-index 6b73a89..cc50bae 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -453,7 +453,16 @@ ifeq (ppc,$(TARGET_LJARCH))
- DASM_AFLAGS+= -D GPR64
- endif
- ifeq (PS3,$(TARGET_SYS))
-- DASM_AFLAGS+= -D PPE -D TOC
-+ DASM_AFLAGS+= -D PPE
-+ endif
-+ ifneq (,$(findstring LJ_ARCH_PPC_OPD 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D OPD
-+ endif
-+ ifneq (,$(findstring LJ_ARCH_PPC_OPDENV 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D OPDENV
-+ endif
-+ ifneq (,$(findstring LJ_ARCH_PPC_ELFV2 1,$(TARGET_TESTARCH)))
-+ DASM_AFLAGS+= -D ELFV2
- endif
- ifneq (,$(findstring LJ_ARCH_PPC64 ,$(TARGET_TESTARCH)))
- DASM_ARCH= ppc64
-diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
-index ffd1490..6bb995e 100644
---- a/src/host/buildvm_asm.c
-+++ b/src/host/buildvm_asm.c
-@@ -140,18 +140,14 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
- #else
- #define TOCPREFIX ""
- #endif
-- if ((ins >> 26) == 16) {
-+ if ((ins >> 26) == 14) {
-+ fprintf(ctx->fp, "\taddi %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym);
-+ } else if ((ins >> 26) == 15) {
-+ fprintf(ctx->fp, "\taddis %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym);
-+ } else if ((ins >> 26) == 16) {
- fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n",
- (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym);
- } else if ((ins >> 26) == 18) {
--#if LJ_ARCH_PPC64
-- const char *suffix = strchr(sym, '@');
-- if (suffix && suffix[1] == 'h') {
-- fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym);
-- } else if (suffix && suffix[1] == 'l') {
-- fprintf(ctx->fp, "\tld 12, %s\n", sym);
-- } else
--#endif
- fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym);
- } else {
- fprintf(stderr,
-@@ -250,7 +246,7 @@ void emit_asm(BuildCtx *ctx)
- int i, rel;
-
- fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
--#if LJ_ARCH_PPC64
-+#if LJ_ARCH_PPC_ELFV2
- fprintf(ctx->fp, "\t.abiversion 2\n");
- #endif
- fprintf(ctx->fp, "\t.text\n");
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index d609b37..53bc651 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -269,10 +269,18 @@
- #if LJ_TARGET_CONSOLE
- #define LJ_ARCH_PPC32ON64 1
- #define LJ_ARCH_NOFFI 1
-+#if LJ_TARGET_PS3
-+#define LJ_ARCH_PPC_OPD 1
-+#endif
- #elif LJ_ARCH_BITS == 64
--#define LJ_ARCH_PPC64 1
--#define LJ_TARGET_GC64 1
-+#define LJ_ARCH_PPC32ON64 1
- #define LJ_ARCH_NOJIT 1 /* NYI */
-+#if _CALL_ELF == 2
-+#define LJ_ARCH_PPC_ELFV2 1
-+#else
-+#define LJ_ARCH_PPC_OPD 1
-+#define LJ_ARCH_PPC_OPDENV 1
-+#endif
- #endif
-
- #if _ARCH_PWR7
-@@ -423,12 +431,6 @@
- #if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
- #error "No support for PowerPC CPUs without double-precision FPU"
- #endif
--#if !LJ_ARCH_PPC64 && LJ_ARCH_ENDIAN == LUAJIT_LE
--#error "No support for little-endian PPC32"
--#endif
--#if LJ_ARCH_PPC64
--#error "No support for PowerPC 64 bit mode (yet)"
--#endif
- #ifdef __NO_FPRS__
- #error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
- #endif
-diff --git a/src/lj_ccall.c b/src/lj_ccall.c
-index 5c252e5..b891591 100644
---- a/src/lj_ccall.c
-+++ b/src/lj_ccall.c
-@@ -369,21 +369,97 @@
- #elif LJ_TARGET_PPC
- /* -- PPC calling conventions --------------------------------------------- */
-
-+#if LJ_ARCH_BITS == 64
-+
-+#if LJ_ARCH_PPC_ELFV2
-+
-+#define CCALL_HANDLE_STRUCTRET \
-+ if (sz > 16 && ccall_classify_fp(cts, ctr) <= 0) { \
-+ cc->retref = 1; /* Return by reference. */ \
-+ cc->gpr[ngpr++] = (GPRArg)dp; \
-+ }
-+
-+#define CCALL_HANDLE_STRUCTRET2 \
-+ int isfp = ccall_classify_fp(cts, ctr); \
-+ int i; \
-+ if (isfp == FTYPE_FLOAT) { \
-+ for (i = 0; i < ctr->size / 4; i++) \
-+ ((float *)dp)[i] = cc->fpr[i]; \
-+ } else if (isfp == FTYPE_DOUBLE) { \
-+ for (i = 0; i < ctr->size / 8; i++) \
-+ ((double *)dp)[i] = cc->fpr[i]; \
-+ } else { \
-+ if (ctr->size < 8 && LJ_BE) { \
-+ sp += 8 - ctr->size; \
-+ } \
-+ memcpy(dp, sp, ctr->size); \
-+ }
-+
-+#else
-+
- #define CCALL_HANDLE_STRUCTRET \
- cc->retref = 1; /* Return all structs by reference. */ \
- cc->gpr[ngpr++] = (GPRArg)dp;
-
-+#endif
-+
- #define CCALL_HANDLE_COMPLEXRET \
- /* Complex values are returned in 2 or 4 GPRs. */ \
- cc->retref = 0;
-
-+#define CCALL_HANDLE_STRUCTARG
-+
- #define CCALL_HANDLE_COMPLEXRET2 \
-- memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */
-+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
-+ ((float *)dp)[0] = cc->fpr[0]; \
-+ ((float *)dp)[1] = cc->fpr[1]; \
-+ } else { /* Copy complex double from FPRs. */ \
-+ ((double *)dp)[0] = cc->fpr[0]; \
-+ ((double *)dp)[1] = cc->fpr[1]; \
-+ }
-+
-+#define CCALL_HANDLE_COMPLEXARG \
-+ isfp = 1; \
-+ if (d->size == sizeof(float) * 2) { \
-+ d = ctype_get(cts, CTID_COMPLEX_DOUBLE); \
-+ isf32 = 1; \
-+ }
-+
-+#define CCALL_HANDLE_REGARG \
-+ if (isfp && d->size == sizeof(float)) { \
-+ d = ctype_get(cts, CTID_DOUBLE); \
-+ isf32 = 1; \
-+ } \
-+ if (ngpr < maxgpr) { \
-+ dp = &cc->gpr[ngpr]; \
-+ ngpr += n; \
-+ if (ngpr > maxgpr) { \
-+ nsp += ngpr - 8; \
-+ ngpr = 8; \
-+ if (nsp > CCALL_MAXSTACK) { \
-+ goto err_nyi; \
-+ } \
-+ } \
-+ goto done; \
-+ }
-+
-+#else
-+
-+#define CCALL_HANDLE_STRUCTRET \
-+ cc->retref = 1; /* Return all structs by reference. */ \
-+ cc->gpr[ngpr++] = (GPRArg)dp;
-+
-+#define CCALL_HANDLE_COMPLEXRET \
-+ /* Complex values are returned in 2 or 4 GPRs. */ \
-+ cc->retref = 0;
-
- #define CCALL_HANDLE_STRUCTARG \
- rp = cdataptr(lj_cdata_new(cts, did, sz)); \
- sz = CTSIZE_PTR; /* Pass all structs by reference. */
-
-+#define CCALL_HANDLE_COMPLEXRET2 \
-+ memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */
-+
- #define CCALL_HANDLE_COMPLEXARG \
- /* Pass complex by value in 2 or 4 GPRs. */
-
-@@ -410,6 +486,8 @@
- } \
- }
-
-+#endif
-+
- #define CCALL_HANDLE_RET \
- if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
- ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
-@@ -801,6 +879,50 @@ noth: /* Not a homogeneous float/double aggregate. */
-
- #endif
-
-+/* -- PowerPC64 ELFv2 ABI struct classification ------------------- */
-+
-+#if LJ_ARCH_PPC_ELFV2
-+
-+#define FTYPE_FLOAT 1
-+#define FTYPE_DOUBLE 2
-+
-+static unsigned int ccall_classify_fp(CTState *cts, CType *ct) {
-+ if (ctype_isfp(ct->info)) {
-+ if (ct->size == sizeof(float))
-+ return FTYPE_FLOAT;
-+ else
-+ return FTYPE_DOUBLE;
-+ } else if (ctype_iscomplex(ct->info)) {
-+ if (ct->size == sizeof(float) * 2)
-+ return FTYPE_FLOAT;
-+ else
-+ return FTYPE_DOUBLE;
-+ } else if (ctype_isstruct(ct->info)) {
-+ int res = -1;
-+ int sz = ct->size;
-+ while (ct->sib) {
-+ ct = ctype_get(cts, ct->sib);
-+ if (ctype_isfield(ct->info)) {
-+ int sub = ccall_classify_fp(cts, ctype_rawchild(cts, ct));
-+ if (res == -1)
-+ res = sub;
-+ if (sub != -1 && sub != res)
-+ return 0;
-+ } else if (ctype_isbitfield(ct->info) ||
-+ ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
-+ return 0;
-+ }
-+ }
-+ if (res > 0 && sz > res * 4 * 8)
-+ return 0;
-+ return res;
-+ } else {
-+ return 0;
-+ }
-+}
-+
-+#endif
-+
- /* -- MIPS64 ABI struct classification ---------------------------- */
-
- #if LJ_TARGET_MIPS64
-@@ -974,6 +1096,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
- CTSize sz;
- MSize n, isfp = 0, isva = 0;
- void *dp, *rp = NULL;
-+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
-+ int isf32 = 0;
-+#endif
-
- if (fid) { /* Get argument type from field. */
- CType *ctf = ctype_get(cts, fid);
-@@ -1030,7 +1155,37 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
- *(void **)dp = rp;
- dp = rp;
- }
-+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 && LJ_BE
-+ if (ctype_isstruct(d->info) && sz < CTSIZE_PTR) {
-+ dp = (char *)dp + (CTSIZE_PTR - sz);
-+ }
-+#endif
- lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
-+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
-+ if (isfp) {
-+ int i;
-+ for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++)
-+ cc->fpr[nfpr++] = ((double *)dp)[i];
-+ }
-+ if (isf32) {
-+ int i;
-+ for (i = 0; i < d->size / 8; i++)
-+ ((float *)dp)[i*2] = ((double *)dp)[i];
-+ }
-+#endif
-+#if LJ_ARCH_PPC_ELFV2
-+ if (ctype_isstruct(d->info)) {
-+ isfp = ccall_classify_fp(cts, d);
-+ int i;
-+ if (isfp == FTYPE_FLOAT) {
-+ for (i = 0; i < d->size / 4 && nfpr < CCALL_NARG_FPR; i++)
-+ cc->fpr[nfpr++] = ((float *)dp)[i];
-+ } else if (isfp == FTYPE_DOUBLE) {
-+ for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++)
-+ cc->fpr[nfpr++] = ((double *)dp)[i];
-+ }
-+ }
-+#endif
- /* Extend passed integers to 32 bits at least. */
- if (ctype_isinteger_or_bool(d->info) && d->size < 4) {
- if (d->info & CTF_UNSIGNED)
-@@ -1044,6 +1199,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
- if (isfp && d->size == sizeof(float))
- ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
- #endif
-+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
-+ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info))
-+ && d->size <= 4) {
-+ if (d->info & CTF_UNSIGNED)
-+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
-+ else
-+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
-+ }
-+#endif
- #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
- if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
- #if LJ_TARGET_MIPS64
-diff --git a/src/lj_ccall.h b/src/lj_ccall.h
-index 59f6648..bbf309f 100644
---- a/src/lj_ccall.h
-+++ b/src/lj_ccall.h
-@@ -86,10 +86,23 @@ typedef union FPRArg {
- #elif LJ_TARGET_PPC
-
- #define CCALL_NARG_GPR 8
-+#if LJ_ARCH_BITS == 64
-+#define CCALL_NARG_FPR 13
-+#if LJ_ARCH_PPC_ELFV2
-+#define CCALL_NRET_GPR 2
-+#define CCALL_NRET_FPR 8
-+#define CCALL_SPS_EXTRA 14
-+#else
-+#define CCALL_NRET_GPR 1
-+#define CCALL_NRET_FPR 2
-+#define CCALL_SPS_EXTRA 16
-+#endif
-+#else
- #define CCALL_NARG_FPR 8
- #define CCALL_NRET_GPR 4 /* For complex double. */
- #define CCALL_NRET_FPR 1
- #define CCALL_SPS_EXTRA 4
-+#endif
- #define CCALL_SPS_FREE 0
-
- typedef intptr_t GPRArg;
-diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
-index 846827b..eb7f445 100644
---- a/src/lj_ccallback.c
-+++ b/src/lj_ccallback.c
-@@ -61,8 +61,24 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
-
- #elif LJ_TARGET_PPC
-
-+#if LJ_ARCH_PPC_OPD
-+
-+#define CALLBACK_SLOT2OFS(slot) (24*(slot))
-+#define CALLBACK_OFS2SLOT(ofs) ((ofs)/24)
-+#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
-+
-+#elif LJ_ARCH_PPC_ELFV2
-+
-+#define CALLBACK_SLOT2OFS(slot) (4*(slot))
-+#define CALLBACK_OFS2SLOT(ofs) ((ofs)/4)
-+#define CALLBACK_MAX_SLOT (CALLBACK_MCODE_SIZE/4 - 10)
-+
-+#else
-+
- #define CALLBACK_MCODE_HEAD 24
-
-+#endif
-+
- #elif LJ_TARGET_MIPS32
-
- #define CALLBACK_MCODE_HEAD 20
-@@ -188,24 +204,59 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
- }
- #elif LJ_TARGET_PPC
-+#if LJ_ARCH_PPC_OPD
-+register void *vm_toc __asm__("r2");
-+static void callback_mcode_init(global_State *g, uint64_t *page)
-+{
-+ uint64_t *p = page;
-+ void *target = (void *)lj_vm_ffi_callback;
-+ MSize slot;
-+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
-+ *p++ = (uint64_t)target;
-+ *p++ = (uint64_t)vm_toc;
-+ *p++ = (uint64_t)g | ((uint64_t)slot << 47);
-+ }
-+ lua_assert(p - page <= CALLBACK_MCODE_SIZE / 8);
-+}
-+#else
- static void callback_mcode_init(global_State *g, uint32_t *page)
- {
- uint32_t *p = page;
- void *target = (void *)lj_vm_ffi_callback;
- MSize slot;
-+#if LJ_ARCH_PPC_ELFV2
-+ // Needs to be in sync with lj_vm_ffi_callback.
-+ lua_assert(CALLBACK_MCODE_SIZE == 4096);
-+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
-+ *p = PPCI_B | (((page+CALLBACK_MAX_SLOT-p) & 0x00ffffffu) << 2);
-+ p++;
-+ }
-+ *p++ = PPCI_LI | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 32) & 0xffff);
-+ *p++ = PPCI_LI | PPCF_T(RID_R11) | ((((intptr_t)g) >> 32) & 0xffff);
-+ *p++ = PPCI_RLDICR | PPCF_T(RID_SYS1) | PPCF_A(RID_SYS1) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */
-+ *p++ = PPCI_RLDICR | PPCF_T(RID_R11) | PPCF_A(RID_R11) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */
-+ *p++ = PPCI_ORIS | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 16) & 0xffff);
-+ *p++ = PPCI_ORIS | PPCF_A(RID_R11) | PPCF_T(RID_R11) | ((((intptr_t)g) >> 16) & 0xffff);
-+ *p++ = PPCI_ORI | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | (((intptr_t)target) & 0xffff);
-+ *p++ = PPCI_ORI | PPCF_A(RID_R11) | PPCF_T(RID_R11) | (((intptr_t)g) & 0xffff);
-+ *p++ = PPCI_MTCTR | PPCF_T(RID_SYS1);
-+ *p++ = PPCI_BCTR;
-+#else
- *p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16);
-- *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16);
-+ *p++ = PPCI_LIS | PPCF_T(RID_R11) | (u32ptr(g) >> 16);
- *p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff);
-- *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff);
-+ *p++ = PPCI_ORI | PPCF_A(RID_R11)|PPCF_T(RID_R11) | (u32ptr(g) & 0xffff);
- *p++ = PPCI_MTCTR | PPCF_T(RID_TMP);
- *p++ = PPCI_BCTR;
- for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
-- *p++ = PPCI_LI | PPCF_T(RID_R11) | slot;
-+ *p++ = PPCI_LI | PPCF_T(RID_R12) | slot;
- *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2);
- p++;
- }
-- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
-+#endif
-+ lua_assert(p - page <= CALLBACK_MCODE_SIZE / 4);
- }
-+#endif
- #elif LJ_TARGET_MIPS
- static void callback_mcode_init(global_State *g, uint32_t *page)
- {
-@@ -641,6 +692,15 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
- *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
- (int32_t)*(int16_t *)dp;
- }
-+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64
-+ if (ctr->size <= 4 &&
-+ (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) {
-+ if (ctr->info & CTF_UNSIGNED)
-+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp;
-+ else
-+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
-+ }
-+#endif
- #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
- /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
- if (ctr->size <= 4 &&
-diff --git a/src/lj_ctype.h b/src/lj_ctype.h
-index 0c220a8..105865b 100644
---- a/src/lj_ctype.h
-+++ b/src/lj_ctype.h
-@@ -153,7 +153,7 @@ typedef struct CType {
-
- /* Simplify target-specific configuration. Checked in lj_ccall.h. */
- #define CCALL_MAX_GPR 8
--#define CCALL_MAX_FPR 8
-+#define CCALL_MAX_FPR 14
-
- typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg;
-
-diff --git a/src/lj_def.h b/src/lj_def.h
-index 2d8fff6..381d6f5 100644
---- a/src/lj_def.h
-+++ b/src/lj_def.h
-@@ -71,7 +71,11 @@ typedef unsigned int uintptr_t;
- #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
- #define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */
-
-+#if defined(__powerpc64__) && _CALL_ELF != 2
-+#define LJ_NUM_CBPAGE 4 /* Number of FFI callback pages. */
-+#else
- #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */
-+#endif
-
- /* Minimum table/buffer sizes. */
- #define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */
-diff --git a/src/lj_frame.h b/src/lj_frame.h
-index 19c49a4..c666418 100644
---- a/src/lj_frame.h
-+++ b/src/lj_frame.h
-@@ -210,6 +210,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
- #define CFRAME_OFS_MULTRES 408
- #define CFRAME_SIZE 384
- #define CFRAME_SHIFT_MULTRES 3
-+#elif LJ_ARCH_PPC_ELFV2
-+#define CFRAME_OFS_ERRF 360
-+#define CFRAME_OFS_NRES 356
-+#define CFRAME_OFS_PREV 336
-+#define CFRAME_OFS_L 352
-+#define CFRAME_OFS_PC 348
-+#define CFRAME_OFS_MULTRES 344
-+#define CFRAME_SIZE 368
-+#define CFRAME_SHIFT_MULTRES 3
- #elif LJ_ARCH_PPC32ON64
- #define CFRAME_OFS_ERRF 472
- #define CFRAME_OFS_NRES 468
-diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
-index c5c991a..f0c8c94 100644
---- a/src/lj_target_ppc.h
-+++ b/src/lj_target_ppc.h
-@@ -30,8 +30,13 @@ enum {
-
- /* Calling conventions. */
- RID_RET = RID_R3,
-+#if LJ_LE
-+ RID_RETHI = RID_R4,
-+ RID_RETLO = RID_R3,
-+#else
- RID_RETHI = RID_R3,
- RID_RETLO = RID_R4,
-+#endif
- RID_FPRET = RID_F1,
-
- /* These definitions must match with the *.dasc file(s): */
-@@ -131,6 +136,8 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
- #define PPCF_C(r) ((r) << 6)
- #define PPCF_MB(n) ((n) << 6)
- #define PPCF_ME(n) ((n) << 1)
-+#define PPCF_SH(n) ((((n) & 31) << (11+1)) | (((n) & 32) >> (5-1)))
-+#define PPCF_M6(n) ((((n) & 31) << (5+1)) | (((n) & 32) << (11-5)))
- #define PPCF_Y 0x00200000
- #define PPCF_DOT 0x00000001
-
-@@ -200,6 +207,13 @@ typedef enum PPCIns {
- PPCI_RLWINM = 0x54000000,
- PPCI_RLWIMI = 0x50000000,
-
-+ PPCI_RLDICL = 0x78000000,
-+ PPCI_RLDICR = 0x78000004,
-+ PPCI_RLDIC = 0x78000008,
-+ PPCI_RLDIMI = 0x7800000c,
-+ PPCI_RLDCL = 0x78000010,
-+ PPCI_RLDCR = 0x78000012,
-+
- PPCI_B = 0x48000000,
- PPCI_BL = 0x48000001,
- PPCI_BC = 0x40800000,
-diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
-index b4260eb..abb381e 100644
---- a/src/vm_ppc.dasc
-+++ b/src/vm_ppc.dasc
-@@ -22,35 +22,40 @@
- |// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3).
- |// Affects reg saves, stack layout, carry/overflow/dot flags etc.
- |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
--|// TOC Need table of contents (64 bit or 32 bit variant, e.g. PS3).
-+|// OPD Need function descriptors (64 bit or 32 bit variant, e.g. PS3).
- |// Function pointers are really a struct: code, TOC, env (optional).
--|// TOCENV Function pointers have an environment pointer, too (not on PS3).
-+|// OPDENV Function pointers have an environment pointer, too (not on PS3).
-+|// ELFV2 The 64-bit ELF V2 ABI is in use.
- |// PPE Power Processor Element of Cell (PS3) or Xenon (Xbox 360).
- |// Must avoid (slow) micro-coded instructions.
- |
- |.if P64
--|.define TOC, 1
--|.define TOCENV, 1
- |.macro lpx, a, b, c; ldx a, b, c; .endmacro
- |.macro lp, a, b; ld a, b; .endmacro
- |.macro stp, a, b; std a, b; .endmacro
-+|.macro stpx, a, b, c; stdx a, b, c; .endmacro
- |.define decode_OPP, decode_OP8
--|.if FFI
--|// Missing: Calling conventions, 64 bit regs, TOC.
--|.error lib_ffi not yet implemented for PPC64
--|.endif
-+|.define PSIZE, 8
- |.else
- |.macro lpx, a, b, c; lwzx a, b, c; .endmacro
- |.macro lp, a, b; lwz a, b; .endmacro
- |.macro stp, a, b; stw a, b; .endmacro
-+|.macro stpx, a, b, c; stwx a, b, c; .endmacro
- |.define decode_OPP, decode_OP4
-+|.define PSIZE, 4
- |.endif
- |
- |// Convenience macros for TOC handling.
--|.if TOC
-+|.if OPD or ELFV2
- |// Linker needs a TOC patch area for every external call relocation.
--|.macro blex, target; bl extern target@plt; nop; .endmacro
-+|.macro blex, target; bl extern target; nop; .endmacro
- |.macro .toc, a, b; a, b; .endmacro
-+|.else
-+|.macro blex, target; bl extern target@plt; .endmacro
-+|.macro .toc, a, b; .endmacro
-+|.endif
-+|.if OPD
-+|.macro .opd, a, b; a, b; .endmacro
- |.if P64
- |.define TOC_OFS, 8
- |.define ENV_OFS, 16
-@@ -58,13 +63,13 @@
- |.define TOC_OFS, 4
- |.define ENV_OFS, 8
- |.endif
--|.else // No TOC.
--|.macro blex, target; bl extern target@plt; .endmacro
--|.macro .toc, a, b; .endmacro
-+|.else // No OPD.
-+|.macro .opd, a, b; .endmacro
- |.endif
--|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro
-+|.macro .opdenv, a, b; .if OPDENV; a, b; .endif; .endmacro
- |
- |.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro
-+|.macro .elfv2, a, b; .if ELFV2; a, b; .endif; .endmacro
- |
- |.macro andix., y, a, i
- |.if PPE
-@@ -75,29 +80,6 @@
- |.endif
- |.endmacro
- |
--|.macro clrso, reg
--|.if PPE
--| li reg, 0
--| mtxer reg
--|.else
--| mcrxr cr0
--|.endif
--|.endmacro
--|
--|.macro checkov, reg, noov
--|.if PPE
--| mfxer reg
--| add reg, reg, reg
--| cmpwi reg, 0
--| li reg, 0
--| mtxer reg
--| bgey noov
--|.else
--| mcrxr cr0
--| bley noov
--|.endif
--|.endmacro
--|
- |//-----------------------------------------------------------------------
- |
- |// Fixed register assignments for the interpreter.
-@@ -111,6 +93,7 @@
- |.define LREG, r18 // Register holding lua_State (also in SAVE_L).
- |.define MULTRES, r19 // Size of multi-result: (nresults+1)*8.
- |.define JGL, r31 // On-trace: global_State + 32768.
-+|.define BASEP4, r25 // Equal to BASE + 4
- |
- |// Constants for type-comparisons, stores and conversions. C callee-save.
- |.define TISNUM, r22
-@@ -143,12 +126,19 @@
- |
- |.define FARG1, f1
- |.define FARG2, f2
-+|.define FARG3, f3
-+|.define FARG4, f4
-+|.define FARG5, f5
-+|.define FARG6, f6
-+|.define FARG7, f7
-+|.define FARG8, f8
- |
- |.define CRET1, r3
- |.define CRET2, r4
- |
- |.define TOCREG, r2 // TOC register (only used by C code).
- |.define ENVREG, r11 // Environment pointer (nested C functions).
-+|.define FUNCREG, r12 // ELFv2 function pointer (overlaps RD)
- |
- |// Stack layout while in interpreter. Must match with lj_frame.h.
- |.if GPR64
-@@ -182,6 +172,49 @@
- |.define TMPD, TMPD_HI
- |.define TONUM_D, TONUM_HI
- |
-+|.elif ELFV2
-+|
-+|// 392(sp) // \ 32 bit C frame info.
-+|.define SAVE_LR, 384(sp)
-+|.define SAVE_CR, 376(sp) // 64 bit CR save.
-+|.define CFRAME_SPACE, 368 // Delta for sp.
-+|// Back chain for sp: 368(sp) <-- sp entering interpreter
-+|.define SAVE_ERRF, 360(sp) // |
-+|.define SAVE_NRES, 356(sp) // |
-+|.define SAVE_L, 352(sp) // > Parameter save area.
-+|.define SAVE_PC, 348(sp) // |
-+|.define SAVE_MULTRES, 344(sp) // |
-+|.define SAVE_CFRAME, 336(sp) // / 64 bit C frame chain.
-+|.define SAVE_FPR_, 192 // .. 192+18*8: 64 bit FPR saves.
-+|.define SAVE_GPR_, 48 // .. 48+18*8: 64 bit GPR saves.
-+|.if ENDIAN_LE
-+|.define TMPD_HI, 44(sp)
-+|.define TMPD_LO, 40(sp)
-+|.define TONUM_HI, 36(sp)
-+|.define TONUM_LO, 32(sp)
-+|.else
-+|.define TMPD_LO, 44(sp)
-+|.define TMPD_HI, 40(sp)
-+|.define TONUM_LO, 36(sp)
-+|.define TONUM_HI, 32(sp)
-+|.endif
-+|.define SAVE_TOC, 24(sp) // TOC save area.
-+|// Next frame lr: 16(sp)
-+|// Next frame cr: 8(sp)
-+|// Back chain for sp: 0(sp) <-- sp while in interpreter
-+|
-+|.if ENDIAN_LE
-+|.define TMPD_BLO, 32(sp)
-+|.define TMPD, TMPD_LO
-+|.define TONUM_D, TONUM_LO
-+|.else
-+|.define TMPD_BLO, 39(sp)
-+|.define TMPD, TMPD_HI
-+|.define TONUM_D, TONUM_HI
-+|.endif
-+|
-+|.define EXIT_OFFSET, 32
-+|
- |.else
- |
- |// 508(sp) // \ 32 bit C frame info.
-@@ -192,23 +225,39 @@
- |.define SAVE_MULTRES, 456(sp) // |
- |.define SAVE_CFRAME, 448(sp) // / 64 bit C frame chain.
- |.define SAVE_LR, 416(sp)
-+|.define SAVE_CR, 408(sp) // 64 bit CR save.
- |.define CFRAME_SPACE, 400 // Delta for sp.
- |// Back chain for sp: 400(sp) <-- sp entering interpreter
- |.define SAVE_FPR_, 256 // .. 256+18*8: 64 bit FPR saves.
- |.define SAVE_GPR_, 112 // .. 112+18*8: 64 bit GPR saves.
- |// 48(sp) // Callee parameter save area (ABI mandated).
- |.define SAVE_TOC, 40(sp) // TOC save area.
-+|.if ENDIAN_LE
-+|.define TMPD_HI, 36(sp) // \ Link editor temp (ABI mandated).
-+|.define TMPD_LO, 32(sp) // /
-+|.define TONUM_HI, 28(sp) // \ Compiler temp (ABI mandated).
-+|.define TONUM_LO, 24(sp) // /
-+|.else
- |.define TMPD_LO, 36(sp) // \ Link editor temp (ABI mandated).
- |.define TMPD_HI, 32(sp) // /
- |.define TONUM_LO, 28(sp) // \ Compiler temp (ABI mandated).
- |.define TONUM_HI, 24(sp) // /
-+|.endif
- |// Next frame lr: 16(sp)
--|.define SAVE_CR, 8(sp) // 64 bit CR save.
-+|// Next frame cr: 8(sp)
- |// Back chain for sp: 0(sp) <-- sp while in interpreter
- |
-+|.if ENDIAN_LE
-+|.define TMPD_BLO, 32(sp)
-+|.define TMPD, TMPD_LO
-+|.define TONUM_D, TONUM_LO
-+|.else
- |.define TMPD_BLO, 39(sp)
- |.define TMPD, TMPD_HI
- |.define TONUM_D, TONUM_HI
-+|.endif
-+|
-+|.define EXIT_OFFSET, 112
- |
- |.endif
- |.else
-@@ -226,16 +275,31 @@
- |.define SAVE_PC, 32(sp)
- |.define SAVE_MULTRES, 28(sp)
- |.define UNUSED1, 24(sp)
-+|.if ENDIAN_LE
-+|.define TMPD_HI, 20(sp)
-+|.define TMPD_LO, 16(sp)
-+|.define TONUM_HI, 12(sp)
-+|.define TONUM_LO, 8(sp)
-+|.else
- |.define TMPD_LO, 20(sp)
- |.define TMPD_HI, 16(sp)
- |.define TONUM_LO, 12(sp)
- |.define TONUM_HI, 8(sp)
-+|.endif
- |// Next frame lr: 4(sp)
- |// Back chain for sp: 0(sp) <-- sp while in interpreter
- |
-+|.if ENDIAN_LE
-+|.define TMPD_BLO, 16(sp)
-+|.define TMPD, TMPD_LO
-+|.define TONUM_D, TONUM_LO
-+|.else
- |.define TMPD_BLO, 23(sp)
- |.define TMPD, TMPD_HI
- |.define TONUM_D, TONUM_HI
-+|.endif
-+|
-+|.define EXIT_OFFSET, 16
- |
- |.endif
- |
-@@ -350,8 +414,35 @@
- |//-----------------------------------------------------------------------
- |
- |// Access to frame relative to BASE.
-+|.if ENDIAN_LE
-+|.define FRAME_PC, -4
-+|.define FRAME_FUNC, -8
-+|.define FRAME_CONTPC, -12
-+|.define FRAME_CONTRET, -16
-+|.define WORD_LO, 0
-+|.define WORD_HI, 4
-+|.define WORD_BLO, 0
-+|.define BASE_LO, BASE
-+|.define BASE_HI, BASEP4
-+|.macro lwzux2, hi, lo, base, idx
-+| lwzux lo, base, idx
-+| lwz hi, 4(base)
-+|.endmacro
-+|.else
- |.define FRAME_PC, -8
- |.define FRAME_FUNC, -4
-+|.define FRAME_CONTPC, -16
-+|.define FRAME_CONTRET, -12
-+|.define WORD_LO, 4
-+|.define WORD_HI, 0
-+|.define WORD_BLO, 7
-+|.define BASE_LO, BASEP4
-+|.define BASE_HI, BASE
-+|.macro lwzux2, hi, lo, base, idx
-+| lwzux hi, base, idx
-+| lwz lo, 4(base)
-+|.endmacro
-+|.endif
- |
- |// Instruction decode.
- |.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro
-@@ -412,6 +503,7 @@
- |// Call decode and dispatch.
- |.macro ins_callt
- | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
-+| addi BASEP4, BASE, 4
- | lwz PC, LFUNC:RB->pc
- | lwz INS, 0(PC)
- | addi PC, PC, 4
-@@ -504,7 +596,12 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
- | mr BASE, TMP2 // Restore caller base.
- | // Prepending may overwrite the pcall frame, so do it at the end.
-- | stwu TMP1, FRAME_PC(RA) // Prepend true to results.
-+ | .if ENDIAN_LE
-+ | addi RA, RA, -8
-+ | stw TMP1, WORD_HI(RA) // Prepend true to results.
-+ | .else
-+ | stwu TMP1, -8(RA) // Prepend true to results.
-+ | .endif
- |
- |->vm_returnc:
- | addi RD, RD, 8 // RD = (nresults+1)*8.
-@@ -560,7 +657,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz TMP1, L->maxstack
- | cmplw BASE, TMP1
- | bge >8
-- | stw TISNIL, 0(BASE)
-+ | stw TISNIL, WORD_HI(BASE)
- | addi RD, RD, 8
- | addi BASE, BASE, 8
- | b <2
-@@ -611,7 +708,12 @@ static void build_subroutines(BuildCtx *ctx)
- |->vm_unwind_ff_eh: // Landing pad for external unwinder.
- | lwz L, SAVE_L
- | .toc ld TOCREG, SAVE_TOC
-+ |.if P64
-+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
-+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
-+ |.else
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
-+ |.endif
- | lp BASE, L->base
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
-@@ -626,7 +728,7 @@ static void build_subroutines(BuildCtx *ctx)
- | la RA, -8(BASE) // Results start at BASE-8.
- | stw TMP3, TMPD
- | addi DISPATCH, DISPATCH, GG_G2DISP
-- | stw TMP1, 0(RA) // Prepend false to error message.
-+ | stw TMP1, WORD_HI(RA) // Prepend false to error message.
- | li RD, 16 // 2 results: false + error message.
- | st_vmstate
- | lfs TONUM, TMPD
-@@ -687,7 +789,12 @@ static void build_subroutines(BuildCtx *ctx)
- | stw L, DISPATCH_GL(cur_L)(DISPATCH)
- | mr RA, BASE
- | lp BASE, L->base
-+ |.if P64
-+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
-+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
-+ |.else
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
-+ |.endif
- | lp TMP1, L->top
- | lwz PC, FRAME_PC(BASE)
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
-@@ -737,7 +844,12 @@ static void build_subroutines(BuildCtx *ctx)
- |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
- | stw L, DISPATCH_GL(cur_L)(DISPATCH)
- | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
-+ |.if P64
-+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
-+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
-+ |.else
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
-+ |.endif
- | lp TMP1, L->top
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | add PC, PC, BASE
-@@ -757,8 +869,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->vm_call_dispatch:
- | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
-- | lwz TMP0, FRAME_PC(BASE)
-- | lwz LFUNC:RB, FRAME_FUNC(BASE)
-+ | lwz TMP0, WORD_HI-8(BASE)
-+ | lwz LFUNC:RB, WORD_LO-8(BASE)
- | checkfunc TMP0; bne ->vmeta_call
- |
- |->vm_call_dispatch_f:
-@@ -777,7 +889,9 @@ static void build_subroutines(BuildCtx *ctx)
- | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
- | lp TMP1, L->cframe
- | addi DISPATCH, DISPATCH, GG_G2DISP
-- | .toc lp CARG4, 0(CARG4)
-+ | .opd lp TOCREG, TOC_OFS(CARG4)
-+ | .opdenv lp ENVREG, ENV_OFS(CARG4)
-+ | .opd lp CARG4, 0(CARG4)
- | li TMP2, 0
- | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
- | stw TMP2, SAVE_ERRF // No error function.
-@@ -785,7 +899,9 @@ static void build_subroutines(BuildCtx *ctx)
- | stp sp, L->cframe // Add our C frame to cframe chain.
- | stw L, DISPATCH_GL(cur_L)(DISPATCH)
- | mtctr CARG4
-+ | .elfv2 mr FUNCREG, CARG4
- | bctrl // (lua_State *L, lua_CFunction func, void *ud)
-+ | .toc lp TOCREG, SAVE_TOC
- |.if PPE
- | mr BASE, CRET1
- | cmpwi CRET1, 0
-@@ -807,20 +923,27 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->cont_dispatch:
- | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
-- | lwz TMP0, -12(BASE) // Continuation.
-+ | lwz TMP0, FRAME_CONTRET(BASE) // Continuation.
- | mr RB, BASE
- | mr BASE, TMP2 // Restore caller BASE.
- | lwz LFUNC:TMP1, FRAME_FUNC(TMP2)
- |.if FFI
- | cmplwi TMP0, 1
- |.endif
-- | lwz PC, -16(RB) // Restore PC from [cont|PC].
-- | subi TMP2, RD, 8
-+ | lwz PC, FRAME_CONTPC(RB) // Restore PC from [cont|PC].
-+ | addi BASEP4, BASE, 4
-+ | addi TMP2, RD, WORD_HI-8
- | lwz TMP1, LFUNC:TMP1->pc
- | stwx TISNIL, RA, TMP2 // Ensure one valid arg.
-+ |.if P64
-+ | ld TMP3, 0(DISPATCH)
-+ |.endif
- |.if FFI
- | ble >1
- |.endif
-+ |.if P64
-+ | add TMP0, TMP0, TMP3
-+ |.endif
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | // BASE = base, RA = resultptr, RB = meta base
- | mtctr TMP0
-@@ -856,20 +979,20 @@ static void build_subroutines(BuildCtx *ctx)
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | li TMP0, LJ_TSTR
- | decode_RB8 RB, INS
-- | stw STR:RC, 4(CARG3)
-+ | stw STR:RC, WORD_LO(CARG3)
- | add CARG2, BASE, RB
-- | stw TMP0, 0(CARG3)
-+ | stw TMP0, WORD_HI(CARG3)
- | b >1
- |
- |->vmeta_tgets:
- | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
- | li TMP0, LJ_TTAB
-- | stw TAB:RB, 4(CARG2)
-+ | stw TAB:RB, WORD_LO(CARG2)
- | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
-- | stw TMP0, 0(CARG2)
-+ | stw TMP0, WORD_HI(CARG2)
- | li TMP1, LJ_TSTR
-- | stw STR:RC, 4(CARG3)
-- | stw TMP1, 0(CARG3)
-+ | stw STR:RC, WORD_LO(CARG3)
-+ | stw TMP1, WORD_HI(CARG3)
- | b >1
- |
- |->vmeta_tgetb: // TMP0 = index
-@@ -880,8 +1003,8 @@ static void build_subroutines(BuildCtx *ctx)
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | add CARG2, BASE, RB
- |.if DUALNUM
-- | stw TISNUM, 0(CARG3)
-- | stw TMP0, 4(CARG3)
-+ | stw TISNUM, WORD_HI(CARG3)
-+ | stw TMP0, WORD_LO(CARG3)
- |.else
- | stfd f0, 0(CARG3)
- |.endif
-@@ -909,7 +1032,7 @@ static void build_subroutines(BuildCtx *ctx)
- | // BASE = base, L->top = new base, stack = cont/func/t/k
- | subfic TMP1, BASE, FRAME_CONT
- | lp BASE, L->top
-- | stw PC, -16(BASE) // [cont|PC]
-+ | stw PC, FRAME_CONTPC(BASE) // [cont|PC]
- | add PC, TMP1, BASE
- | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | li NARGS8:RC, 16 // 2 args for func(t, k).
-@@ -923,7 +1046,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lfd f14, 0(CRET1)
- | b ->BC_TGETR_Z
- |1:
-- | stwx TISNIL, BASE, RA
-+ | stwx TISNIL, BASE_HI, RA
- | b ->cont_nop
- |
- |//-----------------------------------------------------------------------
-@@ -932,20 +1055,20 @@ static void build_subroutines(BuildCtx *ctx)
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | li TMP0, LJ_TSTR
- | decode_RB8 RB, INS
-- | stw STR:RC, 4(CARG3)
-+ | stw STR:RC, WORD_LO(CARG3)
- | add CARG2, BASE, RB
-- | stw TMP0, 0(CARG3)
-+ | stw TMP0, WORD_HI(CARG3)
- | b >1
- |
- |->vmeta_tsets:
- | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
- | li TMP0, LJ_TTAB
-- | stw TAB:RB, 4(CARG2)
-+ | stw TAB:RB, WORD_LO(CARG2)
- | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
-- | stw TMP0, 0(CARG2)
-+ | stw TMP0, WORD_HI(CARG2)
- | li TMP1, LJ_TSTR
-- | stw STR:RC, 4(CARG3)
-- | stw TMP1, 0(CARG3)
-+ | stw STR:RC, WORD_LO(CARG3)
-+ | stw TMP1, WORD_HI(CARG3)
- | b >1
- |
- |->vmeta_tsetb: // TMP0 = index
-@@ -956,8 +1079,8 @@ static void build_subroutines(BuildCtx *ctx)
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | add CARG2, BASE, RB
- |.if DUALNUM
-- | stw TISNUM, 0(CARG3)
-- | stw TMP0, 4(CARG3)
-+ | stw TISNUM, WORD_HI(CARG3)
-+ | stw TMP0, WORD_LO(CARG3)
- |.else
- | stfd f0, 0(CARG3)
- |.endif
-@@ -986,7 +1109,7 @@ static void build_subroutines(BuildCtx *ctx)
- | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
- | subfic TMP1, BASE, FRAME_CONT
- | lp BASE, L->top
-- | stw PC, -16(BASE) // [cont|PC]
-+ | stw PC, FRAME_CONTPC(BASE) // [cont|PC]
- | add PC, TMP1, BASE
- | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | li NARGS8:RC, 24 // 3 args for func(t, k, v)
-@@ -1006,17 +1129,9 @@ static void build_subroutines(BuildCtx *ctx)
- |->vmeta_comp:
- | mr CARG1, L
- | subi PC, PC, 4
-- |.if DUALNUM
-- | mr CARG2, RA
-- |.else
- | add CARG2, BASE, RA
-- |.endif
- | stw PC, SAVE_PC
-- |.if DUALNUM
-- | mr CARG3, RD
-- |.else
- | add CARG3, BASE, RD
-- |.endif
- | stp BASE, L->base
- | decode_OP1 CARG4, INS
- | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
-@@ -1043,7 +1158,7 @@ static void build_subroutines(BuildCtx *ctx)
- | b ->cont_nop
- |
- |->cont_condt: // RA = resultptr
-- | lwz TMP0, 0(RA)
-+ | lwz TMP0, WORD_HI(RA)
- | .gpr64 extsw TMP0, TMP0
- | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true.
- | subfe CRET1, CRET1, CRET1
-@@ -1051,7 +1166,7 @@ static void build_subroutines(BuildCtx *ctx)
- | b <4
- |
- |->cont_condf: // RA = resultptr
-- | lwz TMP0, 0(RA)
-+ | lwz TMP0, WORD_HI(RA)
- | .gpr64 extsw TMP0, TMP0
- | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false.
- | subfe CRET1, CRET1, CRET1
-@@ -1103,8 +1218,8 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- |
- |->vmeta_unm:
-- | mr CARG3, RD
-- | mr CARG4, RD
-+ | add CARG3, BASE, RD
-+ | add CARG4, BASE, RD
- | b >1
- |
- |->vmeta_arith_vn:
-@@ -1139,7 +1254,7 @@ static void build_subroutines(BuildCtx *ctx)
- |->vmeta_binop:
- | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
- | sub TMP1, CRET1, BASE
-- | stw PC, -16(CRET1) // [cont|PC]
-+ | stw PC, FRAME_CONTPC(CRET1) // [cont|PC]
- | mr TMP2, BASE
- | addi PC, TMP1, FRAME_CONT
- | mr BASE, CRET1
-@@ -1150,7 +1265,7 @@ static void build_subroutines(BuildCtx *ctx)
- #if LJ_52
- | mr SAVE0, CARG1
- #endif
-- | mr CARG2, RD
-+ | add CARG2, BASE, RD
- | stp BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
-@@ -1227,25 +1342,25 @@ static void build_subroutines(BuildCtx *ctx)
- |.macro .ffunc_1, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-- | lwz CARG1, 4(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
-+ | lwz CARG1, WORD_LO(BASE)
- | blt ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_2, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 16
-- | lwz CARG3, 0(BASE)
-- | lwz CARG4, 8(BASE)
-- | lwz CARG1, 4(BASE)
-- | lwz CARG2, 12(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
-+ | lwz CARG4, WORD_HI+8(BASE)
-+ | lwz CARG1, WORD_LO(BASE)
-+ | lwz CARG2, WORD_LO+8(BASE)
- | blt ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_n, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
- | lfd FARG1, 0(BASE)
- | blt ->fff_fallback
- | checknum CARG3; bge ->fff_fallback
-@@ -1254,9 +1369,9 @@ static void build_subroutines(BuildCtx *ctx)
- |.macro .ffunc_nn, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 16
-- | lwz CARG3, 0(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
- | lfd FARG1, 0(BASE)
-- | lwz CARG4, 8(BASE)
-+ | lwz CARG4, WORD_HI+8(BASE)
- | lfd FARG2, 8(BASE)
- | blt ->fff_fallback
- | checknum CARG3; bge ->fff_fallback
-@@ -1279,9 +1394,9 @@ static void build_subroutines(BuildCtx *ctx)
- | cmplw cr1, CARG3, TMP1
- | lwz PC, FRAME_PC(BASE)
- | bge cr1, ->fff_fallback
-- | stw CARG3, 0(RA)
-+ | stw CARG3, WORD_HI(RA)
- | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
-- | stw CARG1, 4(RA)
-+ | stw CARG1, WORD_LO(RA)
- | beq ->fff_res // Done if exactly 1 argument.
- | li TMP1, 8
- | subi RC, RC, 8
-@@ -1295,17 +1410,36 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc type
- | cmplwi NARGS8:RC, 8
-- | lwz CARG1, 0(BASE)
-+ | lwz CARG1, WORD_HI(BASE)
- | blt ->fff_fallback
- | .gpr64 extsw CARG1, CARG1
-+ |.if P64
-+ | li TMP0, LJ_TNUMX
-+ | srawi TMP3, CARG1, 15
-+ | subfc TMP1, TMP0, CARG1
-+ |.else
- | subfc TMP0, TISNUM, CARG1
-- | subfe TMP2, CARG1, CARG1
-+ |.endif
-+ | subfe TMP2, CARG1, CARG1
-+ |.if P64
-+ | cmpwi TMP3, -2
-+ | orc TMP1, TMP2, TMP1
-+ | subf TMP1, TMP0, TMP1
-+ | beq >1
-+ |.else
- | orc TMP1, TMP2, TMP0
-- | addi TMP1, TMP1, ~LJ_TISNUM+1
-+ | subf TMP1, TISNUM, TMP1
-+ |.endif
- | slwi TMP1, TMP1, 3
-+ |2:
- | la TMP2, CFUNC:RB->upvalue
- | lfdx FARG1, TMP2, TMP1
- | b ->fff_resn
-+ |.if P64
-+ |1:
-+ | li TMP1, ~LJ_TLIGHTUD<<3
-+ | b <2
-+ |.endif
- |
- |//-- Base library: getters and setters ---------------------------------
- |
-@@ -1328,10 +1462,10 @@ static void build_subroutines(BuildCtx *ctx)
- | sub TMP1, TMP0, TMP1
- | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
- |3: // Rearranged logic, because we expect _not_ to find the key.
-- | lwz CARG4, NODE:TMP2->key
-- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
-- | lwz CARG2, NODE:TMP2->val
-- | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
-+ | lwz CARG4, WORD_HI+offsetof(Node, key)(NODE:TMP2)
-+ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2)
-+ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2)
-+ | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2)
- | checkstr CARG4; bne >4
- | cmpw TMP0, STR:RC; beq >5
- |4:
-@@ -1349,14 +1483,33 @@ static void build_subroutines(BuildCtx *ctx)
- |6:
- | cmpwi CARG3, LJ_TUDATA; beq <1
- | .gpr64 extsw CARG3, CARG3
-+ |.if P64
-+ | li TMP0, LJ_TNUMX
-+ | srawi TMP3, CARG3, 15
-+ | subfc TMP1, TMP0, CARG3
-+ |.else
- | subfc TMP0, TISNUM, CARG3
-+ |.endif
- | subfe TMP2, CARG3, CARG3
-+ |.if P64
-+ | cmpwi TMP3, -2
-+ | orc TMP1, TMP2, TMP1
-+ | subf TMP1, TMP0, TMP1
-+ | beq >7
-+ |.else
- | orc TMP1, TMP2, TMP0
-- | addi TMP1, TMP1, ~LJ_TISNUM+1
-+ | subf TMP1, TISNUM, TMP1
-+ |.endif
- | slwi TMP1, TMP1, 2
-+ |8:
- | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
- | lwzx TAB:CARG1, TMP2, TMP1
- | b <2
-+ |.if P64
-+ |7:
-+ | li TMP1, ~LJ_TLIGHTUD<<2
-+ | b <8
-+ |.endif
- |
- |.ffunc_2 setmetatable
- | // Fast path: no mt for table yet and not clearing the mt.
-@@ -1374,8 +1527,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc rawget
- | cmplwi NARGS8:RC, 16
-- | lwz CARG4, 0(BASE)
-- | lwz TAB:CARG2, 4(BASE)
-+ | lwz CARG4, WORD_HI(BASE)
-+ | lwz TAB:CARG2, WORD_LO(BASE)
- | blt ->fff_fallback
- | checktab CARG4; bne ->fff_fallback
- | la CARG3, 8(BASE)
-@@ -1390,7 +1543,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc tonumber
- | // Only handles the number case inline (without a base argument).
- | cmplwi NARGS8:RC, 8
-- | lwz CARG1, 0(BASE)
-+ | lwz CARG1, WORD_HI(BASE)
- | lfd FARG1, 0(BASE)
- | bne ->fff_fallback // Exactly one argument.
- | checknum CARG1; bgt ->fff_fallback
-@@ -1425,10 +1578,15 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc next
- | cmplwi NARGS8:RC, 8
-- | lwz CARG1, 0(BASE)
-- | lwz TAB:CARG2, 4(BASE)
-+ | lwz CARG1, WORD_HI(BASE)
-+ | lwz TAB:CARG2, WORD_LO(BASE)
- | blt ->fff_fallback
-+ |.if ENDIAN_LE
-+ | add TMP1, BASE, NARGS8:RC
-+ | stw TISNIL, WORD_HI(TMP1) // Set missing 2nd arg to nil.
-+ |.else
- | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
-+ |.endif
- | checktab CARG1
- | lwz PC, FRAME_PC(BASE)
- | bne ->fff_fallback
-@@ -1464,18 +1622,18 @@ static void build_subroutines(BuildCtx *ctx)
- | lfd f0, CFUNC:RB->upvalue[0]
- | la RA, -8(BASE)
- #endif
-- | stw TISNIL, 8(BASE)
-+ | stw TISNIL, 8+WORD_HI(BASE)
- | li RD, (3+1)*8
- | stfd f0, 0(RA)
- | b ->fff_res
- |
- |.ffunc ipairs_aux
- | cmplwi NARGS8:RC, 16
-- | lwz CARG3, 0(BASE)
-- | lwz TAB:CARG1, 4(BASE)
-- | lwz CARG4, 8(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
-+ | lwz TAB:CARG1, WORD_LO(BASE)
-+ | lwz CARG4, 8+WORD_HI(BASE)
- |.if DUALNUM
-- | lwz TMP2, 12(BASE)
-+ | lwz TMP2, 8+WORD_LO(BASE)
- |.else
- | lfd FARG2, 8(BASE)
- |.endif
-@@ -1504,16 +1662,16 @@ static void build_subroutines(BuildCtx *ctx)
- | la RA, -8(BASE)
- | cmplw TMP0, TMP2
- |.if DUALNUM
-- | stw TISNUM, 0(RA)
-+ | stw TISNUM, WORD_HI(RA)
- | slwi TMP3, TMP2, 3
-- | stw TMP2, 4(RA)
-+ | stw TMP2, WORD_LO(RA)
- |.else
- | slwi TMP3, TMP2, 3
- | stfd FARG2, 0(RA)
- |.endif
- | ble >2 // Not in array part?
-- | lwzx TMP2, TMP1, TMP3
-- | lfdx f0, TMP1, TMP3
-+ | lfdux f0, TMP1, TMP3
-+ | lwz TMP2, WORD_HI(TMP1)
- |1:
- | checknil TMP2
- | li RD, (0+1)*8
-@@ -1532,7 +1690,7 @@ static void build_subroutines(BuildCtx *ctx)
- | cmplwi CRET1, 0
- | li RD, (0+1)*8
- | beq ->fff_res
-- | lwz TMP2, 0(CRET1)
-+ | lwz TMP2, WORD_HI(CRET1)
- | lfd f0, 0(CRET1)
- | b <1
- |
-@@ -1551,11 +1709,11 @@ static void build_subroutines(BuildCtx *ctx)
- | la RA, -8(BASE)
- #endif
- |.if DUALNUM
-- | stw TISNUM, 8(BASE)
-+ | stw TISNUM, 8+WORD_HI(BASE)
- |.else
-- | stw ZERO, 8(BASE)
-+ | stw ZERO, 8+WORD_HI(BASE)
- |.endif
-- | stw ZERO, 12(BASE)
-+ | stw ZERO, 8+WORD_LO(BASE)
- | li RD, (3+1)*8
- | stfd f0, 0(RA)
- | b ->fff_res
-@@ -1576,7 +1734,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc xpcall
- | cmplwi NARGS8:RC, 16
-- | lwz CARG4, 8(BASE)
-+ | lwz CARG4, 8+WORD_HI(BASE)
- | lfd FARG2, 8(BASE)
- | lfd FARG1, 0(BASE)
- | blt ->fff_fallback
-@@ -1673,7 +1831,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.if resume
- | li TMP1, LJ_TTRUE
- | la RA, -8(BASE)
-- | stw TMP1, -8(BASE) // Prepend true to results.
-+ | stw TMP1, WORD_HI-8(BASE) // Prepend true to results.
- | addi RD, RD, 16
- |.else
- | mr RA, BASE
-@@ -1693,7 +1851,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lfd f0, 0(TMP3)
- | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
- | li RD, (2+1)*8
-- | stw TMP1, -8(BASE) // Prepend false to results.
-+ | stw TMP1, WORD_HI-8(BASE) // Prepend false to results.
- | la RA, -8(BASE)
- | stfd f0, 0(BASE) // Copy error message.
- | b <7
-@@ -1746,8 +1904,8 @@ static void build_subroutines(BuildCtx *ctx)
- |->fff_resi:
- | lwz PC, FRAME_PC(BASE)
- | la RA, -8(BASE)
-- | stw TISNUM, -8(BASE)
-- | stw CRET1, -4(BASE)
-+ | stw TISNUM, WORD_HI-8(BASE)
-+ | stw CRET1, WORD_LO-8(BASE)
- | b ->fff_res1
- |1:
- | lus CARG3, 0x41e0 // 2^31.
-@@ -1762,9 +1920,9 @@ static void build_subroutines(BuildCtx *ctx)
- |->fff_restv:
- | // CARG3/CARG1 = TValue result.
- | lwz PC, FRAME_PC(BASE)
-- | stw CARG3, -8(BASE)
-+ | stw CARG3, WORD_HI-8(BASE)
- | la RA, -8(BASE)
-- | stw CARG1, -4(BASE)
-+ | stw CARG1, WORD_LO-8(BASE)
- |->fff_res1:
- | // RA = results, PC = return.
- | li RD, (1+1)*8
-@@ -1782,10 +1940,11 @@ static void build_subroutines(BuildCtx *ctx)
- | ins_next1
- | // Adjust BASE. KBASE is assumed to be set for the calling frame.
- | sub BASE, RA, TMP0
-+ | addi BASEP4, BASE, 4
- | ins_next2
- |
- |6: // Fill up results with nil.
-- | subi TMP1, RD, 8
-+ | addi TMP1, RD, WORD_HI-8
- | addi RD, RD, 8
- | stwx TISNIL, RA, TMP1
- | b <5
-@@ -1898,7 +2057,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc math_log
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
- | lfd FARG1, 0(BASE)
- | bne ->fff_fallback // Need exactly 1 argument.
- | checknum CARG3; bge ->fff_fallback
-@@ -1923,13 +2082,13 @@ static void build_subroutines(BuildCtx *ctx)
- |.if DUALNUM
- |.ffunc math_ldexp
- | cmplwi NARGS8:RC, 16
-- | lwz CARG3, 0(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
- | lfd FARG1, 0(BASE)
-- | lwz CARG4, 8(BASE)
-+ | lwz CARG4, WORD_HI+8(BASE)
- |.if GPR64
-- | lwz CARG2, 12(BASE)
-+ | lwz CARG2, WORD_LO+8(BASE)
- |.else
-- | lwz CARG1, 12(BASE)
-+ | lwz CARG1, WORD_LO+8(BASE)
- |.endif
- | blt ->fff_fallback
- | checknum CARG3; bge ->fff_fallback
-@@ -1961,8 +2120,8 @@ static void build_subroutines(BuildCtx *ctx)
- | stfd FARG1, 0(RA)
- | li RD, (2+1)*8
- |.if DUALNUM
-- | stw TISNUM, 8(RA)
-- | stw TMP1, 12(RA)
-+ | stw TISNUM, WORD_HI+8(RA)
-+ | stw TMP1, WORD_LO+8(RA)
- |.else
- | stfd FARG2, 8(RA)
- |.endif
-@@ -1989,9 +2148,9 @@ static void build_subroutines(BuildCtx *ctx)
- | add TMP2, BASE, NARGS8:RC
- | bne >4
- |1: // Handle integers.
-- | lwz CARG4, 0(TMP1)
-+ | lwz CARG4, WORD_HI(TMP1)
- | cmplw cr1, TMP1, TMP2
-- | lwz CARG2, 4(TMP1)
-+ | lwz CARG2, WORD_LO(TMP1)
- | bge cr1, ->fff_resi
- | checknum CARG4
- | xoris TMP0, CARG1, 0x8000
-@@ -2020,7 +2179,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lfd FARG1, 0(BASE)
- | bge ->fff_fallback
- |5: // Handle numbers.
-- | lwz CARG4, 0(TMP1)
-+ | lwz CARG4, WORD_HI(TMP1)
- | cmplw cr1, TMP1, TMP2
- | lfd FARG2, 0(TMP1)
- | bge cr1, ->fff_resn
-@@ -2035,7 +2194,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.endif
- | b <5
- |7: // Convert integer to number and continue above.
-- | lwz CARG2, 4(TMP1)
-+ | lwz CARG2, WORD_LO(TMP1)
- | bne ->fff_fallback
- | tonum_i FARG2, CARG2
- | b <6
-@@ -2043,7 +2202,12 @@ static void build_subroutines(BuildCtx *ctx)
- | .ffunc_n name
- | li TMP1, 8
- |1:
-+ |.if ENDIAN_LE
-+ | add CARG2, BASE, TMP1
-+ | lwz CARG2, WORD_HI(CARG2)
-+ |.else
- | lwzx CARG2, BASE, TMP1
-+ |.endif
- | lfdx FARG2, BASE, TMP1
- | cmplw cr1, TMP1, NARGS8:RC
- | checknum CARG2
-@@ -2067,8 +2231,8 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |.ffunc string_byte // Only handle the 1-arg case here.
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-- | lwz STR:CARG1, 4(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
-+ | lwz STR:CARG1, WORD_LO(BASE)
- | bne ->fff_fallback // Need exactly 1 argument.
- | checkstr CARG3
- | bne ->fff_fallback
-@@ -2099,12 +2263,12 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc string_char // Only handle the 1-arg case here.
- | ffgccheck
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
- |.if DUALNUM
-- | lwz TMP0, 4(BASE)
-+ | lwz TMP0, WORD_LO(BASE)
- | bne ->fff_fallback // Exactly 1 argument.
- | checknum CARG3; bne ->fff_fallback
-- | la CARG2, 7(BASE)
-+ | la CARG2, WORD_BLO(BASE)
- |.else
- | lfd FARG1, 0(BASE)
- | bne ->fff_fallback // Exactly 1 argument.
-@@ -2128,16 +2292,16 @@ static void build_subroutines(BuildCtx *ctx)
- |.ffunc string_sub
- | ffgccheck
- | cmplwi NARGS8:RC, 16
-- | lwz CARG3, 16(BASE)
-+ | lwz CARG3, WORD_HI+16(BASE)
- |.if not DUALNUM
- | lfd f0, 16(BASE)
- |.endif
-- | lwz TMP0, 0(BASE)
-- | lwz STR:CARG1, 4(BASE)
-+ | lwz TMP0, WORD_HI(BASE)
-+ | lwz STR:CARG1, WORD_LO(BASE)
- | blt ->fff_fallback
-- | lwz CARG2, 8(BASE)
-+ | lwz CARG2, WORD_HI+8(BASE)
- |.if DUALNUM
-- | lwz TMP1, 12(BASE)
-+ | lwz TMP1, WORD_LO+8(BASE)
- |.else
- | lfd f1, 8(BASE)
- |.endif
-@@ -2145,7 +2309,7 @@ static void build_subroutines(BuildCtx *ctx)
- | beq >1
- |.if DUALNUM
- | checknum CARG3
-- | lwz TMP2, 20(BASE)
-+ | lwz TMP2, WORD_LO+16(BASE)
- | bne ->fff_fallback
- |1:
- | checknum CARG2; bne ->fff_fallback
-@@ -2201,8 +2365,8 @@ static void build_subroutines(BuildCtx *ctx)
- | .ffunc string_ .. name
- | ffgccheck
- | cmplwi NARGS8:RC, 8
-- | lwz CARG3, 0(BASE)
-- | lwz STR:CARG2, 4(BASE)
-+ | lwz CARG3, WORD_HI(BASE)
-+ | lwz STR:CARG2, WORD_LO(BASE)
- | blt ->fff_fallback
- | checkstr CARG3
- | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
-@@ -2240,10 +2404,10 @@ static void build_subroutines(BuildCtx *ctx)
- | addi TMP1, BASE, 8
- | add TMP2, BASE, NARGS8:RC
- |1:
-- | lwz CARG4, 0(TMP1)
-+ | lwz CARG4, WORD_HI(TMP1)
- | cmplw cr1, TMP1, TMP2
- |.if DUALNUM
-- | lwz CARG2, 4(TMP1)
-+ | lwz CARG2, WORD_LO(TMP1)
- |.else
- | lfd FARG1, 0(TMP1)
- |.endif
-@@ -2344,20 +2508,23 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->fff_fallback: // Call fast function fallback handler.
- | // BASE = new base, RB = CFUNC, RC = nargs*8
-- | lp TMP3, CFUNC:RB->f
-+ | lp FUNCREG, CFUNC:RB->f
- | add TMP1, BASE, NARGS8:RC
- | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC.
- | addi TMP0, TMP1, 8*LUA_MINSTACK
- | lwz TMP2, L->maxstack
- | stw PC, SAVE_PC // Redundant (but a defined value).
-- | .toc lp TMP3, 0(TMP3)
-+ | .opd lp TOCREG, TOC_OFS(FUNCREG)
-+ | .opdenv lp ENVREG, ENV_OFS(FUNCREG)
-+ | .opd lp FUNCREG, 0(FUNCREG)
- | cmplw TMP0, TMP2
- | stp BASE, L->base
- | stp TMP1, L->top
- | mr CARG1, L
- | bgt >5 // Need to grow stack.
-- | mtctr TMP3
-+ | mtctr FUNCREG
- | bctrl // (lua_State *L)
-+ | .toc lp TOCREG, SAVE_TOC
- | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
- | lp BASE, L->base
- | cmpwi CRET1, 0
-@@ -2459,6 +2626,7 @@ static void build_subroutines(BuildCtx *ctx)
- |3:
- | lp BASE, L->base
- |4: // Re-dispatch to static ins.
-+ | addi BASEP4, BASE, 4
- | lwz INS, -4(PC)
- | decode_OPP TMP1, INS
- | decode_RB8 RB, INS
-@@ -2472,7 +2640,7 @@ static void build_subroutines(BuildCtx *ctx)
- |
- |->cont_hook: // Continue from hook yield.
- | addi PC, PC, 4
-- | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins.
-+ | lwz MULTRES, WORD_LO-24(RB) // Restore MULTRES for *M ins.
- | b <4
- |
- |->vm_hotloop: // Hot loop counter underflow.
-@@ -2514,6 +2682,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lp BASE, L->base
- | lp TMP0, L->top
- | stw ZERO, SAVE_PC // Invalidate for subsequent line hook.
-+ | addi BASEP4, BASE, 4
- | sub NARGS8:RC, TMP0, BASE
- | add RA, BASE, RA
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
-@@ -2525,7 +2694,7 @@ static void build_subroutines(BuildCtx *ctx)
- |.if JIT
- | // RA = resultptr, RB = meta base
- | lwz INS, -4(PC)
-- | lwz TRACE:TMP2, -20(RB) // Save previous trace.
-+ | lwz TRACE:TMP2, WORD_LO-24(RB) // Save previous trace.
- | addic. TMP1, MULTRES, -8
- | decode_RA8 RC, INS // Call base.
- | beq >2
-@@ -2560,10 +2729,16 @@ static void build_subroutines(BuildCtx *ctx)
- | mr CARG2, PC
- | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
- | lp BASE, L->base
-+ | addi BASEP4, BASE, 4
- | b ->cont_nop
- |
- |9:
-+ |.if ENDIAN_LE
-+ | addi BASEP4, BASE, 4
-+ | stwx TISNIL, BASEP4, RC
-+ |.else
- | stwx TISNIL, BASE, RC
-+ |.endif
- | addi RC, RC, 8
- | b <3
- |.endif
-@@ -2578,6 +2753,7 @@ static void build_subroutines(BuildCtx *ctx)
- | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
- | lp BASE, L->base
- | subi PC, PC, 4
-+ | addi BASEP4, BASE, 4
- | b ->cont_nop
- #endif
- |
-@@ -2586,39 +2762,72 @@ static void build_subroutines(BuildCtx *ctx)
- |//-----------------------------------------------------------------------
- |
- |.macro savex_, a, b, c, d
-- | stfd f..a, 16+a*8(sp)
-- | stfd f..b, 16+b*8(sp)
-- | stfd f..c, 16+c*8(sp)
-- | stfd f..d, 16+d*8(sp)
-+ | stfd f..a, EXIT_OFFSET+a*8(sp)
-+ | stfd f..b, EXIT_OFFSET+b*8(sp)
-+ | stfd f..c, EXIT_OFFSET+c*8(sp)
-+ | stfd f..d, EXIT_OFFSET+d*8(sp)
-+ |.endmacro
-+ |
-+ |.macro saver, a
-+ | stp r..a, EXIT_OFFSET+32*8+a*PSIZE(sp)
- |.endmacro
- |
- |->vm_exit_handler:
- |.if JIT
-- | addi sp, sp, -(16+32*8+32*4)
-- | stmw r2, 16+32*8+2*4(sp)
-+ | addi sp, TMP0, sp, -(EXIT_OFFSET+32*8+32*PSIZE)
-+ | saver 3 // CARG1
-+ | saver 4 // CARG2
-+ | saver 5 // CARG3
-+ | saver 17 // DISPATCH
- | addi DISPATCH, JGL, -GG_DISP2G-32768
- | li CARG2, ~LJ_VMST_EXIT
-- | lwz CARG1, 16+32*8+32*4(sp) // Get stack chain.
-+ | lp CARG1, EXIT_OFFSET+32*8+32*PSIZE(sp) // Get stack chain.
- | stw CARG2, DISPATCH_GL(vmstate)(DISPATCH)
-+ | saver 2
-+ | saver 6
-+ | saver 7
-+ | saver 8
-+ | saver 9
-+ | saver 10
-+ | saver 11
-+ | saver 12
-+ | saver 13
- | savex_ 0,1,2,3
-- | stw CARG1, 0(sp) // Store extended stack chain.
-- | clrso TMP1
-+ | stp CARG1, 0(sp) // Store extended stack chain.
-+
- | savex_ 4,5,6,7
-- | addi CARG2, sp, 16+32*8+32*4 // Recompute original value of sp.
-+ | saver 14
-+ | saver 15
-+ | saver 16
-+ | saver 18
-+ | addi CARG2, sp, EXIT_OFFSET+32*8+32*PSIZE // Recompute original value of sp.
- | savex_ 8,9,10,11
-- | stw CARG2, 16+32*8+1*4(sp) // Store sp in RID_SP.
-+ | stp CARG2, EXIT_OFFSET+32*8+1*PSIZE(sp) // Store sp in RID_SP.
- | savex_ 12,13,14,15
- | mflr CARG3
- | li TMP1, 0
- | savex_ 16,17,18,19
-- | stw TMP1, 16+32*8+0*4(sp) // Clear RID_TMP.
-+ | stw TMP1, EXIT_OFFSET+32*8+0*PSIZE(sp) // Clear RID_TMP.
- | savex_ 20,21,22,23
- | lhz CARG4, 2(CARG3) // Load trace number.
- | savex_ 24,25,26,27
- | lwz L, DISPATCH_GL(cur_L)(DISPATCH)
- | savex_ 28,29,30,31
-+ | saver 19
-+ | saver 20
-+ | saver 21
-+ | saver 22
-+ | saver 23
-+ | saver 24
-+ | saver 25
-+ | saver 26
-+ | saver 27
-+ | saver 28
-+ | saver 29
-+ | saver 30
-+ | saver 31
- | sub CARG3, TMP0, CARG3 // Compute exit number.
-- | lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
-+ | lwz BASE, DISPATCH_GL(jit_base)(DISPATCH)
- | srwi CARG3, CARG3, 2
- | stp L, DISPATCH_J(L)(DISPATCH)
- | subi CARG3, CARG3, 2
-@@ -2627,11 +2836,11 @@ static void build_subroutines(BuildCtx *ctx)
- | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)
- | addi CARG1, DISPATCH, GG_DISP2J
- | stw CARG3, DISPATCH_J(exitno)(DISPATCH)
-- | addi CARG2, sp, 16
-+ | addi CARG2, sp, EXIT_OFFSET
- | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
- | // Returns MULTRES (unscaled) or negated error code.
- | lp TMP1, L->cframe
-- | lwz TMP2, 0(sp)
-+ | lp TMP2, 0(sp)
- | lp BASE, L->base
- |.if GPR64
- | rldicr sp, TMP1, 0, 61
-@@ -2639,7 +2848,7 @@ static void build_subroutines(BuildCtx *ctx)
- | rlwinm sp, TMP1, 0, 0, 29
- |.endif
- | lwz PC, SAVE_PC // Get SAVE_PC.
-- | stw TMP2, 0(sp)
-+ | stp TMP2, 0(sp)
- | stw L, SAVE_L // Set SAVE_L (on-trace resume/yield).
- | b >1
- |.endif
-@@ -2660,7 +2869,12 @@ static void build_subroutines(BuildCtx *ctx)
- | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | // Setup type comparison constants.
-+ |.if P64
-+ | lus TISNUM, LJ_TISNUM >> 16
-+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
-+ |.else
- | li TISNUM, LJ_TISNUM
-+ |.endif
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | stw TMP3, TMPD
- | li ZERO, 0
-@@ -2680,14 +2894,14 @@ static void build_subroutines(BuildCtx *ctx)
- | decode_RA8 RA, INS
- | lpx TMP0, DISPATCH, TMP1
- | mtctr TMP0
-- | cmplwi TMP1, BC_FUNCF*4 // Function header?
-+ | cmplwi TMP1, BC_FUNCF*PSIZE // Function header?
- | bge >2
- | decode_RB8 RB, INS
- | decode_RD8 RD, INS
- | decode_RC8 RC, INS
- | bctr
- |2:
-- | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function?
-+ | cmplwi TMP1, (BC_FUNCC+2)*PSIZE // Fast function?
- | blt >3
- | // Check frame below fast function.
- | lwz TMP1, FRAME_PC(BASE)
-@@ -2697,7 +2911,7 @@ static void build_subroutines(BuildCtx *ctx)
- | lwz TMP2, -4(TMP1)
- | decode_RA8 TMP0, TMP2
- | sub TMP1, BASE, TMP0
-- | lwz LFUNC:TMP2, -12(TMP1)
-+ | lwz LFUNC:TMP2, WORD_LO-16(TMP1)
- | lwz TMP1, LFUNC:TMP2->pc
- | lwz KBASE, PC2PROTO(k)(TMP1)
- |3:
-@@ -2718,6 +2932,8 @@ static void build_subroutines(BuildCtx *ctx)
- |// NYI: Use internal implementations of floor, ceil, trunc.
- |
- |->vm_modi:
-+ | li TMP1, 0
-+ | mtxer TMP1
- | divwo. TMP0, CARG1, CARG2
- | bso >1
- |.if GPR64
-@@ -2736,7 +2952,8 @@ static void build_subroutines(BuildCtx *ctx)
- | cmpwi CARG2, 0
- | li CARG1, 0
- | beqlr
-- | clrso TMP0 // Clear SO for -2147483648 % -1 and return 0.
-+ | // Clear SO for -2147483648 % -1 and return 0.
-+ | crxor 4*cr0+so, 4*cr0+so, 4*cr0+so
- | blr
- |
- |//-----------------------------------------------------------------------
-@@ -2749,10 +2966,18 @@ static void build_subroutines(BuildCtx *ctx)
- |->vm_cachesync:
- |.if JIT or FFI
- | // Compute start of first cache line and number of cache lines.
-+ | .if GPR64
-+ | rldicr CARG1, CARG1, 0, 58
-+ | .else
- | rlwinm CARG1, CARG1, 0, 0, 26
-+ | .endif
- | sub CARG2, CARG2, CARG1
- | addi CARG2, CARG2, 31
-+ | .if GPR64
-+ | srdi. CARG2, CARG2, 5
-+ | .else
- | rlwinm. CARG2, CARG2, 27, 5, 31
-+ | .endif
- | beqlr
- | mtctr CARG2
- | mr CARG3, CARG1
-@@ -2774,39 +2999,70 @@ static void build_subroutines(BuildCtx *ctx)
- |//-- FFI helper functions -----------------------------------------------
- |//-----------------------------------------------------------------------
- |
-- |// Handler for callback functions. Callback slot number in r11, g in r12.
-+ |// Handler for callback functions.
-+ |// 32-bit: Callback slot number in r12, g in r11.
-+ |// 64-bit v1: Callback slot number in bits 47+ of r11, g in 0-46, TOC in r2.
-+ |// 64-bit v2: Callback slot number in bits 2-11 of r12, g in r11,
-+ |// vm_ffi_callback in r2.
- |->vm_ffi_callback:
- |.if FFI
- |.type CTSTATE, CTState, PC
-+ | .if OPD
-+ | rldicl r12, r11, 17, 47
-+ | rldicl r11, r11, 0, 17
-+ | .endif
-+ | .if ELFV2
-+ | rlwinm r12, r12, 30, 22, 31
-+ | addisl TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@ha
-+ | addil TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@l
-+ | .endif
- | saveregs
-- | lwz CTSTATE, GL:r12->ctype_state
-- | addi DISPATCH, r12, GG_G2DISP
-- | stw r11, CTSTATE->cb.slot
-- | stw r3, CTSTATE->cb.gpr[0]
-+ | lwz CTSTATE, GL:r11->ctype_state
-+ | addi DISPATCH, r11, GG_G2DISP
-+ | stw r12, CTSTATE->cb.slot
-+ | stp r3, CTSTATE->cb.gpr[0]
- | stfd f1, CTSTATE->cb.fpr[0]
-- | stw r4, CTSTATE->cb.gpr[1]
-+ | stp r4, CTSTATE->cb.gpr[1]
- | stfd f2, CTSTATE->cb.fpr[1]
-- | stw r5, CTSTATE->cb.gpr[2]
-+ | stp r5, CTSTATE->cb.gpr[2]
- | stfd f3, CTSTATE->cb.fpr[2]
-- | stw r6, CTSTATE->cb.gpr[3]
-+ | stp r6, CTSTATE->cb.gpr[3]
- | stfd f4, CTSTATE->cb.fpr[3]
-- | stw r7, CTSTATE->cb.gpr[4]
-+ | stp r7, CTSTATE->cb.gpr[4]
- | stfd f5, CTSTATE->cb.fpr[4]
-- | stw r8, CTSTATE->cb.gpr[5]
-+ | stp r8, CTSTATE->cb.gpr[5]
- | stfd f6, CTSTATE->cb.fpr[5]
-- | stw r9, CTSTATE->cb.gpr[6]
-+ | stp r9, CTSTATE->cb.gpr[6]
- | stfd f7, CTSTATE->cb.fpr[6]
-- | stw r10, CTSTATE->cb.gpr[7]
-+ | stp r10, CTSTATE->cb.gpr[7]
- | stfd f8, CTSTATE->cb.fpr[7]
-+ | .if GPR64
-+ | stfd f9, CTSTATE->cb.fpr[8]
-+ | stfd f10, CTSTATE->cb.fpr[9]
-+ | stfd f11, CTSTATE->cb.fpr[10]
-+ | stfd f12, CTSTATE->cb.fpr[11]
-+ | stfd f13, CTSTATE->cb.fpr[12]
-+ | .endif
-+ | .if ELFV2
-+ | addi TMP0, sp, CFRAME_SPACE+96
-+ | .elif GPR64
-+ | addi TMP0, sp, CFRAME_SPACE+112
-+ | .else
- | addi TMP0, sp, CFRAME_SPACE+8
-- | stw TMP0, CTSTATE->cb.stack
-+ | .endif
-+ | stp TMP0, CTSTATE->cb.stack
- | mr CARG1, CTSTATE
- | stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok.
- | mr CARG2, sp
- | bl extern lj_ccallback_enter // (CTState *cts, void *cf)
- | // Returns lua_State *.
- | lp BASE, L:CRET1->base
-+ |.if P64
-+ | lus TISNUM, LJ_TISNUM >> 16 // Setup type comparison constants.
-+ | ori TISNUM, TISNUM, LJ_TISNUM & 0xffff
-+ |.else
- | li TISNUM, LJ_TISNUM // Setup type comparison constants.
-+ |.endif
- | lp RC, L:CRET1->top
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | li ZERO, 0
-@@ -2835,9 +3091,21 @@ static void build_subroutines(BuildCtx *ctx)
- | mr CARG1, CTSTATE
- | mr CARG2, RA
- | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
-- | lwz CRET1, CTSTATE->cb.gpr[0]
-+ | lp CRET1, CTSTATE->cb.gpr[0]
- | lfd FARG1, CTSTATE->cb.fpr[0]
-- | lwz CRET2, CTSTATE->cb.gpr[1]
-+ | lp CRET2, CTSTATE->cb.gpr[1]
-+ | .if GPR64
-+ | lfd FARG2, CTSTATE->cb.fpr[1]
-+ | .else
-+ | lp CARG3, CTSTATE->cb.gpr[2]
-+ | lp CARG4, CTSTATE->cb.gpr[3]
-+ | .endif
-+ | .elfv2 lfd f3, CTSTATE->cb.fpr[2]
-+ | .elfv2 lfd f4, CTSTATE->cb.fpr[3]
-+ | .elfv2 lfd f5, CTSTATE->cb.fpr[4]
-+ | .elfv2 lfd f6, CTSTATE->cb.fpr[5]
-+ | .elfv2 lfd f7, CTSTATE->cb.fpr[6]
-+ | .elfv2 lfd f8, CTSTATE->cb.fpr[7]
- | b ->vm_leave_unw
- |.endif
- |
-@@ -2850,23 +3118,46 @@ static void build_subroutines(BuildCtx *ctx)
- | lbz CARG2, CCSTATE->nsp
- | lbz CARG3, CCSTATE->nfpr
- | neg TMP1, TMP1
-+ | .if GPR64
-+ | std TMP0, 16(sp)
-+ | .else
- | stw TMP0, 4(sp)
-+ | .endif
- | cmpwi cr1, CARG3, 0
- | mr TMP2, sp
- | addic. CARG2, CARG2, -1
-+ | .if GPR64
-+ | stdux sp, sp, TMP1
-+ | .else
- | stwux sp, sp, TMP1
-+ | .endif
- | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls.
-- | stw r14, -4(TMP2)
-- | stw CCSTATE, -8(TMP2)
-+ | .if GPR64
-+ | std r14, -8(TMP2)
-+ | std CCSTATE, -16(TMP2)
-+ | .else
-+ | stw r14, -4(TMP2)
-+ | stw CCSTATE, -8(TMP2)
-+ | .endif
- | mr r14, TMP2
- | la TMP1, CCSTATE->stack
-+ | .if GPR64
-+ | sldi CARG2, CARG2, 3
-+ | .else
- | slwi CARG2, CARG2, 2
-+ | .endif
- | blty >2
-- | la TMP2, 8(sp)
-+ | .if ELFV2
-+ | la TMP2, 96(sp)
-+ | .elif GPR64
-+ | la TMP2, 112(sp)
-+ | .else
-+ | la TMP2, 8(sp)
-+ | .endif
- |1:
-- | lwzx TMP0, TMP1, CARG2
-- | stwx TMP0, TMP2, CARG2
-- | addic. CARG2, CARG2, -4
-+ | lpx TMP0, TMP1, CARG2
-+ | stpx TMP0, TMP2, CARG2
-+ | addic. CARG2, CARG2, -PSIZE
- | bge <1
- |2:
- | bney cr1, >3
-@@ -2878,28 +3169,55 @@ static void build_subroutines(BuildCtx *ctx)
- | lfd f6, CCSTATE->fpr[5]
- | lfd f7, CCSTATE->fpr[6]
- | lfd f8, CCSTATE->fpr[7]
-+ | .if GPR64
-+ | lfd f9, CCSTATE->fpr[8]
-+ | lfd f10, CCSTATE->fpr[9]
-+ | lfd f11, CCSTATE->fpr[10]
-+ | lfd f12, CCSTATE->fpr[11]
-+ | lfd f13, CCSTATE->fpr[12]
-+ | .endif
- |3:
-- | lp TMP0, CCSTATE->func
-- | lwz CARG2, CCSTATE->gpr[1]
-- | lwz CARG3, CCSTATE->gpr[2]
-- | lwz CARG4, CCSTATE->gpr[3]
-- | lwz CARG5, CCSTATE->gpr[4]
-- | mtctr TMP0
-- | lwz r8, CCSTATE->gpr[5]
-- | lwz r9, CCSTATE->gpr[6]
-- | lwz r10, CCSTATE->gpr[7]
-- | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
-+ | .toc std TOCREG, SAVE_TOC
-+ | lp FUNCREG, CCSTATE->func
-+ | lp CARG2, CCSTATE->gpr[1]
-+ | lp CARG3, CCSTATE->gpr[2]
-+ | .opd lp TOCREG, TOC_OFS(FUNCREG)
-+ | .opdenv lp ENVREG, ENV_OFS(FUNCREG)
-+ | .opd lp FUNCREG, 0(FUNCREG)
-+ | lp CARG4, CCSTATE->gpr[3]
-+ | lp CARG5, CCSTATE->gpr[4]
-+ | mtctr FUNCREG
-+ | lp r8, CCSTATE->gpr[5]
-+ | lp r9, CCSTATE->gpr[6]
-+ | lp r10, CCSTATE->gpr[7]
-+ | lp CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
- | bctrl
-- | lwz CCSTATE:TMP1, -8(r14)
-- | lwz TMP2, -4(r14)
-+ | .toc lp TOCREG, SAVE_TOC
-+ | .if GPR64
-+ | ld CCSTATE:TMP1, -16(r14)
-+ | ld TMP2, -8(r14)
-+ | ld TMP0, 16(r14)
-+ | .else
-+ | lwz CCSTATE:TMP1, -8(r14)
-+ | lwz TMP2, -4(r14)
- | lwz TMP0, 4(r14)
-- | stw CARG1, CCSTATE:TMP1->gpr[0]
-+ | .endif
-+ | stp CARG1, CCSTATE:TMP1->gpr[0]
- | stfd FARG1, CCSTATE:TMP1->fpr[0]
-- | stw CARG2, CCSTATE:TMP1->gpr[1]
-+ | stp CARG2, CCSTATE:TMP1->gpr[1]
-+ | .if GPR64
-+ | stfd FARG2, CCSTATE:TMP1->fpr[1]
-+ | .endif
-+ | .elfv2 stfd FARG3, CCSTATE:TMP1->fpr[2]
-+ | .elfv2 stfd FARG4, CCSTATE:TMP1->fpr[3]
-+ | .elfv2 stfd FARG5, CCSTATE:TMP1->fpr[4]
-+ | .elfv2 stfd FARG6, CCSTATE:TMP1->fpr[5]
-+ | .elfv2 stfd FARG7, CCSTATE:TMP1->fpr[6]
-+ | .elfv2 stfd FARG8, CCSTATE:TMP1->fpr[7]
- | mtlr TMP0
-- | stw CARG3, CCSTATE:TMP1->gpr[2]
-+ | stp CARG3, CCSTATE:TMP1->gpr[2]
- | mr sp, r14
-- | stw CARG4, CCSTATE:TMP1->gpr[3]
-+ | stp CARG4, CCSTATE:TMP1->gpr[3]
- | mr r14, TMP2
- | blr
- |.endif
-@@ -2923,13 +3241,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- |.if DUALNUM
-- | lwzux TMP0, RA, BASE
-+ | lwzx TMP0, BASE_HI, RA
- | addi PC, PC, 4
-- | lwz CARG2, 4(RA)
-- | lwzux TMP1, RD, BASE
-+ | lwzx CARG2, BASE_LO, RA
-+ | lwzx TMP1, BASE_HI, RD
- | lwz TMP2, -4(PC)
- | checknum cr0, TMP0
-- | lwz CARG3, 4(RD)
-+ | lwzx CARG3, BASE_LO, RD
- | decode_RD4 TMP2, TMP2
- | checknum cr1, TMP1
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
-@@ -2953,7 +3271,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |7: // RA is not an integer.
- | bgt cr0, ->vmeta_comp
- | // RA is a number.
-- | lfd f0, 0(RA)
-+ | lfdx f0, BASE, RA
- | bgt cr1, ->vmeta_comp
- | blt cr1, >4
- | // RA is a number, RD is an integer.
-@@ -2965,7 +3283,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // RA is an integer, RD is a number.
- | tonum_i f0, CARG2
- |4:
-- | lfd f1, 0(RD)
-+ | lfdx f1, BASE, RD
- |5:
- | fcmpu cr0, f0, f1
- if (op == BC_ISLT) {
-@@ -2981,10 +3299,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- | b <1
- |.else
-- | lwzx TMP0, BASE, RA
-+ | lwzx TMP0, BASE_HI, RA
- | addi PC, PC, 4
- | lfdx f0, BASE, RA
-- | lwzx TMP1, BASE, RD
-+ | lwzx TMP1, BASE_HI, RD
- | checknum cr0, TMP0
- | lwz TMP2, -4(PC)
- | lfdx f1, BASE, RD
-@@ -3015,15 +3333,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- vk = op == BC_ISEQV;
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- |.if DUALNUM
-- | lwzux TMP0, RA, BASE
-+ | lwzx TMP0, BASE_HI, RA
- | addi PC, PC, 4
-- | lwz CARG2, 4(RA)
-- | lwzux TMP1, RD, BASE
-+ | lwzx CARG2, BASE_LO, RA
-+ | .if ENDIAN_LE
-+ | lwzx TMP1, BASE_HI, RD
-+ | .else
-+ | lwzux TMP1, RD, BASE_HI
-+ | .endif
- | checknum cr0, TMP0
- | lwz TMP2, -4(PC)
- | checknum cr1, TMP1
- | decode_RD4 TMP2, TMP2
-- | lwz CARG3, 4(RD)
-+ | .if ENDIAN_LE
-+ | lwzux CARG3, RD, BASE_LO
-+ | .else
-+ | lwz CARG3, WORD_LO(RD)
-+ | .endif
- | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
- if (vk) {
-@@ -3032,14 +3358,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ble cr7, ->BC_ISNEN_Z
- }
- |.else
-- | lwzux TMP0, RA, BASE
-+ | lwzx TMP0, BASE_HI, RA
- | lwz TMP2, 0(PC)
-- | lfd f0, 0(RA)
-+ | lfdx f0, BASE, RA
- | addi PC, PC, 4
-- | lwzux TMP1, RD, BASE
-+ | lwzx TMP1, BASE_HI, RD
- | checknum cr0, TMP0
- | decode_RD4 TMP2, TMP2
-- | lfd f1, 0(RD)
-+ | lfdx f1, BASE, RD
- | checknum cr1, TMP1
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
- | bge cr0, >5
-@@ -3057,8 +3383,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.endif
- |5: // Either or both types are not numbers.
- |.if not DUALNUM
-- | lwz CARG2, 4(RA)
-- | lwz CARG3, 4(RD)
-+ | lwzx CARG2, BASE_LO, RA
-+ | lwzx CARG3, BASE_LO, RD
- |.endif
- |.if FFI
- | cmpwi cr7, TMP0, LJ_TCDATA
-@@ -3074,10 +3400,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.if FFI
- | beq cr7, ->vmeta_equal_cd
- |.endif
-+ |.if P64
-+ | cmplwi cr7, TMP3, ~LJ_TUDATA // Avoid 64 bit lightuserdata.
-+ |.endif
- | cmplw cr5, CARG2, CARG3
- | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
- | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
-+ |.if P64
-+ | cror 4*cr6+lt, 4*cr6+lt, 4*cr7+gt
-+ |.endif
- | mr SAVE0, PC
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
- | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
-@@ -3116,9 +3448,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISEQS: case BC_ISNES:
- vk = op == BC_ISEQS;
- | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
-- | lwzux TMP0, RA, BASE
-+ | lwzx TMP0, BASE_HI, RA
- | srwi RD, RD, 1
-- | lwz STR:TMP3, 4(RA)
-+ | lwzx STR:TMP3, BASE_LO, RA
- | lwz TMP2, 0(PC)
- | subfic RD, RD, -4
- | addi PC, PC, 4
-@@ -3150,15 +3482,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- vk = op == BC_ISEQN;
- | // RA = src*8, RD = num_const*8, JMP with RD = target
- |.if DUALNUM
-- | lwzux TMP0, RA, BASE
-+ | lwzx TMP0, BASE_HI, RA
- | addi PC, PC, 4
-- | lwz CARG2, 4(RA)
-- | lwzux TMP1, RD, KBASE
-+ | lwzx CARG2, BASE_LO, RA
-+ | lwzux2 TMP1, CARG3, RD, KBASE
- | checknum cr0, TMP0
- | lwz TMP2, -4(PC)
- | checknum cr1, TMP1
- | decode_RD4 TMP2, TMP2
-- | lwz CARG3, 4(RD)
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
- if (vk) {
- |->BC_ISEQN_Z:
-@@ -3175,7 +3506,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- } else {
- |->BC_ISNEN_Z: // Dummy label.
- }
-- | lwzx TMP0, BASE, RA
-+ | lwzx TMP0, BASE_HI, RA
- | addi PC, PC, 4
- | lfdx f0, BASE, RA
- | lwz TMP2, -4(PC)
-@@ -3213,7 +3544,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |7: // RA is not an integer.
- | bge cr0, <3
- | // RA is a number.
-- | lfd f0, 0(RA)
-+ | lfdx f0, BASE, RA
- | blt cr1, >1
- | // RA is a number, RD is an integer.
- | tonum_i f1, CARG3
-@@ -3232,7 +3563,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISEQP: case BC_ISNEP:
- vk = op == BC_ISEQP;
- | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
-- | lwzx TMP0, BASE, RA
-+ | lwzx TMP0, BASE_HI, RA
- | srwi TMP1, RD, 3
- | lwz TMP2, 0(PC)
- | not TMP1, TMP1
-@@ -3262,7 +3593,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
- | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
-- | lwzx TMP0, BASE, RD
-+ | lwzx TMP0, BASE_HI, RD
- | lwz INS, 0(PC)
- | addi PC, PC, 4
- if (op == BC_IST || op == BC_ISF) {
-@@ -3297,7 +3628,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_ISTYPE:
- | // RA = src*8, RD = -type*8
-- | lwzx TMP0, BASE, RA
-+ | lwzx TMP0, BASE_HI, RA
- | srwi TMP1, RD, 3
- | ins_next1
- |.if not PPE and not GPR64
-@@ -3311,7 +3642,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_ISNUM:
- | // RA = src*8, RD = -(TISNUM-1)*8
-- | lwzx TMP0, BASE, RA
-+ | lwzx TMP0, BASE_HI, RA
- | ins_next1
- | checknum TMP0
- | bge ->vmeta_istype
-@@ -3330,17 +3661,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_NOT:
- | // RA = dst*8, RD = src*8
- | ins_next1
-- | lwzx TMP0, BASE, RD
-+ | lwzx TMP0, BASE_HI, RD
- | .gpr64 extsw TMP0, TMP0
- | subfic TMP1, TMP0, LJ_TTRUE
- | adde TMP0, TMP0, TMP1
-- | stwx TMP0, BASE, RA
-+ | stwx TMP0, BASE_HI, RA
- | ins_next2
- break;
- case BC_UNM:
- | // RA = dst*8, RD = src*8
-- | lwzux TMP1, RD, BASE
-- | lwz TMP0, 4(RD)
-+ | lwzx TMP1, BASE_HI, RD
-+ | lwzx TMP0, BASE_LO, RD
-+ |.if DUALNUM and not GPR64
-+ | mtxer ZERO
-+ |.endif
- | checknum TMP1
- |.if DUALNUM
- | bne >5
-@@ -3352,18 +3686,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.else
- | nego. TMP0, TMP0
- | bso >4
-- |1:
- |.endif
- | ins_next1
-- | stwux TISNUM, RA, BASE
-- | stw TMP0, 4(RA)
-+ | stwx TISNUM, BASE_HI, RA
-+ | stwx TMP0, BASE_LO, RA
- |3:
- | ins_next2
- |4:
-- |.if not GPR64
-- | // Potential overflow.
-- | checkov TMP1, <1 // Ignore unrelated overflow.
-- |.endif
- | lus TMP1, 0x41e0 // 2^31.
- | li TMP0, 0
- | b >7
-@@ -3373,8 +3702,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | xoris TMP1, TMP1, 0x8000
- |7:
- | ins_next1
-- | stwux TMP1, RA, BASE
-- | stw TMP0, 4(RA)
-+ | stwx TMP1, BASE_HI, RA
-+ | stwx TMP0, BASE_LO, RA
- |.if DUALNUM
- | b <3
- |.else
-@@ -3383,15 +3712,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_LEN:
- | // RA = dst*8, RD = src*8
-- | lwzux TMP0, RD, BASE
-- | lwz CARG1, 4(RD)
-+ | lwzx TMP0, BASE_HI, RD
-+ | lwzx CARG1, BASE_LO, RD
- | checkstr TMP0; bne >2
- | lwz CRET1, STR:CARG1->len
- |1:
- |.if DUALNUM
- | ins_next1
-- | stwux TISNUM, RA, BASE
-- | stw CRET1, 4(RA)
-+ | stwx TISNUM, BASE_HI, RA
-+ | stwx CRET1, BASE_LO, RA
- |.else
- | tonum_u f0, CRET1 // Result is a non-negative integer.
- | ins_next1
-@@ -3426,9 +3755,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
-- | lwzx TMP1, BASE, RB
-+ | .if ENDIAN_LE and DUALNUM
-+ | addi TMP2, RC, 4
-+ | .endif
-+ | lwzx TMP1, BASE_HI, RB
- | .if DUALNUM
-- | lwzx TMP2, KBASE, RC
-+ | .if ENDIAN_LE
-+ | lwzx TMP2, KBASE, TMP2
-+ | .else
-+ | lwzx TMP2, KBASE, RC
-+ | .endif
- | .endif
- | lfdx f14, BASE, RB
- | lfdx f15, KBASE, RC
-@@ -3442,9 +3778,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | .endif
- || break;
- ||case 1:
-- | lwzx TMP1, BASE, RB
-+ | .if ENDIAN_LE and DUALNUM
-+ | addi TMP2, RC, 4
-+ | .endif
-+ | lwzx TMP1, BASE_HI, RB
- | .if DUALNUM
-- | lwzx TMP2, KBASE, RC
-+ | .if ENDIAN_LE
-+ | lwzx TMP2, KBASE, TMP2
-+ | .else
-+ | lwzx TMP2, KBASE, RC
-+ | .endif
- | .endif
- | lfdx f15, BASE, RB
- | lfdx f14, KBASE, RC
-@@ -3458,8 +3801,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | .endif
- || break;
- ||default:
-- | lwzx TMP1, BASE, RB
-- | lwzx TMP2, BASE, RC
-+ | lwzx TMP1, BASE_HI, RB
-+ | lwzx TMP2, BASE_HI, RC
- | lfdx f14, BASE, RB
- | lfdx f15, BASE, RC
- | checknum cr0, TMP1
-@@ -3514,41 +3857,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
-- | lwzux TMP1, RB, BASE
-- | lwzux TMP2, RC, KBASE
-- | lwz CARG1, 4(RB)
-- | checknum cr0, TMP1
-- | lwz CARG2, 4(RC)
-+ | .if ENDIAN_LE
-+ | lwzux CARG2, RC, KBASE
-+ | lwzx TMP1, RB, BASE_HI
-+ | lwz TMP2, 4(RC)
-+ | checknum cr0, TMP1
-+ | lwzux CARG1, RB, BASE
-+ | .else
-+ | lwzux TMP1, RB, BASE
-+ | lwzux TMP2, RC, KBASE
-+ | lwz CARG1, 4(RB)
-+ | checknum cr0, TMP1
-+ | lwz CARG2, 4(RC)
-+ | .endif
- || break;
- ||case 1:
-- | lwzux TMP1, RB, BASE
-- | lwzux TMP2, RC, KBASE
-- | lwz CARG2, 4(RB)
-- | checknum cr0, TMP1
-- | lwz CARG1, 4(RC)
-+ | .if ENDIAN_LE
-+ | lwzux CARG1, RC, KBASE
-+ | lwzx TMP1, RB, BASE_HI
-+ | lwz TMP2, 4(RC)
-+ | checknum cr0, TMP1
-+ | lwzux CARG2, RB, BASE
-+ | .else
-+ | lwzux TMP1, RB, BASE
-+ | lwzux TMP2, RC, KBASE
-+ | lwz CARG2, 4(RB)
-+ | checknum cr0, TMP1
-+ | lwz CARG1, 4(RC)
-+ | .endif
- || break;
- ||default:
-- | lwzux TMP1, RB, BASE
-- | lwzux TMP2, RC, BASE
-- | lwz CARG1, 4(RB)
-- | checknum cr0, TMP1
-- | lwz CARG2, 4(RC)
-+ | .if ENDIAN_LE
-+ | lwzx TMP1, RB, BASE_HI
-+ | lwzx TMP2, RC, BASE_HI
-+ | lwzux CARG1, RB, BASE
-+ | checknum cr0, TMP1
-+ | lwzux CARG2, RC, BASE
-+ | .else
-+ | lwzux TMP1, RB, BASE
-+ | lwzux TMP2, RC, BASE
-+ | lwz CARG1, 4(RB)
-+ | checknum cr0, TMP1
-+ | lwz CARG2, 4(RC)
-+ | .endif
- || break;
- ||}
-+ | mtxer ZERO
- | checknum cr1, TMP2
- | bne >5
- | bne cr1, >5
- | intins CARG1, CARG1, CARG2
-- | bso >4
-- |1:
-+ | ins_arithfallback bso
- | ins_next1
-- | stwux TISNUM, RA, BASE
-- | stw CARG1, 4(RA)
-+ | stwx TISNUM, BASE_HI, RA
-+ | stwx CARG1, BASE_LO, RA
- |2:
- | ins_next2
-- |4: // Overflow.
-- | checkov TMP0, <1 // Ignore unrelated overflow.
-- | ins_arithfallback b
- |5: // FP variant.
- ||if (vk == 1) {
- | lfd f15, 0(RB)
-@@ -3620,9 +3984,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_POW:
- | // NYI: (partial) integer arithmetic.
-- | lwzx TMP1, BASE, RB
-+ | lwzx TMP1, BASE_HI, RB
- | lfdx FARG1, BASE, RB
-- | lwzx TMP2, BASE, RC
-+ | lwzx TMP2, BASE_HI, RC
- | lfdx FARG2, BASE, RC
- | checknum cr0, TMP1
- | checknum cr1, TMP2
-@@ -3648,6 +4012,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // Returns NULL (finished) or TValue * (metamethod).
- | cmplwi CRET1, 0
- | lp BASE, L->base
-+ | addi BASEP4, BASE, 4
- | bne ->vmeta_binop
- | ins_next1
- | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
-@@ -3664,8 +4029,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next1
- | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4
- | li TMP2, LJ_TSTR
-- | stwux TMP2, RA, BASE
-- | stw TMP0, 4(RA)
-+ | stwx TMP2, BASE_HI, RA
-+ | stwx TMP0, BASE_LO, RA
- | ins_next2
- break;
- case BC_KCDATA:
-@@ -3676,8 +4041,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | ins_next1
- | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4
- | li TMP2, LJ_TCDATA
-- | stwux TMP2, RA, BASE
-- | stw TMP0, 4(RA)
-+ | stwx TMP2, BASE_HI, RA
-+ | stwx TMP0, BASE_LO, RA
- | ins_next2
- |.endif
- break;
-@@ -3687,14 +4052,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | slwi RD, RD, 13
- | srawi RD, RD, 16
- | ins_next1
-- | stwux TISNUM, RA, BASE
-- | stw RD, 4(RA)
-+ | stwx TISNUM, BASE_HI, RA
-+ | stwx RD, BASE_LO, RA
- | ins_next2
- |.else
- | // The soft-float approach is faster.
- | slwi RD, RD, 13
- | srawi TMP1, RD, 31
- | xor TMP2, TMP1, RD
-+ | .gpr64 extsw RD, RD
- | sub TMP2, TMP2, TMP1 // TMP2 = abs(x)
- | cntlzw TMP3, TMP2
- | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1
-@@ -3706,8 +4072,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add RD, RD, TMP1 // hi = hi + exponent-1
- | and RD, RD, TMP0 // hi = x == 0 ? 0 : hi
- | ins_next1
-- | stwux RD, RA, BASE
-- | stw ZERO, 4(RA)
-+ | stwx RD, BASE_HI, RA
-+ | stwx ZERO, BASE_LO, RA
- | ins_next2
- |.endif
- break;
-@@ -3723,15 +4089,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | srwi TMP1, RD, 3
- | not TMP0, TMP1
- | ins_next1
-- | stwx TMP0, BASE, RA
-+ | stwx TMP0, BASE_HI, RA
- | ins_next2
- break;
- case BC_KNIL:
- | // RA = base*8, RD = end*8
-- | stwx TISNIL, BASE, RA
-+ | stwx TISNIL, BASE_HI, RA
- | addi RA, RA, 8
- |1:
-- | stwx TISNIL, BASE, RA
-+ | stwx TISNIL, BASE_HI, RA
- | cmpw RA, RD
- | addi RA, RA, 8
- | blt <1
-@@ -3763,10 +4129,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz CARG2, UPVAL:RB->v
- | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
- | lbz TMP0, UPVAL:RB->closed
-- | lwz TMP2, 0(RD)
-+ | lwz TMP2, WORD_HI(RD)
- | stfd f0, 0(CARG2)
- | cmplwi cr1, TMP0, 0
-- | lwz TMP1, 4(RD)
-+ | lwz TMP1, WORD_LO(RD)
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
- | subi TMP2, TMP2, (LJ_TNUMX+1)
- | bne >2 // Upvalue is closed and black?
-@@ -3799,8 +4165,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lbz TMP3, STR:TMP1->marked
- | lbz TMP2, UPVAL:RB->closed
- | li TMP0, LJ_TSTR
-- | stw STR:TMP1, 4(CARG2)
-- | stw TMP0, 0(CARG2)
-+ | stw STR:TMP1, WORD_LO(CARG2)
-+ | stw TMP0, WORD_HI(CARG2)
- | bne >2
- |1:
- | ins_next
-@@ -3837,7 +4203,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | ins_next1
- | lwz TMP1, UPVAL:RB->v
-- | stw TMP0, 0(TMP1)
-+ | stw TMP0, WORD_HI(TMP1)
- | ins_next2
- break;
-
-@@ -3852,6 +4218,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add CARG2, BASE, RA
- | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
- | lp BASE, L->base
-+ | addi BASEP4, BASE, 4
- |1:
- | ins_next
- break;
-@@ -3870,8 +4237,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // Returns GCfuncL *.
- | lp BASE, L->base
- | li TMP0, LJ_TFUNC
-- | stwux TMP0, RA, BASE
-- | stw LFUNC:CRET1, 4(RA)
-+ | addi BASEP4, BASE, 4
-+ | stwx TMP0, BASE_HI, RA
-+ | stwx LFUNC:CRET1, BASE_LO, RA
- | ins_next
- break;
-
-@@ -3904,8 +4272,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- | lp BASE, L->base
- | li TMP0, LJ_TTAB
-- | stwux TMP0, RA, BASE
-- | stw TAB:CRET1, 4(RA)
-+ | addi BASEP4, BASE, 4
-+ | stwx TMP0, BASE_HI, RA
-+ | stwx TAB:CRET1, BASE_LO, RA
- | ins_next
- if (op == BC_TNEW) {
- |3:
-@@ -3938,13 +4307,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_TGETV:
- | // RA = dst*8, RB = table*8, RC = key*8
-- | lwzux CARG1, RB, BASE
-- | lwzux CARG2, RC, BASE
-- | lwz TAB:RB, 4(RB)
-+ | lwzx CARG1, BASE_HI, RB
-+ | lwzx CARG2, BASE_HI, RC
-+ | lwzx TAB:RB, BASE_LO, RB
- |.if DUALNUM
-- | lwz RC, 4(RC)
-+ | lwzx RC, BASE_LO, RC
- |.else
-- | lfd f0, 0(RC)
-+ | lfdx f0, BASE, RC
- |.endif
- | checktab CARG1
- | checknum cr1, CARG2
-@@ -3971,8 +4340,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | slwi TMP2, TMP2, 3
- |.endif
- | ble ->vmeta_tgetv // Integer key and in array part?
-- | lwzx TMP0, TMP1, TMP2
-- | lfdx f14, TMP1, TMP2
-+ | .if ENDIAN_LE
-+ | lfdux f14, TMP1, TMP2
-+ | lwz TMP0, WORD_HI(TMP1)
-+ | .else
-+ | lwzx TMP0, TMP1, TMP2
-+ | lfdx f14, TMP1, TMP2
-+ | .endif
- | checknil TMP0; beq >2
- |1:
- | ins_next1
-@@ -3991,15 +4365,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |5:
- | checkstr CARG2; bne ->vmeta_tgetv
- |.if not DUALNUM
-- | lwz STR:RC, 4(RC)
-+ | lwzx STR:RC, BASE_LO, RC
- |.endif
- | b ->BC_TGETS_Z // String key?
- break;
- case BC_TGETS:
- | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
-- | lwzux CARG1, RB, BASE
-+ | lwzx CARG1, BASE_HI, RB
- | srwi TMP1, RC, 1
-- | lwz TAB:RB, 4(RB)
-+ | lwzx TAB:RB, BASE_LO, RB
- | subfic TMP1, TMP1, -4
- | checktab CARG1
- | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
-@@ -4015,16 +4389,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | sub TMP1, TMP0, TMP1
- | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
- |1:
-- | lwz CARG1, NODE:TMP2->key
-- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
-- | lwz CARG2, NODE:TMP2->val
-- | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
-+ | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2)
-+ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2)
-+ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2)
-+ | lwz TMP1, WORD_LO+offsetof(Node, val)(NODE:TMP2)
- | checkstr CARG1; bne >4
- | cmpw TMP0, STR:RC; bne >4
- | checknil CARG2; beq >5 // Key found, but nil value?
- |3:
-- | stwux CARG2, RA, BASE
-- | stw TMP1, 4(RA)
-+ | stwx CARG2, BASE_HI, RA
-+ | stwx TMP1, BASE_LO, RA
- | ins_next
- |
- |4: // Follow hash chain.
-@@ -4045,15 +4419,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TGETB:
- | // RA = dst*8, RB = table*8, RC = index*8
-- | lwzux CARG1, RB, BASE
-+ | lwzx CARG1, BASE_HI, RB
- | srwi TMP0, RC, 3
-- | lwz TAB:RB, 4(RB)
-+ | lwzx TAB:RB, BASE_LO, RB
- | checktab CARG1; bne ->vmeta_tgetb
- | lwz TMP1, TAB:RB->asize
- | lwz TMP2, TAB:RB->array
- | cmplw TMP0, TMP1; bge ->vmeta_tgetb
-- | lwzx TMP1, TMP2, RC
-- | lfdx f0, TMP2, RC
-+ | .if ENDIAN_LE
-+ | lfdux f0, TMP2, RC
-+ | lwz TMP1, WORD_HI(TMP2)
-+ | .else
-+ | lwzx TMP1, TMP2, RC
-+ | lfdx f0, TMP2, RC
-+ | .endif
- | checknil TMP1; beq >5
- |1:
- | ins_next1
-@@ -4071,12 +4450,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TGETR:
- | // RA = dst*8, RB = table*8, RC = key*8
-- | add RB, BASE, RB
-- | lwz TAB:CARG1, 4(RB)
-+ | lwzx TAB:CARG1, BASE_LO, RB
- |.if DUALNUM
-- | add RC, BASE, RC
- | lwz TMP0, TAB:CARG1->asize
-- | lwz CARG2, 4(RC)
-+ | lwzx CARG2, BASE_LO, RC
- | lwz TMP1, TAB:CARG1->array
- |.else
- | lfdx f0, BASE, RC
-@@ -4096,13 +4473,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-
- case BC_TSETV:
- | // RA = src*8, RB = table*8, RC = key*8
-- | lwzux CARG1, RB, BASE
-- | lwzux CARG2, RC, BASE
-- | lwz TAB:RB, 4(RB)
-+ | lwzx CARG1, BASE_HI, RB
-+ | lwzx CARG2, BASE_HI, RC
-+ | lwzx TAB:RB, BASE_LO, RB
- |.if DUALNUM
-- | lwz RC, 4(RC)
-+ | lwzx RC, BASE_LO, RC
- |.else
-- | lfd f0, 0(RC)
-+ | lfdx f0, BASE, RC
- |.endif
- | checktab CARG1
- | checknum cr1, CARG2
-@@ -4129,7 +4506,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | slwi TMP0, TMP2, 3
- |.endif
- | ble ->vmeta_tsetv // Integer key and in array part?
-+ | .if ENDIAN_LE
-+ | addi TMP2, TMP1, 4
-+ | lwzx TMP2, TMP2, TMP0
-+ | .else
- | lwzx TMP2, TMP1, TMP0
-+ | .endif
- | lbz TMP3, TAB:RB->marked
- | lfdx f14, BASE, RA
- | checknil TMP2; beq >3
-@@ -4152,7 +4534,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |5:
- | checkstr CARG2; bne ->vmeta_tsetv
- |.if not DUALNUM
-- | lwz STR:RC, 4(RC)
-+ | lwzx STR:RC, BASE_LO, RC
- |.endif
- | b ->BC_TSETS_Z // String key?
- |
-@@ -4162,9 +4544,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TSETS:
- | // RA = src*8, RB = table*8, RC = str_const*8 (~)
-- | lwzux CARG1, RB, BASE
-+ | lwzx CARG1, BASE_HI, RB
- | srwi TMP1, RC, 1
-- | lwz TAB:RB, 4(RB)
-+ | lwzx TAB:RB, BASE_LO, RB
- | subfic TMP1, TMP1, -4
- | checktab CARG1
- | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
-@@ -4183,9 +4565,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lbz TMP3, TAB:RB->marked
- | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
- |1:
-- | lwz CARG1, NODE:TMP2->key
-- | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
-- | lwz CARG2, NODE:TMP2->val
-+ | lwz CARG1, WORD_HI+offsetof(Node, key)(NODE:TMP2)
-+ | lwz TMP0, WORD_LO+offsetof(Node, key)(NODE:TMP2)
-+ | lwz CARG2, WORD_HI+offsetof(Node, val)(NODE:TMP2)
- | lwz NODE:TMP1, NODE:TMP2->next
- | checkstr CARG1; bne >5
- | cmpw TMP0, STR:RC; bne >5
-@@ -4225,13 +4607,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check.
- |6:
- | li TMP0, LJ_TSTR
-- | stw STR:RC, 4(CARG3)
-+ | stw STR:RC, WORD_LO(CARG3)
- | mr CARG2, TAB:RB
-- | stw TMP0, 0(CARG3)
-+ | stw TMP0, WORD_HI(CARG3)
- | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
- | // Returns TValue *.
- | lp BASE, L->base
- | stfd f14, 0(CRET1)
-+ | addi BASEP4, BASE, 4
- | b <3 // No 2nd write barrier needed.
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
-@@ -4240,9 +4623,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TSETB:
- | // RA = src*8, RB = table*8, RC = index*8
-- | lwzux CARG1, RB, BASE
-+ | lwzx CARG1, BASE_HI, RB
- | srwi TMP0, RC, 3
-- | lwz TAB:RB, 4(RB)
-+ | lwzx TAB:RB, BASE_LO, RB
- | checktab CARG1; bne ->vmeta_tsetb
- | lwz TMP1, TAB:RB->asize
- | lwz TMP2, TAB:RB->array
-@@ -4250,7 +4633,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cmplw TMP0, TMP1
- | lfdx f14, BASE, RA
- | bge ->vmeta_tsetb
-- | lwzx TMP1, TMP2, RC
-+ | .if ENDIAN_LE
-+ | addi TMP1, TMP2, 4
-+ | lwzx TMP1, TMP1, RC
-+ | .else
-+ | lwzx TMP1, TMP2, RC
-+ | .endif
- | checknil TMP1; beq >5
- |1:
- | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
-@@ -4274,13 +4662,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_TSETR:
- | // RA = dst*8, RB = table*8, RC = key*8
-- | add RB, BASE, RB
-- | lwz TAB:CARG2, 4(RB)
-+ | lwzx TAB:CARG2, BASE_LO, RB
- |.if DUALNUM
-- | add RC, BASE, RC
- | lbz TMP3, TAB:CARG2->marked
- | lwz TMP0, TAB:CARG2->asize
-- | lwz CARG3, 4(RC)
-+ | lwzx CARG3, BASE_LO, RC
- | lwz TMP1, TAB:CARG2->array
- |.else
- | lfdx f0, BASE, RC
-@@ -4311,9 +4697,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add RA, BASE, RA
- |1:
- | add TMP3, KBASE, RD
-- | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table.
-+ | lwz TAB:CARG2, WORD_LO-8(RA) // Guaranteed to be a table.
- | addic. TMP0, MULTRES, -8
-- | lwz TMP3, 4(TMP3) // Integer constant is in lo-word.
-+ | lwz TMP3, WORD_LO(TMP3) // Integer constant is in lo-word.
- | srwi CARG3, TMP0, 3
- | beq >4 // Nothing to copy?
- | add CARG3, CARG3, TMP3
-@@ -4362,8 +4748,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_CALL:
- | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
- | mr TMP2, BASE
-- | lwzux TMP0, BASE, RA
-- | lwz LFUNC:RB, 4(BASE)
-+ | lwzux2 TMP0, LFUNC:RB, BASE, RA
- | subi NARGS8:RC, NARGS8:RC, 8
- | addi BASE, BASE, 8
- | checkfunc TMP0; bne ->vmeta_call
-@@ -4377,8 +4762,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- break;
- case BC_CALLT:
- | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
-- | lwzux TMP0, RA, BASE
-- | lwz LFUNC:RB, 4(RA)
-+ | lwzux2 TMP0, LFUNC:RB, RA, BASE
- | subi NARGS8:RC, NARGS8:RC, 8
- | lwz TMP1, FRAME_PC(BASE)
- | checkfunc TMP0
-@@ -4430,12 +4814,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
- | mr TMP2, BASE
- | add BASE, BASE, RA
-- | lwz TMP1, -24(BASE)
-- | lwz LFUNC:RB, -20(BASE)
-+ | lwz TMP1, WORD_HI-24(BASE)
-+ | lwz LFUNC:RB, WORD_LO-24(BASE)
- | lfd f1, -8(BASE)
- | lfd f0, -16(BASE)
-- | stw TMP1, 0(BASE) // Copy callable.
-- | stw LFUNC:RB, 4(BASE)
-+ | stw TMP1, WORD_HI(BASE) // Copy callable.
-+ | stw LFUNC:RB, WORD_LO(BASE)
- | checkfunc TMP1
- | stfd f1, 16(BASE) // Copy control var.
- | li NARGS8:RC, 16 // Iterators get 2 arguments.
-@@ -4450,8 +4834,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // NYI: add hotloop, record BC_ITERN.
- |.endif
- | add RA, BASE, RA
-- | lwz TAB:RB, -12(RA)
-- | lwz RC, -4(RA) // Get index from control var.
-+ | lwz TAB:RB, WORD_LO-16(RA)
-+ | lwz RC, WORD_LO-8(RA) // Get index from control var.
- | lwz TMP0, TAB:RB->asize
- | lwz TMP1, TAB:RB->array
- | addi PC, PC, 4
-@@ -4459,14 +4843,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | cmplw RC, TMP0
- | slwi TMP3, RC, 3
- | bge >5 // Index points after array part?
-- | lwzx TMP2, TMP1, TMP3
-- | lfdx f0, TMP1, TMP3
-+ | lfdux f0, TMP3, TMP1
-+ | lwz TMP2, WORD_HI(TMP3)
- | checknil TMP2
- | lwz INS, -4(PC)
- | beq >4
- |.if DUALNUM
-- | stw RC, 4(RA)
-- | stw TISNUM, 0(RA)
-+ | stw RC, WORD_LO(RA)
-+ | stw TISNUM, WORD_HI(RA)
- |.else
- | tonum_u f1, RC
- |.endif
-@@ -4474,7 +4858,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | stfd f0, 8(RA)
- | decode_RD4 TMP1, INS
-- | stw RC, -4(RA) // Update control var.
-+ | stw RC, WORD_LO-8(RA) // Update control var.
- | add PC, TMP1, TMP3
- |.if not DUALNUM
- | stfd f1, 0(RA)
-@@ -4496,9 +4880,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | bgty <3
- | slwi RB, RC, 3
- | sub TMP3, TMP3, RB
-- | lwzx RB, TMP2, TMP3
-- | lfdx f0, TMP2, TMP3
-- | add NODE:TMP3, TMP2, TMP3
-+ | lfdux f0, TMP3, TMP2
-+ | lwz RB, WORD_HI(TMP3)
- | checknil RB
- | lwz INS, -4(PC)
- | beq >7
-@@ -4510,7 +4893,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | stfd f1, 0(RA)
- | addi RC, RC, 1
- | add PC, TMP1, TMP2
-- | stw RC, -4(RA) // Update control var.
-+ | stw RC, WORD_LO-8(RA) // Update control var.
- | b <3
- |
- |7: // Skip holes in hash part.
-@@ -4521,10 +4904,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_ISNEXT:
- | // RA = base*8, RD = target (points to ITERN)
- | add RA, BASE, RA
-- | lwz TMP0, -24(RA)
-- | lwz CFUNC:TMP1, -20(RA)
-- | lwz TMP2, -16(RA)
-- | lwz TMP3, -8(RA)
-+ | lwz TMP0, WORD_HI-24(RA)
-+ | lwz CFUNC:TMP1, WORD_LO-24(RA)
-+ | lwz TMP2, WORD_HI-16(RA)
-+ | lwz TMP3, WORD_HI-8(RA)
- | cmpwi cr0, TMP2, LJ_TTAB
- | cmpwi cr1, TMP0, LJ_TFUNC
- | cmpwi cr6, TMP3, LJ_TNIL
-@@ -4538,17 +4921,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | bne cr0, >5
- | lus TMP1, 0xfffe
- | ori TMP1, TMP1, 0x7fff
-- | stw ZERO, -4(RA) // Initialize control var.
-- | stw TMP1, -8(RA)
-+ | stw ZERO, WORD_LO-8(RA) // Initialize control var.
-+ | stw TMP1, WORD_HI-8(RA)
- | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
- |1:
- | ins_next
- |5: // Despecialize bytecode if any of the checks fail.
- | li TMP0, BC_JMP
- | li TMP1, BC_ITERC
-+ | .if ENDIAN_LE
-+ | stb TMP0, -4(PC)
-+ | .else
- | stb TMP0, -1(PC)
-+ | .endif
- | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
-+ | .if ENDIAN_LE
-+ | stb TMP1, 0(PC)
-+ | .else
- | stb TMP1, 3(PC)
-+ | .endif
- | b <1
- break;
-
-@@ -4582,7 +4973,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | addi RA, RA, 8
- | blt cr1, <1 // More vararg slots?
- |2: // Fill up remainder with nil.
-- | stw TISNIL, 0(RA)
-+ | stw TISNIL, WORD_HI(RA)
- | cmplw RA, TMP2
- | addi RA, RA, 8
- | blt <2
-@@ -4619,6 +5010,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | add RA, BASE, RA
- | add RC, BASE, SAVE0
- | subi TMP3, BASE, 8
-+ | addi BASEP4, BASE, 4
- | b <6
- break;
-
-@@ -4667,13 +5059,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | bgt >6
- | sub BASE, TMP2, RA
- | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
-+ | addi BASEP4, BASE, 4
- | ins_next1
- | lwz TMP1, LFUNC:TMP1->pc
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | ins_next2
- |
- |6: // Fill up results with nil.
-- | subi TMP1, RD, 8
-+ | addi TMP1, RD, WORD_HI-8
- | addi RD, RD, 8
- | stwx TISNIL, TMP2, TMP1
- | b <5
-@@ -4709,13 +5102,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | bgt >6
- | sub BASE, TMP2, RA
- | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
-+ | addi BASEP4, BASE, 4
- | ins_next1
- | lwz TMP1, LFUNC:TMP1->pc
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | ins_next2
- |
- |6: // Fill up results with nil.
-- | subi TMP1, RD, 8
-+ | addi TMP1, RD, WORD_HI-8
- | addi RD, RD, 8
- | stwx TISNIL, TMP2, TMP1
- | b <5
-@@ -4741,11 +5135,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- vk = (op == BC_IFORL || op == BC_JFORL);
- |.if DUALNUM
- | // Integer loop.
-- | lwzux TMP1, RA, BASE
-- | lwz CARG1, FORL_IDX*8+4(RA)
-+ | lwzux2 TMP1, CARG1, RA, BASE
-+ if (vk) {
-+ | mtxer ZERO
-+ }
- | cmplw cr0, TMP1, TISNUM
- if (vk) {
-- | lwz CARG3, FORL_STEP*8+4(RA)
-+ | lwz CARG3, FORL_STEP*8+WORD_LO(RA)
- | bne >9
- |.if GPR64
- | // Need to check overflow for (a<<32) + (b<<32).
-@@ -4757,15 +5153,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | addo. CARG1, CARG1, CARG3
- |.endif
- | cmpwi cr6, CARG3, 0
-- | lwz CARG2, FORL_STOP*8+4(RA)
-- | bso >6
-+ | lwz CARG2, FORL_STOP*8+WORD_LO(RA)
-+ | bso >2
- |4:
-- | stw CARG1, FORL_IDX*8+4(RA)
-+ | stw CARG1, FORL_IDX*8+WORD_LO(RA)
- } else {
-- | lwz TMP3, FORL_STEP*8(RA)
-- | lwz CARG3, FORL_STEP*8+4(RA)
-- | lwz TMP2, FORL_STOP*8(RA)
-- | lwz CARG2, FORL_STOP*8+4(RA)
-+ | lwz TMP3, FORL_STEP*8+WORD_HI(RA)
-+ | lwz CARG3, FORL_STEP*8+WORD_LO(RA)
-+ | lwz TMP2, FORL_STOP*8+WORD_HI(RA)
-+ | lwz CARG2, FORL_STOP*8+WORD_LO(RA)
- | cmplw cr7, TMP3, TISNUM
- | cmplw cr1, TMP2, TISNUM
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
-@@ -4776,11 +5172,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | blt cr6, >5
- | cmpw CARG1, CARG2
- |1:
-- | stw TISNUM, FORL_EXT*8(RA)
-+ | stw TISNUM, FORL_EXT*8+WORD_HI(RA)
- if (op != BC_JFORL) {
- | srwi RD, RD, 1
- }
-- | stw CARG1, FORL_EXT*8+4(RA)
-+ | stw CARG1, FORL_EXT*8+WORD_LO(RA)
- if (op != BC_JFORL) {
- | add RD, PC, RD
- }
-@@ -4800,11 +5196,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |5: // Invert check for negative step.
- | cmpw CARG2, CARG1
- | b <1
-- if (vk) {
-- |6: // Potential overflow.
-- | checkov TMP0, <4 // Ignore unrelated overflow.
-- | b <2
-- }
- |.endif
- if (vk) {
- |.if DUALNUM
-@@ -4815,14 +5206,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- |.endif
- | lfd f3, FORL_STEP*8(RA)
- | lfd f2, FORL_STOP*8(RA)
-- | lwz TMP3, FORL_STEP*8(RA)
-+ | lwz TMP3, FORL_STEP*8+WORD_HI(RA)
- | fadd f1, f1, f3
- | stfd f1, FORL_IDX*8(RA)
- } else {
- |.if DUALNUM
- |9: // FP loop.
- |.else
-+ |.if ENDIAN_LE
-+ | lwzx TMP1, RA, BASE_LO
-+ | add RA, RA, BASE
-+ |.else
- | lwzux TMP1, RA, BASE
-+ |.endif
- | lwz TMP3, FORL_STEP*8(RA)
- | lwz TMP2, FORL_STOP*8(RA)
- | cmplw cr0, TMP1, TISNUM
-@@ -4903,17 +5299,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- #endif
- case BC_IITERL:
- | // RA = base*8, RD = target
-- | lwzux TMP1, RA, BASE
-- | lwz TMP2, 4(RA)
-+ | lwzux2 TMP1, TMP2, RA, BASE
- | checknil TMP1; beq >1 // Stop if iterator returned nil.
- if (op == BC_JITERL) {
-- | stw TMP1, -8(RA)
-- | stw TMP2, -4(RA)
-+ | stw TMP1, WORD_HI-8(RA)
-+ | stw TMP2, WORD_LO-8(RA)
- | b =>BC_JLOOP
- } else {
- | branch_RD // Otherwise save control var + branch.
-- | stw TMP1, -8(RA)
-- | stw TMP2, -4(RA)
-+ | stw TMP1, WORD_HI-8(RA)
-+ | stw TMP2, WORD_LO-8(RA)
- }
- |1:
- | ins_next
-@@ -4942,7 +5337,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | // Traces on PPC don't store the trace number, so use 0.
- | stw ZERO, DISPATCH_GL(vmstate)(DISPATCH)
- | lwzx TRACE:TMP2, TMP1, RD
-- | clrso TMP1
-+ | mtxer ZERO
- | lp TMP2, TRACE:TMP2->mcode
- | stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
- | mtctr TMP2
-@@ -4994,7 +5389,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- }
- |
- |3: // Clear missing parameters.
-- | stwx TISNIL, BASE, NARGS8:RC
-+ | stwx TISNIL, BASE_HI, NARGS8:RC
- | addi NARGS8:RC, NARGS8:RC, 8
- | b <2
- break;
-@@ -5011,11 +5406,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | lwz TMP2, L->maxstack
- | add TMP1, BASE, RC
- | add TMP0, RA, RC
-- | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC.
-+ | stw LFUNC:RB, WORD_LO(TMP1) // Store copy of LFUNC.
- | addi TMP3, RC, 8+FRAME_VARG
- | lwz KBASE, -4+PC2PROTO(k)(PC)
- | cmplw TMP0, TMP2
-- | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG.
-+ | stw TMP3, WORD_HI(TMP1) // Store delta + FRAME_VARG.
- | bge ->vm_growstack_l
- | lbz TMP2, -4+PC2PROTO(numparams)(PC)
- | mr RA, BASE
-@@ -5026,18 +5421,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- | beq >3
- |1:
- | cmplw RA, RC // Less args than parameters?
-- | lwz TMP0, 0(RA)
-- | lwz TMP3, 4(RA)
-+ | lwz TMP0, WORD_HI(RA)
-+ | lwz TMP3, WORD_LO(RA)
- | bge >4
-- | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC).
-+ | stw TISNIL, WORD_HI(RA) // Clear old fixarg slot (help the GC).
- | addi RA, RA, 8
- |2:
- | addic. TMP2, TMP2, -1
-- | stw TMP0, 8(TMP1)
-- | stw TMP3, 12(TMP1)
-+ | stw TMP0, WORD_HI+8(TMP1)
-+ | stw TMP3, WORD_LO+8(TMP1)
- | addi TMP1, TMP1, 8
- | bne <1
- |3:
-+ | addi BASEP4, BASE, 4
- | ins_next2
- |
- |4: // Clear missing parameters.
-@@ -5049,35 +5445,35 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
- case BC_FUNCCW:
- | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
- if (op == BC_FUNCC) {
-- | lp RD, CFUNC:RB->f
-+ | lp FUNCREG, CFUNC:RB->f
- } else {
-- | lp RD, DISPATCH_GL(wrapf)(DISPATCH)
-+ | lp FUNCREG, DISPATCH_GL(wrapf)(DISPATCH)
- }
- | add TMP1, RA, NARGS8:RC
- | lwz TMP2, L->maxstack
-- | .toc lp TMP3, 0(RD)
-+ | .opd lp TMP3, 0(FUNCREG)
- | add RC, BASE, NARGS8:RC
- | stp BASE, L->base
- | cmplw TMP1, TMP2
- | stp RC, L->top
- | li_vmstate C
-- |.if TOC
-+ |.if OPD
- | mtctr TMP3
- |.else
-- | mtctr RD
-+ | mtctr FUNCREG
- |.endif
- if (op == BC_FUNCCW) {
- | lp CARG2, CFUNC:RB->f
- }
- | mr CARG1, L
- | bgt ->vm_growstack_c // Need to grow stack.
-- | .toc lp TOCREG, TOC_OFS(RD)
-- | .tocenv lp ENVREG, ENV_OFS(RD)
-+ | .opd lp TOCREG, TOC_OFS(FUNCREG)
-+ | .opdenv lp ENVREG, ENV_OFS(FUNCREG)
- | st_vmstate
- | bctrl // (lua_State *L [, lua_CFunction f])
-+ | .toc lp TOCREG, SAVE_TOC
- | // Returns nresults.
- | lp BASE, L->base
-- | .toc ld TOCREG, SAVE_TOC
- | slwi RD, CRET1, 3
- | lp TMP1, L->top
- | li_vmstate INTERP
-@@ -5128,7 +5524,11 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.byte 0x1\n"
- "\t.string \"\"\n"
- "\t.uleb128 0x1\n"
-+#if LJ_ARCH_PPC32ON64
-+ "\t.sleb128 -8\n"
-+#else
- "\t.sleb128 -4\n"
-+#endif
- "\t.byte 65\n"
- "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
- "\t.align 2\n"
-@@ -5141,14 +5541,24 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.long .Lbegin\n"
- "\t.long %d\n"
- "\t.byte 0xe\n\t.uleb128 %d\n"
-+#if LJ_ARCH_PPC32ON64
-+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
-+ "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n",
-+#else
- "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
- "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
-+#endif
- fcofs, CFRAME_SIZE);
- for (i = 14; i <= 31; i++)
- fprintf(ctx->fp,
- "\t.byte %d\n\t.uleb128 %d\n"
- "\t.byte %d\n\t.uleb128 %d\n",
-- 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i));
-+#if LJ_ARCH_PPC32ON64
-+ 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i)
-+#else
-+ 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)
-+#endif
-+ );
- fprintf(ctx->fp,
- "\t.align 2\n"
- ".LEFDE0:\n\n");
-@@ -5164,8 +5574,12 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.long lj_vm_ffi_call\n"
- #endif
- "\t.long %d\n"
-+#if LJ_ARCH_PPC32ON64
-+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
-+#else
- "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
-- "\t.byte 0x8e\n\t.uleb128 2\n"
-+#endif
-+ "\t.byte 0x8e\n\t.uleb128 1\n"
- "\t.byte 0xd\n\t.uleb128 0xe\n"
- "\t.align 2\n"
- ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
-@@ -5180,7 +5594,11 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.byte 0x1\n"
- "\t.string \"zPR\"\n"
- "\t.uleb128 0x1\n"
-+#if LJ_ARCH_PPC32ON64
-+ "\t.sleb128 -8\n"
-+#else
- "\t.sleb128 -4\n"
-+#endif
- "\t.byte 65\n"
- "\t.uleb128 6\n" /* augmentation length */
- "\t.byte 0x1b\n" /* pcrel|sdata4 */
-@@ -5198,14 +5616,24 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.long %d\n"
- "\t.uleb128 0\n" /* augmentation length */
- "\t.byte 0xe\n\t.uleb128 %d\n"
-+#if LJ_ARCH_PPC32ON64
-+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
-+ "\t.byte 0x11\n\t.uleb128 70\n\t.sleb128 -1\n",
-+#else
- "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
- "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
-+#endif
- fcofs, CFRAME_SIZE);
- for (i = 14; i <= 31; i++)
- fprintf(ctx->fp,
- "\t.byte %d\n\t.uleb128 %d\n"
- "\t.byte %d\n\t.uleb128 %d\n",
-- 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i));
-+#if LJ_ARCH_PPC32ON64
-+ 0x80+i, 19+(31-i), 0x80+32+i, 1+(31-i)
-+#else
-+ 0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i)
-+#endif
-+ );
- fprintf(ctx->fp,
- "\t.align 2\n"
- ".LEFDE2:\n\n");
-@@ -5233,8 +5661,12 @@ static void emit_asm_debug(BuildCtx *ctx)
- "\t.long lj_vm_ffi_call-.\n"
- "\t.long %d\n"
- "\t.uleb128 0\n" /* augmentation length */
-+#if LJ_ARCH_PPC32ON64
-+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n"
-+#else
- "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
-- "\t.byte 0x8e\n\t.uleb128 2\n"
-+#endif
-+ "\t.byte 0x8e\n\t.uleb128 1\n"
- "\t.byte 0xd\n\t.uleb128 0xe\n"
- "\t.align 2\n"
- ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
-
diff --git a/main/luajit/APKBUILD b/main/luajit/APKBUILD
index 27e50ce7a0..fdf2cc3d36 100644
--- a/main/luajit/APKBUILD
+++ b/main/luajit/APKBUILD
@@ -1,7 +1,7 @@
# Maintainer: Jakub Jirutka <jakub@jirutka.cz>
# Contributor: Bartłomiej Piotrowski <nospam@bpiotrowski.pl>
pkgname=luajit
-_pkgname=LuaJIT
+_pkgname=moonjit
pkgver=5.1.20190925
_pkgver=fedora-${pkgver##*.}
_compatver=2.1.0-beta3
@@ -10,22 +10,17 @@ pkgdesc="Integration fork of the original LuaJIT"
url="https://github.com/siddhesh/LuaJIT"
arch="all"
license="MIT"
-makedepends="$depends_dev paxmark"
provides="lua"
subpackages="$pkgname-dev $pkgname-doc"
-source="https://github.com/siddhesh/$pkgname/archive/$pkgname-$_pkgver.tar.gz
+source="https://github.com/siddhesh/luajit/archive/luajit-$_pkgver.tar.gz
module-paths.patch"
builddir="$srcdir/$_pkgname-$pkgname-$_pkgver"
build() {
- cd "$builddir"
-
make amalg PREFIX=/usr
}
check() {
- cd "$builddir"
-
# tests fail
sed -i -e '/redir.lua/d' test/lib/ffi/index
case $CARCH in
@@ -35,14 +30,8 @@ check() {
}
package() {
- cd "$builddir"
-
make install DESTDIR="$pkgdir" PREFIX=/usr
- local paxflags="-m"
- [ "$CARCH" = "x86" ] && paxflags="-msp"
- paxmark $paxflags "$pkgdir"/usr/bin/luajit-$_compatver
-
install -Dm644 COPYRIGHT \
"$pkgdir"/usr/share/licenses/$pkgname/COPYRIGHT
}
diff --git a/main/luajit/s390x.patch b/main/luajit/s390x.patch
deleted file mode 100644
index 15f51b8ca0..0000000000
--- a/main/luajit/s390x.patch
+++ /dev/null
@@ -1,43692 +0,0 @@
-From 8d336e1299c4af83df61aed8f59171b15140f0d9 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Thu, 10 Nov 2016 10:33:16 +0530
-Subject: [PATCH 001/260] Create lj_target_s390x.h
-
-Adding file lj_target_s390x.h
-Few arm based instructions are changed with equivalent s390x instructions
----
- src/lj_target_s390x.h | 287 +++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 287 insertions(+)
- create mode 100644 src/lj_target_s390x.h
-
-diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
-new file mode 100644
-index 000000000..7da2063d2
---- /dev/null
-+++ b/src/lj_target_s390x.h
-@@ -0,0 +1,287 @@
-+/*
-+** Definitions for S390 CPUs.
-+** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
-+*/
-+
-+#ifndef _LJ_TARGET_S390_H
-+#define _LJ_TARGET_S390_H
-+
-+/* -- Registers IDs ------------------------------------------------------- */
-+
-+#define GPRDEF(_) \
-+ _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
-+ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(15) \
-+#if LJ_SOFTFP
-+#define FPRDEF(_)
-+#else
-+#define FPRDEF(_) \
-+ _(F0) _(F2) _(F4) _(F6)
-+#endif
-+#define VRIDDEF(_)
-+
-+#define RIDENUM(name) RID_##name,
-+
-+enum {
-+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
-+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
-+ RID_MAX,
-+ RID_TMP = RID_LR,
-+
-+ /* Calling conventions. */
-+ RID_RET = RID_R0,
-+ RID_RETLO = RID_R0,
-+ RID_RETHI = RID_R1,
-+#if LJ_SOFTFP
-+ RID_FPRET = RID_R0,
-+#else
-+ RID_FPRET = RID_D0,
-+#endif
-+
-+ /* These definitions must match with the *.dasc file(s): */
-+ RID_BASE = RID_R9, /* Interpreter BASE. */
-+ RID_LPC = RID_R6, /* Interpreter PC. */
-+ RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */
-+ RID_LREG = RID_R8, /* Interpreter L. */
-+
-+ /* Register ranges [min, max) and number of registers. */
-+ RID_MIN_GPR = RID_R0,
-+ RID_MAX_GPR = RID_PC+1,
-+ RID_MIN_FPR = RID_MAX_GPR,
-+#if LJ_SOFTFP
-+ RID_MAX_FPR = RID_MIN_FPR,
-+#else
-+ RID_MAX_FPR = RID_D15+1,
-+#endif
-+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
-+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
-+};
-+
-+#define RID_NUM_KREF RID_NUM_GPR
-+#define RID_MIN_KREF RID_R0
-+
-+/* -- Register sets ------------------------------------------------------- */
-+
-+/* Make use of all registers, except sp, lr and pc. */
-+#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1))
-+#define RSET_GPREVEN \
-+ (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \
-+ RID2RSET(RID_R8)|RID2RSET(RID_R10))
-+#define RSET_GPRODD \
-+ (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \
-+ RID2RSET(RID_R9)|RID2RSET(RID_R11))
-+#if LJ_SOFTFP
-+#define RSET_FPR 0
-+#else
-+#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
-+#endif
-+#define RSET_ALL (RSET_GPR|RSET_FPR)
-+#define RSET_INIT RSET_ALL
-+
-+/* ABI-specific register sets. lr is an implicit scratch register. */
-+#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12))
-+#ifdef __APPLE__
-+#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9))
-+#else
-+#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_
-+#endif
-+#if LJ_SOFTFP
-+#define RSET_SCRATCH_FPR 0
-+#else
-+#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1))
-+#endif
-+#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
-+#define REGARG_FIRSTGPR RID_R0
-+#define REGARG_LASTGPR RID_R3
-+#define REGARG_NUMGPR 4
-+#if LJ_ABI_SOFTFP
-+#define REGARG_FIRSTFPR 0
-+#define REGARG_LASTFPR 0
-+#define REGARG_NUMFPR 0
-+#else
-+#define REGARG_FIRSTFPR RID_D0
-+#define REGARG_LASTFPR RID_D7
-+#define REGARG_NUMFPR 8
-+#endif
-+
-+/* -- Spill slots --------------------------------------------------------- */
-+
-+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
-+**
-+** SPS_FIXED: Available fixed spill slots in interpreter frame.
-+** This definition must match with the *.dasc file(s).
-+**
-+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
-+*/
-+#define SPS_FIXED 2
-+#define SPS_FIRST 2
-+
-+#define SPOFS_TMP 0
-+
-+#define sps_scale(slot) (4 * (int32_t)(slot))
-+#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1)
-+
-+/* -- Exit state ---------------------------------------------------------- */
-+
-+/* This definition must match with the *.dasc file(s). */
-+typedef struct {
-+#if !LJ_SOFTFP
-+ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
-+#endif
-+ int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
-+ int32_t spill[256]; /* Spill slots. */
-+} ExitState;
-+
-+/* PC after instruction that caused an exit. Used to find the trace number. */
-+#define EXITSTATE_PCREG RID_PC
-+/* Highest exit + 1 indicates stack check. */
-+#define EXITSTATE_CHECKEXIT 1
-+
-+#define EXITSTUB_SPACING 4
-+#define EXITSTUBS_PER_GROUP 32
-+
-+/* -- Instructions -------------------------------------------------------- */
-+
-+/* Instruction fields. */
-+#define ARMF_CC(ai, cc) (((ai) ^ ARMI_CCAL) | ((cc) << 28))
-+#define ARMF_N(r) ((r) << 16)
-+#define ARMF_D(r) ((r) << 12)
-+#define ARMF_S(r) ((r) << 8)
-+#define ARMF_M(r) (r)
-+#define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7))
-+#define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
-+
-+typedef enum S390Ins {
-+
-+ // Unsupported in S390
-+ #ARMI_LDRSB = 0xe01000d0,
-+ #ARMI_S = 0x000100000,
-+ #ARMI_LDRD = 0xe00000d0,
-+ #ARMI_ADC = 0xe0a00000,
-+ #ARMI_SBC = 0xe0c00000,
-+ #ARMI_STRB = 0xe4400000,
-+ #ARMI_STRH = 0xe00000b0,
-+ #ARMI_STRD = 0xe00000f0,
-+ #ARMI_BL = 0xeb000000,
-+ #ARMI_BLX = 0xfa000000,
-+ #ARMI_BLXr = 0xe12fff30,
-+ #ARMI_BIC = 0xe1c00000,
-+ #ARMI_ORR = 0xe1800000,
-+ #ARMI_LDRB = 0xe4500000,
-+ #ARMI_MVN = 0xe1e00000,
-+ #ARMI_LDRSH = 0xe01000f0,
-+ #ARMI_NOP = 0xe1a00000,
-+ #ARMI_PUSH = 0xe92d0000,
-+ #ARMI_RSB = 0xe0600000,
-+ #ARMI_RSC = 0xe0e00000,
-+ #ARMI_TEQ = 0xe1300000,
-+ #ARMI_CCAL = 0xe0000000,
-+ #ARMI_K12 = 0x02000000,
-+ #ARMI_KNEG = 0x00200000,
-+ #ARMI_LS_W = 0x00200000,
-+ #ARMI_LS_U = 0x00800000,
-+ #ARMI_LS_P = 0x01000000,
-+ #ARMI_LS_R = 0x02000000,
-+ #ARMI_LSX_I = 0x00400000,
-+
-+
-+ #ARMI_SUB = 0xe0400000,
-+ #ARMI_ADD = 0xe0800000,
-+ #ARMI_AND = 0xe0000000,
-+ #ARMI_EOR = 0xe0200000,
-+ #ARMI_MUL = 0xe0000090,
-+ #ARMI_LDR = 0xe4100000,
-+ #ARMI_CMP = 0xe1500000,
-+ #ARMI_LDRH = 0xe01000b0,
-+ #ARMI_B = 0xea000000,
-+ #ARMI_MOV = 0xe1a00000,
-+ #ARMI_STR = 0xe4000000,
-+ #ARMI_TST = 0xe1100000,
-+ #ARMI_SMULL = 0xe0c00090,
-+ #ARMI_CMN = 0xe1700000,
-+ S390I_SR = 0x1B000000,
-+ S390I_AR = 0x1A000000,
-+ S390I_NR = 0x14000000,
-+ S390I_XR = 0x17000000,
-+ S390I_MR = 0x1C000000,
-+ S390I_LR = 0x18000000,
-+ S390I_C = 0x59000000,
-+ S390I_LH = 0x48000000,
-+ S390I_BASR = 0x0D000000,
-+ S390I_MVCL = 0x0e000000,
-+ S390I_ST = 0x50000000,
-+ S390I_TM = 0x91000000,
-+ S390I_MP = 0xbd000090,
-+ S390I_CLR = 0x15000000,
-+
-+ /* ARMv6 */
-+ #ARMI_REV = 0xe6bf0f30,
-+ #ARMI_SXTB = 0xe6af0070,
-+ #ARMI_SXTH = 0xe6bf0070,
-+ #ARMI_UXTB = 0xe6ef0070,
-+ #ARMI_UXTH = 0xe6ff0070,
-+
-+ /* ARMv6T2 */
-+ #ARMI_MOVW = 0xe3000000,
-+ #ARMI_MOVT = 0xe3400000,
-+
-+ /* VFP */
-+ ARMI_VMOV_D = 0xeeb00b40,
-+ ARMI_VMOV_S = 0xeeb00a40,
-+ ARMI_VMOVI_D = 0xeeb00b00,
-+
-+ ARMI_VMOV_R_S = 0xee100a10,
-+ ARMI_VMOV_S_R = 0xee000a10,
-+ ARMI_VMOV_RR_D = 0xec500b10,
-+ ARMI_VMOV_D_RR = 0xec400b10,
-+
-+ ARMI_VADD_D = 0xee300b00,
-+ ARMI_VSUB_D = 0xee300b40,
-+ ARMI_VMUL_D = 0xee200b00,
-+ ARMI_VMLA_D = 0xee000b00,
-+ ARMI_VMLS_D = 0xee000b40,
-+ ARMI_VNMLS_D = 0xee100b00,
-+ ARMI_VDIV_D = 0xee800b00,
-+
-+ ARMI_VABS_D = 0xeeb00bc0,
-+ ARMI_VNEG_D = 0xeeb10b40,
-+ ARMI_VSQRT_D = 0xeeb10bc0,
-+
-+ ARMI_VCMP_D = 0xeeb40b40,
-+ ARMI_VCMPZ_D = 0xeeb50b40,
-+
-+ ARMI_VMRS = 0xeef1fa10,
-+
-+ ARMI_VCVT_S32_F32 = 0xeebd0ac0,
-+ ARMI_VCVT_S32_F64 = 0xeebd0bc0,
-+ ARMI_VCVT_U32_F32 = 0xeebc0ac0,
-+ ARMI_VCVT_U32_F64 = 0xeebc0bc0,
-+ ARMI_VCVTR_S32_F32 = 0xeebd0a40,
-+ ARMI_VCVTR_S32_F64 = 0xeebd0b40,
-+ ARMI_VCVTR_U32_F32 = 0xeebc0a40,
-+ ARMI_VCVTR_U32_F64 = 0xeebc0b40,
-+ ARMI_VCVT_F32_S32 = 0xeeb80ac0,
-+ ARMI_VCVT_F64_S32 = 0xeeb80bc0,
-+ ARMI_VCVT_F32_U32 = 0xeeb80a40,
-+ ARMI_VCVT_F64_U32 = 0xeeb80b40,
-+ ARMI_VCVT_F32_F64 = 0xeeb70bc0,
-+ ARMI_VCVT_F64_F32 = 0xeeb70ac0,
-+
-+ ARMI_VLDR_S = 0xed100a00,
-+ ARMI_VLDR_D = 0xed100b00,
-+ ARMI_VSTR_S = 0xed000a00,
-+ ARMI_VSTR_D = 0xed000b00,
-+} S390Ins;
-+
-+typedef enum S390Shift {
-+ S390SH_SLL, S390SH_SRL, S390SH_SRA
-+ # Adjustment needed for ROR
-+} S390Shift;
-+
-+/* ARM condition codes. */
-+typedef enum ARMCC {
-+ CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
-+ CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
-+ CC_HS = CC_CS, CC_LO = CC_CC
-+} ARMCC;
-+
-+#endif
-
-From 096a33d925ea91bc442cea4f1a8d7c54e81b1f45 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Thu, 10 Nov 2016 10:35:35 +0530
-Subject: [PATCH 002/260] Update Makefile
-
-Added condition for s390 in Makefile
----
- src/Makefile | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/src/Makefile b/src/Makefile
-index 4e479ae5a..9f7d28ce3 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -238,6 +238,9 @@ else
- ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= arm
- else
-+ifneq (,$(findstring LJ_TARGET_S390 ,$(TARGET_TESTARCH)))
-+ TARGET_LJARCH= s390
-+else
- ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= arm64
- else
-
-From be89c18b9827a2cb4ea7807a69e253db075ec7e5 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Thu, 10 Nov 2016 10:42:51 +0530
-Subject: [PATCH 003/260] Update lj_arch.h
-
-Added supporting lines for s390
-Lines added using arm lines as reference
----
- src/lj_arch.h | 24 ++++++++++++++++++++++++
- 1 file changed, 24 insertions(+)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index cc5a0a66d..5155bf691 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -29,6 +29,7 @@
- #define LUAJIT_ARCH_mips32 6
- #define LUAJIT_ARCH_MIPS64 7
- #define LUAJIT_ARCH_mips64 7
-+#define LUAJIT_ARCH_S390 8
-
- /* Target OS. */
- #define LUAJIT_OS_OTHER 0
-@@ -49,6 +50,8 @@
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM
- #elif defined(__aarch64__)
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM64
-+#elif defined(__s390__) || defined(__s390) || defined(__S390__) || defined(__S390) || defined(S390)
-+#define LUAJIT_TARGET LUAJIT_ARCH_S390
- #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
- #define LUAJIT_TARGET LUAJIT_ARCH_PPC
- #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
-@@ -230,6 +233,23 @@
-
- #define LJ_ARCH_VERSION 80
-
-+#elif LUAJIT_TARGET == LUAJIT_ARCH_S390
-+
-+ #define LJ_ARCH_NAME "s390"
-+ #define LJ_ARCH_BITS 64
-+ #define LJ_ARCH_ENDIAN LUAJIT_BE
-+ #if !defined(LJ_ARCH_HASFPU) && __SOFTFP__
-+ #define LJ_ARCH_HASFPU 1
-+ #endif
-+ #define LJ_ABI_EABI 1
-+ #define LJ_TARGET_S390 1
-+ #define LJ_TARGET_EHRETREG 0
-+ #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
-+ #define LJ_TARGET_MASKSHIFT 0
-+ #define LJ_TARGET_MASKROT 1
-+ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
-+ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
-+
- #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
-
- #ifndef LJ_ARCH_ENDIAN
-@@ -379,6 +399,10 @@
- #if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
- #error "Need at least Clang 3.5 or newer"
- #endif
-+#elif LJ_TARGET_S390
-+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
-+#error "Need at least GCC 4.2 or newer"
-+#endif
- #else
- #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
- #error "Need at least GCC 4.8 or newer"
-
-From f1f03ec44bdcf0228cac0f090c83883a920bfa0c Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb@gmail.com>
-Date: Thu, 10 Nov 2016 19:00:41 +0530
-Subject: [PATCH 004/260] Copy of dasm_arm64.lua file, with few changes
-
-Have changed few sections of file, other part is common across architectures
----
- dynasm/dasm_s390x.lua | 1177 +++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 1177 insertions(+)
- create mode 100644 dynasm/dasm_s390x.lua
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-new file mode 100644
-index 000000000..a0a50e1e1
---- /dev/null
-+++ b/dynasm/dasm_s390x.lua
-@@ -0,0 +1,1177 @@
-+------------------------------------------------------------------------------
-+-- DynASM s390x module.
-+--
-+-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
-+-- See dynasm.lua for full copyright notice.
-+------------------------------------------------------------------------------
-+
-+-- Module information:
-+local _info = {
-+ arch = "s390x",
-+ description = "DynASM s390x module",
-+ version = "1.4.0",
-+ vernum = 10400,
-+ release = "2015-10-18",
-+ author = "Mike Pall",
-+ license = "MIT",
-+}
-+
-+-- Exported glue functions for the arch-specific module.
-+local _M = { _info = _info }
-+
-+-- Cache library functions.
-+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
-+local assert, setmetatable, rawget = assert, setmetatable, rawget
-+local _s = string
-+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
-+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
-+local concat, sort, insert = table.concat, table.sort, table.insert
-+local bit = bit or require("bit")
-+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
-+local ror, tohex = bit.ror, bit.tohex
-+
-+-- Inherited tables and callbacks.
-+local g_opt, g_arch
-+local wline, werror, wfatal, wwarn
-+
-+-- Action name list.
-+-- CHECK: Keep this in sync with the C code!
-+local action_names = {
-+ "STOP", "SECTION", "ESC", "REL_EXT",
-+ "ALIGN", "REL_LG", "LABEL_LG",
-+ "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
-+}
-+
-+-- Maximum number of section buffer positions for dasm_put().
-+-- CHECK: Keep this in sync with the C code!
-+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
-+
-+-- Action name -> action number.
-+local map_action = {}
-+for n,name in ipairs(action_names) do
-+ map_action[name] = n-1
-+end
-+
-+-- Action list buffer.
-+local actlist = {}
-+
-+-- Argument list for next dasm_put(). Start with offset 0 into action list.
-+local actargs = { 0 }
-+
-+-- Current number of section buffer positions for dasm_put().
-+local secpos = 1
-+
-+------------------------------------------------------------------------------
-+
-+-- Dump action names and numbers.
-+local function dumpactions(out)
-+ out:write("DynASM encoding engine action codes:\n")
-+ for n,name in ipairs(action_names) do
-+ local num = map_action[name]
-+ out:write(format(" %-10s %02X %d\n", name, num, num))
-+ end
-+ out:write("\n")
-+end
-+
-+-- Write action list buffer as a huge static C array.
-+local function writeactions(out, name)
-+ local nn = #actlist
-+ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
-+ out:write("static const unsigned int ", name, "[", nn, "] = {\n")
-+ for i = 1,nn-1 do
-+ assert(out:write("0x", tohex(actlist[i]), ",\n"))
-+ end
-+ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
-+end
-+
-+------------------------------------------------------------------------------
-+
-+-- Add word to action list.
-+local function wputxw(n)
-+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
-+ actlist[#actlist+1] = n
-+end
-+
-+-- Add action to list with optional arg. Advance buffer pos, too.
-+local function waction(action, val, a, num)
-+ local w = assert(map_action[action], "bad action name `"..action.."'")
-+ wputxw(w * 0x10000 + (val or 0))
-+ if a then actargs[#actargs+1] = a end
-+ if a or num then secpos = secpos + (num or 1) end
-+end
-+
-+-- Flush action list (intervening C code or buffer pos overflow).
-+local function wflush(term)
-+ if #actlist == actargs[1] then return end -- Nothing to flush.
-+ if not term then waction("STOP") end -- Terminate action list.
-+ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
-+ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
-+ secpos = 1 -- The actionlist offset occupies a buffer position, too.
-+end
-+
-+-- Put escaped word.
-+local function wputw(n)
-+ if n <= 0x000fffff then waction("ESC") end
-+ wputxw(n)
-+end
-+
-+-- Reserve position for word.
-+local function wpos()
-+ local pos = #actlist+1
-+ actlist[pos] = ""
-+ return pos
-+end
-+
-+-- Store word to reserved position.
-+local function wputpos(pos, n)
-+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
-+ if n <= 0x000fffff then
-+ insert(actlist, pos+1, n)
-+ n = map_action.ESC * 0x10000
-+ end
-+ actlist[pos] = n
-+end
-+
-+------------------------------------------------------------------------------
-+
-+-- Global label name -> global label number. With auto assignment on 1st use.
-+local next_global = 20
-+local map_global = setmetatable({}, { __index = function(t, name)
-+ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
-+ local n = next_global
-+ if n > 2047 then werror("too many global labels") end
-+ next_global = n + 1
-+ t[name] = n
-+ return n
-+end})
-+
-+-- Dump global labels.
-+local function dumpglobals(out, lvl)
-+ local t = {}
-+ for name, n in pairs(map_global) do t[n] = name end
-+ out:write("Global labels:\n")
-+ for i=20,next_global-1 do
-+ out:write(format(" %s\n", t[i]))
-+ end
-+ out:write("\n")
-+end
-+
-+-- Write global label enum.
-+local function writeglobals(out, prefix)
-+ local t = {}
-+ for name, n in pairs(map_global) do t[n] = name end
-+ out:write("enum {\n")
-+ for i=20,next_global-1 do
-+ out:write(" ", prefix, t[i], ",\n")
-+ end
-+ out:write(" ", prefix, "_MAX\n};\n")
-+end
-+
-+-- Write global label names.
-+local function writeglobalnames(out, name)
-+ local t = {}
-+ for name, n in pairs(map_global) do t[n] = name end
-+ out:write("static const char *const ", name, "[] = {\n")
-+ for i=20,next_global-1 do
-+ out:write(" \"", t[i], "\",\n")
-+ end
-+ out:write(" (const char *)0\n};\n")
-+end
-+
-+------------------------------------------------------------------------------
-+
-+-- Extern label name -> extern label number. With auto assignment on 1st use.
-+local next_extern = 0
-+local map_extern_ = {}
-+local map_extern = setmetatable({}, { __index = function(t, name)
-+ -- No restrictions on the name for now.
-+ local n = next_extern
-+ if n > 2047 then werror("too many extern labels") end
-+ next_extern = n + 1
-+ t[name] = n
-+ map_extern_[n] = name
-+ return n
-+end})
-+
-+-- Dump extern labels.
-+local function dumpexterns(out, lvl)
-+ out:write("Extern labels:\n")
-+ for i=0,next_extern-1 do
-+ out:write(format(" %s\n", map_extern_[i]))
-+ end
-+ out:write("\n")
-+end
-+
-+-- Write extern label names.
-+local function writeexternnames(out, name)
-+ out:write("static const char *const ", name, "[] = {\n")
-+ for i=0,next_extern-1 do
-+ out:write(" \"", map_extern_[i], "\",\n")
-+ end
-+ out:write(" (const char *)0\n};\n")
-+end
-+
-+------------------------------------------------------------------------------
-+
-+-- Arch-specific maps.
-+-- TODO: add s390x related register names
-+-- Ext. register name -> int. name.
-+--local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
-+local map_archdef = {}
-+
-+-- Int. register name -> ext. name.
-+-- local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
-+local map_reg_rev = {}
-+
-+local map_type = {} -- Type name -> { ctype, reg }
-+local ctypenum = 0 -- Type number (for Dt... macros).
-+
-+-- Reverse defines for registers.
-+function _M.revdef(s)
-+ return map_reg_rev[s] or s
-+end
-+-- not sure of these
-+local map_shift = { lsl = 0, lsr = 1, asr = 2, }
-+
-+local map_extend = {
-+ uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
-+ sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
-+}
-+
-+local map_cond = {
-+ eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
-+ hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
-+ hs = 2, lo = 3,
-+}
-+
-+------------------------------------------------------------------------------
-+
-+local parse_reg_type
-+
-+
-+local function parse_gpr(expr)
-+ local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$")
-+ local tp = map_type[tname or expr]
-+ if tp then
-+ local reg = ovreg or tp.reg
-+ if not reg then
-+ werror("type `"..(tname or expr).."' needs a register override")
-+ end
-+ expr = reg
-+ end
-+ local r = match(expr, "^r([1-3]?[0-9])$")
-+ if r then
-+ r = tonumber(r)
-+ if r <= 31 then return r, tp end
-+ end
-+ werror("bad register name `"..expr.."'")
-+end
-+
-+local function parse_fpr(expr)
-+ local r = match(expr, "^f([1-3]?[0-9])$")
-+ if r then
-+ r = tonumber(r)
-+ if r <= 31 then return r end
-+ end
-+ werror("bad register name `"..expr.."'")
-+end
-+
-+
-+
-+
-+
-+local function parse_reg_base(expr)
-+ if expr == "sp" then return 0x3e0 end
-+ local base, tp = parse_reg(expr)
-+ if parse_reg_type ~= "x" then werror("bad register type") end
-+ parse_reg_type = false
-+ return shl(base, 5), tp
-+end
-+
-+local parse_ctx = {}
-+
-+local loadenv = setfenv and function(s)
-+ local code = loadstring(s, "")
-+ if code then setfenv(code, parse_ctx) end
-+ return code
-+end or function(s)
-+ return load(s, "", nil, parse_ctx)
-+end
-+
-+-- Try to parse simple arithmetic, too, since some basic ops are aliases.
-+local function parse_number(n)
-+ local x = tonumber(n)
-+ if x then return x end
-+ local code = loadenv("return "..n)
-+ if code then
-+ local ok, y = pcall(code)
-+ if ok then return y end
-+ end
-+ return nil
-+end
-+
-+local function parse_imm(imm, bits, shift, scale, signed)
-+ imm = match(imm, "^#(.*)$")
-+ if not imm then werror("expected immediate operand") end
-+ local n = parse_number(imm)
-+ if n then
-+ local m = sar(n, scale)
-+ if shl(m, scale) == n then
-+ if signed then
-+ local s = sar(m, bits-1)
-+ if s == 0 then return shl(m, shift)
-+ elseif s == -1 then return shl(m + shl(1, bits), shift) end
-+ else
-+ if sar(m, bits) == 0 then return shl(m, shift) end
-+ end
-+ end
-+ werror("out of range immediate `"..imm.."'")
-+ else
-+ waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
-+ return 0
-+ end
-+end
-+
-+local function parse_imm12(imm)
-+ imm = match(imm, "^#(.*)$")
-+ if not imm then werror("expected immediate operand") end
-+ local n = parse_number(imm)
-+ if n then
-+ if shr(n, 12) == 0 then
-+ return shl(n, 10)
-+ elseif band(n, 0xff000fff) == 0 then
-+ return shr(n, 2) + 0x00400000
-+ end
-+ werror("out of range immediate `"..imm.."'")
-+ else
-+ waction("IMM12", 0, imm)
-+ return 0
-+ end
-+end
-+
-+-- Parse a "#imm" operand that must be a repeating run-of-ones bit pattern.
-+-- NOTE(review): this looks like the ARM64 logical (bitmask) immediate
-+-- encoder carried over unchanged -- confirm whether s390x needs it at all.
-+-- Only integers in 0..0xffffffff are encoded here. Everything else (including
-+-- operands parse_number() cannot evaluate) is deferred to an IMM13W action,
-+-- or to IMM13X plus an extra argument holding the upper 32 bits when the
-+-- most recently parsed register type is "x".
-+local function parse_imm13(imm)
-+ imm = match(imm, "^#(.*)$")
-+ if not imm then werror("expected immediate operand") end
-+ local n = parse_number(imm)
-+ local r64 = parse_reg_type == "x"
-+ if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
-+ local inv = false
-+ -- Work on the complement when bit 0 is set so the pattern starts with 0.
-+ if band(n, 1) == 1 then n = bit.bnot(n); inv = true end
-+ -- Build the 32 bits as a string of "0"/"1" characters, bit 0 first.
-+ local t = {}
-+ for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end
-+ local b = table.concat(t)
-+ -- Extend to 64 characters: constant upper half for "x", else a repeat.
-+ b = b..(r64 and (inv and "1" or "0"):rep(32) or b)
-+ local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
-+ if p0 then
-+ -- w is the candidate element width; it must be a power of two and the
-+ -- whole 64 character string must be that element repeated.
-+ local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
-+ if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
-+ local s = band(-2*w, 0x3f) - 1
-+ if w == 64 then s = s + 0x1000 end
-+ if inv then
-+ return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
-+ else
-+ return shl(w-#p0, 16) + shl(s+#p1, 10)
-+ end
-+ end
-+ end
-+ werror("out of range immediate `"..imm.."'")
-+ elseif r64 then
-+ waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
-+ actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
-+ return 0
-+ else
-+ waction("IMM13W", 0, imm)
-+ return 0
-+ end
-+end
-+
-+-- Parse a "#imm" operand in the range 0..63. The low five bits go to bit 19
-+-- and bit 5 of the value is carried in the top bit (0x80000000). Operands
-+-- that are not numbers yet are deferred to an IMM6 action.
-+local function parse_imm6(imm)
-+  imm = match(imm, "^#(.*)$")
-+  if not imm then werror("expected immediate operand") end
-+  local value = parse_number(imm)
-+  if not value then
-+    waction("IMM6", 0, imm)
-+    return 0
-+  end
-+  if value >= 0 and value <= 63 then
-+    local high = 0
-+    if value >= 32 then high = 0x80000000 end
-+    return shl(band(value, 0x1f), 19) + high
-+  end
-+  werror("out of range immediate `"..imm.."'")
-+end
-+
-+-- Encode a load/store offset (text without the leading '#').
-+-- Preference order: a non-negative multiple of 2^scale that fits 12 bits
-+-- after scaling, then any offset in -256..255 taken unscaled. Operands that
-+-- are not numbers yet are deferred to an IMML action.
-+local function parse_imm_load(imm, scale)
-+  local offset = parse_number(imm)
-+  if not offset then
-+    waction("IMML", 0, imm)
-+    return 0
-+  end
-+  local scaled = sar(offset, scale)
-+  if shl(scaled, scale) == offset and scaled >= 0 and scaled < 0x1000 then
-+    return shl(scaled, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
-+  end
-+  if offset >= -256 and offset < 256 then
-+    return shl(band(offset, 511), 12) -- Unscaled, signed 9 bit offset.
-+  end
-+  werror("out of range immediate `"..imm.."'")
-+end
-+
-+-- Parse a "#imm" floating point immediate and return its encoded field.
-+-- The value is split with math.frexp(); it is accepted only when the
-+-- fraction needs no more than four bits (m*32-16 is an integer in 0..15)
-+-- and the exponent minus 2 fits a signed three bit field.
-+-- Operands that cannot be evaluated here are rejected (no action exists).
-+local function parse_fpimm(imm)
-+ imm = match(imm, "^#(.*)$")
-+ if not imm then werror("expected immediate operand") end
-+ local n = parse_number(imm)
-+ if n then
-+ local m, e = math.frexp(n)
-+ local s, e2 = 0, band(e-2, 7)
-+ -- Record the sign as a flag bit and continue with the magnitude.
-+ if m < 0 then m = -m; s = 0x00100000 end
-+ m = m*32-16
-+ -- The sar/shl pair sign-extends e2 to verify the exponent round-trips.
-+ if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then
-+ return s + shl(e2, 17) + shl(m, 13)
-+ end
-+ werror("out of range immediate `"..imm.."'")
-+ else
-+ werror("NYI fpimm action")
-+ end
-+end
-+
-+-- Parse "<shift> #amount": the shift name is looked up in map_shift and
-+-- placed at bit 22, the amount is a 6 bit unsigned immediate at bit 10.
-+local function parse_shift(expr)
-+  local name, amount = match(expr, "^(%S+)%s*(.*)$")
-+  local code = map_shift[name]
-+  if not code then werror("expected shift operand") end
-+  local field = parse_imm(amount, 6, 10, 0, false)
-+  return field + shl(code, 22)
-+end
-+
-+-- Parse "lsl #n" where n must be a multiple of 16: 0..48 when the current
-+-- register type is "x", otherwise 0 or 16. The amount is placed at bit 17.
-+local function parse_lslx16(expr)
-+  local amount = tonumber((match(expr, "^lsl%s*#(%d+)$")))
-+  if not amount then werror("expected shift operand") end
-+  local forbidden = 0xffffffef
-+  if parse_reg_type == "x" then forbidden = 0xffffffcf end
-+  if band(amount, forbidden) ~= 0 then
-+    werror("bad shift amount")
-+  end
-+  return shl(amount, 17)
-+end
-+
-+-- Parse "<extend> [#amount]". "lsl" maps to code 3 for "x" registers and 2
-+-- otherwise; any other name is looked up in map_extend. The code goes to
-+-- bit 13 and the optional amount is a 3 bit unsigned immediate at bit 10.
-+local function parse_extend(expr)
-+  local name, amount = match(expr, "^(%S+)%s*(.*)$")
-+  local code
-+  if name == "lsl" then
-+    code = parse_reg_type == "x" and 3 or 2
-+  else
-+    code = map_extend[name]
-+  end
-+  if not code then werror("expected extend operand") end
-+  local field = 0
-+  if amount ~= "" then field = parse_imm(amount, 3, 10, 0, false) end
-+  return field + shl(code, 13)
-+end
-+
-+-- Look up a condition name in map_cond, xor it with inv and return the
-+-- result positioned at bit 12.
-+local function parse_cond(expr, inv)
-+  local code = map_cond[expr]
-+  if not code then werror("expected condition operand") end
-+  local adjusted = bit.bxor(code, inv)
-+  return shl(adjusted, 12)
-+end
-+
-+-- Parse the address operand(s) of a single-register load/store, starting at
-+-- params[n], and return op with the addressing fields added.
-+-- Accepted shapes, as matched below:
-+--   type-qualified register followed by a field expression (IMML action)
-+--   [base], imm          separate immediate operand, flag 0x400
-+--   [base, imm]!         trailing '!', flag 0xc00
-+--   [base]               flag 0x01000000
-+--   [base, #imm]         handled by parse_imm_load()
-+--   [base, index {, extend-or-shift {#amount}}]
-+-- NOTE(review): the operand shapes and field positions appear to be the
-+-- ARM64 ones carried over unchanged -- confirm before relying on them.
-+local function parse_load(params, nparams, n, op)
-+ if params[n+2] then werror("too many operands") end
-+ local pn, p2 = params[n], params[n+1]
-+ -- p1 is the text inside the brackets, wb is "!" or "".
-+ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
-+ if not p1 then
-+ if not p2 then
-+ -- No brackets: try "reg tail" where reg resolves to a typed register.
-+ local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
-+ if reg and tailr ~= "" then
-+ local base, tp = parse_reg_base(reg)
-+ if tp then
-+ waction("IMML", 0, format(tp.ctypefmt, tailr))
-+ return op + base
-+ end
-+ end
-+ end
-+ werror("expected address operand")
-+ end
-+ -- The top two opcode bits select the offset scale for immediate forms.
-+ local scale = shr(op, 30)
-+ if p2 then
-+ if wb == "!" then werror("bad use of '!'") end
-+ op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
-+ elseif wb == "!" then
-+ local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
-+ if not p1a then werror("bad use of '!'") end
-+ op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
-+ else
-+ local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
-+ op = op + parse_reg_base(p1a)
-+ if p2a ~= "" then
-+ local imm = match(p2a, "^,%s*#(.*)$")
-+ if imm then
-+ op = op + parse_imm_load(imm, scale)
-+ else
-+ -- Register index: p2b is the index register, p3b the extend/shift
-+ -- name (may be empty) and p3s the optional "#amount".
-+ local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
-+ op = op + shl(parse_reg(p2b), 16) + 0x00200800
-+ if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
-+ werror("bad index register type")
-+ end
-+ if p3b == "" then
-+ if parse_reg_type ~= "x" then werror("bad index register type") end
-+ op = op + 0x6000
-+ else
-+ -- Only no amount, "#0", or "#<scale>" are accepted.
-+ if p3s == "" or p3s == "#0" then
-+ elseif p3s == "#"..scale then
-+ op = op + 0x1000
-+ else
-+ werror("bad scale")
-+ end
-+ if parse_reg_type == "x" then
-+ if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
-+ elseif p3b == "sxtx" then op = op + 0xe000
-+ else
-+ werror("bad extend/shift specifier")
-+ end
-+ else
-+ if p3b == "uxtw" then op = op + 0x4000
-+ elseif p3b == "sxtw" then op = op + 0xc000
-+ else
-+ werror("bad extend/shift specifier")
-+ end
-+ end
-+ end
-+ end
-+ else
-+ if wb == "!" then werror("bad use of '!'") end
-+ op = op + 0x01000000
-+ end
-+ end
-+ return op
-+end
-+
-+-- Parse the address operand(s) of a register-pair load/store, starting at
-+-- params[n], and return op with base register, offset and mode flag added.
-+-- The offset is a signed 7 bit immediate at bit 15, scaled by 2 when the top
-+-- two opcode bits are zero and by 3 otherwise.
-+-- Mode flags: 0x00800000 for "[base], imm", 0x01800000 for "[...]!",
-+-- 0x01000000 for plain "[base {, imm}]" and for the typed-register form.
-+local function parse_load_pair(params, nparams, n, op)
-+ if params[n+2] then werror("too many operands") end
-+ local pn, p2 = params[n], params[n+1]
-+ local scale = shr(op, 30) == 0 and 2 or 3
-+ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
-+ if not p1 then
-+ if not p2 then
-+ -- No brackets: try "reg tail" where reg resolves to a typed register.
-+ local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
-+ if reg and tailr ~= "" then
-+ local base, tp = parse_reg_base(reg)
-+ if tp then
-+ -- Same IMM descriptor layout parse_imm() builds: signed, 7 bits at 15.
-+ waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
-+ return op + base + 0x01000000
-+ end
-+ end
-+ end
-+ werror("expected address operand")
-+ end
-+ if p2 then
-+ if wb == "!" then werror("bad use of '!'") end
-+ op = op + 0x00800000
-+ else
-+ -- Split "base, imm" inside the brackets; a bare base means offset #0.
-+ local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
-+ if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
-+ op = op + (wb == "!" and 0x01800000 or 0x01000000)
-+ end
-+ return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
-+end
-+
-+-- Classify a label operand and return (mode, number [, expression]).
-+--   =>expr         "PC", 0, expr
-+--   ->name         "LG", global number from map_global
-+--   1..9           "LG", 11..19          (definitions only, def is true)
-+--   >1..>9         "LG", 1..9            (references only)
-+--   <1..<9         "LG", 11..19          (references only)
-+--   extern name    "EXT", number from map_extern (references only)
-+-- Anything else is reported through werror().
-+local function parse_label(label, def)
-+  local lead = sub(label, 1, 2)
-+  if lead == "=>" then
-+    return "PC", 0, sub(label, 3)
-+  elseif lead == "->" then
-+    return "LG", map_global[sub(label, 3)]
-+  end
-+  if def then
-+    if match(label, "^[1-9]$") then
-+      return "LG", 10+tonumber(label)
-+    end
-+  else
-+    local dir, lnum = match(label, "^([<>])([1-9])$")
-+    if dir then
-+      -- Forward references keep the digit, backward ones are biased by 10.
-+      local bias = 10
-+      if dir == ">" then bias = 0 end
-+      return "LG", lnum + bias
-+    end
-+    local extname = match(label, "^extern%s+(%S+)$")
-+    if extname then
-+      return "EXT", map_extern[extname]
-+    end
-+  end
-+  werror("bad label `"..label.."'")
-+end
-+
-+-- Classify an opcode word by which kind of pc-relative field it carries and
-+-- return the selector that parse_template() adds to the REL_* action value.
-+-- Fails an assertion for opcodes that match none of the masks.
-+-- NOTE(review): the masks and mnemonics are the ARM64 ones; nothing here is
-+-- specific to s390x yet.
-+local function branch_type(op)
-+ if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL
-+ elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
-+ band(op, 0x3b000000) == 0x18000000 then
-+ return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
-+ elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ
-+ elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR
-+ -- The one-argument band() presumably normalizes the literal to the signed
-+ -- 32 bit form that bit-op results use, so the two sides can compare equal.
-+ elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP
-+ else
-+ assert(false, "unknown branch type")
-+ end
-+end
-+
-+------------------------------------------------------------------------------
-+
-+local map_op, op_template
-+
-+-- Build an opcode handler that rewrites its parameters with f() and then
-+-- assembles them using the template registered under opname. Called without
-+-- params, the handler returns "-> " plus opname minus its last two
-+-- characters.
-+local function op_alias(opname, f)
-+  local function expand(params, nparams)
-+    if not params then return "-> "..opname:sub(1, -3) end
-+    f(params, nparams)
-+    op_template(params, map_op[opname], nparams)
-+  end
-+  return expand
-+end
-+
-+-- Rewrite operand 4 from "#width" to "#(lsb)+(width)-1", with lsb taken from
-+-- operand 3; the leading '#' of both operands is dropped first.
-+local function alias_bfx(p)
-+  local lsb, width = p[3]:sub(2), p[4]:sub(2)
-+  p[4] = "#("..lsb..")+("..width..")-1"
-+end
-+
-+-- Rewrite operands 3 and 4 to "#-(lsb)%size" and "#(width)-1", where size is
-+-- 32 when operand 1 parses as a "w" register and 64 otherwise.
-+local function alias_bfiz(p)
-+  parse_reg(p[1])
-+  local size = "64"
-+  if parse_reg_type == "w" then size = "32" end
-+  p[3] = "#-("..p[3]:sub(2)..")%"..size
-+  p[4] = "#("..p[4]:sub(2)..")-1"
-+end
-+
-+-- Immediate left shift expressed through the "ubfm_4" template: operand 3
-+-- becomes "#-(sh)%size" and operand 4 "#<size-1>-(sh)", with size 32 for a
-+-- "w" destination register and 64 otherwise.
-+local alias_lslimm = op_alias("ubfm_4", function(p)
-+  parse_reg(p[1])
-+  local amount = p[3]:sub(2)
-+  local size, top = "64", "63"
-+  if parse_reg_type == "w" then size, top = "32", "31" end
-+  p[3] = "#-("..amount..")%"..size
-+  p[4] = "#"..top.."-("..amount..")"
-+end)
-+
-+-- Template strings for ARM instructions.
-+map_op = {
-+  -- s390x templates. Each alternative is 16 hex digits of opcode followed by
-+  -- the instruction format name; alternatives are separated by '|' with no
-+  -- whitespace, because parse_template() slices them by fixed positions.
-+  -- Duplicate keys must be avoided: the Lua manual leaves the assignment
-+  -- order inside a table constructor undefined.
-+
-+  -- Basic data processing instructions.
-+  add_2 = "00000000005a0000RX-a|00000000001a0000RR|00000000b9f80000RRF-a|00000000e35a0000RXY-a|00000000e3080000RXY-a",
-+
-+  -- AND. An earlier malformed draft of and_2/and_c/and_i that duplicated
-+  -- these keys has been dropped; its alternatives are all covered here.
-+  and_2 = "0000000000540000RX-a|0000000000140000RR|00000000b9f40000RRF-a|00000000e3540000RXY-a",
-+  and_3 = "00000000e3800000RXY-a|00000000b9800000RRE|00000000b9e40000RRF-a",
-+  and_c = "0000000000d40000SS-a",
-+  and_i = "0000000000940000SI",
-+  and_i4 = "00000000eb540000SIY",
-+  and_i3 = "000000000a540000RI-a|000000000a550000RI-a|000000000c0a0000RIL-a|000000000a560000RI-a|000000000a570000RI-a|000000000c0b0000RIL-a",
-+  -- Branch related instructions.
-+  bal = "0000000000450000RX-a",
-+  balr = "0000000000050000RR",
-+  bas = "00000000004d0000RX-a",
-+  basr = "00000000000d0000RR",
-+  bassm = "00000000000c0000RR",
-+  bsm = "00000000000b0000RR",
-+  bc = "0000000000470000RX-b",
-+  bcr = "0000000000070000RR",
-+  bct = "0000000000460000RX-a",
-+  bctr = "0000000000060000RR",
-+  bctg = "00000000e3460000RXY-a",
-+  bctgr = "00000000b9460000RRE",
-+  bxh = "0000000000860000RS-a",
-+  bxhg = "00000000eb440000RSY-a",
-+  bxle = "0000000000870000RS-a",
-+  bxleg = "00000000eb450000RSY-a",
-+  bras = "000000000a750000RI-b",
-+  brasl = "000000000c050000RIL-b",
-+  brc = "000000000a740000RI-c",
-+  brcl = "000000000c040000RIL-c",
-+  brct = "000000000a760000RI-b",
-+  brctg = "000000000a770000RI-b",
-+  -- Opcode CC6 is BRCTH; it used to be a second "brctg" entry (with a
-+  -- letter 'o' typed for the digit '0') that clobbered the A77 encoding.
-+  brcth = "000000000cc60000RIL-b",
-+  brxh = "0000000000840000RSI",
-+  brxhg = "00000000ec440000RIE-e",
-+  brxle = "0000000000850000RSI",
-+  brxlg = "00000000ec450000RIE-e",
-+
-+  -- Subtraction (basic operation).
-+  sub = "00000000005b0000RX-a",
-+  sr = "00000000001b0000RR",
-+  srk = "00000000b9f90000RRF-a",
-+  sy = "00000000e35b0000RXY-a",
-+  sg = "00000000e3090000RXY-a",
-+  sgr = "00000000b9090000RRE",
-+  sgrk = "00000000b9e90000RRF-a",
-+  sgf = "00000000e3190000RXY-a",
-+  sgfr = "00000000b9190000RRE",
-+  sh = "00000000004b0000RX-a",
-+  shy = "00000000e37b0000RXY-a",
-+  shhhr = "00000000b9c90000RRF-a",
-+  shhlr = "00000000b9d90000RRF-a",   -- register form, was tagged RX-a
-+  sl = "00000000005f0000RX-a",
-+  slr = "00000000001f0000RR",
-+  slrk = "00000000b9fb0000RRF-a",    -- was b9f8 (the add_2 RRF-a opcode) with RR
-+  sly = "00000000e35f0000RXY-a",
-+  slg = "00000000e30b0000RXY-a",
-+  slgr = "00000000b90b0000RRE",      -- was b908 (AGR)
-+  slgrk = "00000000b9eb0000RRF-a",
-+  slgf = "00000000e31b0000RXY-a",    -- was e318 (AGF)
-+  slgfr = "00000000b91b0000RRE",
-+  slhhhr = "00000000b9cb0000RRF-a",
-+  slhhlr = "00000000b9db0000RRF-a",
-+  slfi = "000000000c250000RIL-a",
-+  slgfi = "000000000c240000RIL-a",
-+  slb = "00000000e3990000RXY-a",
-+  slbr = "00000000b9990000RRE",
-+  slbg = "00000000e3890000RXY-a",
-+  slbgr = "00000000b9890000RRE",     -- register form, was tagged RXY-a
-+
-+  cmp_2 = "0000000000590000RX-a|0000000000190000RR|00000000e3590000RXY-a",
-+  cmp_3 = "00000000e3200000RXY-a|00000000b9200000RRE|00000000e3300000RXY-a|00000000b9300000RRE",
-+
-+  div_2 = "00000000005d0000RX-a|00000000001d0000RR|00000000e3970000RXY-a|00000000b9970000RRE",
-+  div_3 = "00000000e3870000RXY-a|00000000b9870000RRE",
-+  div_sing = "00000000e30d0000RXY-a|00000000b90d0000RRE|00000000e31d0000RXY-a|00000000b91d0000RRE",
-+
-+  eor_2 = "0000000000570000RX-a|0000000000170000RR|00000000b9f70000RRF-a|00000000e3570000RXY-a",
-+  eor_3 = "00000000e3820000RXY-a|00000000b9820000RRE|00000000b9e70000RRF-a",
-+  eor_c = "0000000000d70000SS-a",
-+  eor_i = "0000000000970000SI|00000000eb570000SIY|000000000c060000RIL-a|000000000c070000RIL-a",
-+
-+  -- load instruction to be added and the following instructions need to be changed (are not s390x related)
-+  -- NOTE(review): and_3 and eor_3 below reuse keys already defined in the
-+  -- s390x section above; one of each pair has to be renamed or removed.
-+
-+  neg_2 = "4b0003e0DMg",
-+  neg_3 = "4b0003e0DMSg",
-+  negs_2 = "6b0003e0DMg",
-+  negs_3 = "6b0003e0DMSg",
-+  adc_3 = "1a000000DNMg",
-+  adcs_3 = "3a000000DNMg",
-+  sbc_3 = "5a000000DNMg",
-+  sbcs_3 = "7a000000DNMg",
-+  ngc_2 = "5a0003e0DMg",
-+  ngcs_2 = "7a0003e0DMg",
-+  and_3 = "0a000000DNMg|12000000pDNig",
-+  and_4 = "0a000000DNMSg",
-+  orr_3 = "2a000000DNMg|32000000pDNig",
-+  orr_4 = "2a000000DNMSg",
-+  eor_3 = "4a000000DNMg|52000000pDNig",
-+  eor_4 = "4a000000DNMSg",
-+  ands_3 = "6a000000DNMg|72000000DNig",
-+  ands_4 = "6a000000DNMSg",
-+  tst_2 = "6a00001fNMg|7200001fNig",
-+  tst_3 = "6a00001fNMSg",
-+  bic_3 = "0a200000DNMg",
-+  bic_4 = "0a200000DNMSg",
-+  orn_3 = "2a200000DNMg",
-+  orn_4 = "2a200000DNMSg",
-+  eon_3 = "4a200000DNMg",
-+  eon_4 = "4a200000DNMSg",
-+  bics_3 = "6a200000DNMg",
-+  bics_4 = "6a200000DNMSg",
-+  movn_2 = "12800000DWg",
-+  movn_3 = "12800000DWRg",
-+  movz_2 = "52800000DWg",
-+  movz_3 = "52800000DWRg",
-+  movk_2 = "72800000DWg",
-+  movk_3 = "72800000DWRg",
-+  -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
-+  mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
-+  mov_3 = "2a0003e0DMSg",
-+  mvn_2 = "2a2003e0DMg",
-+  mvn_3 = "2a2003e0DMSg",
-+  adr_2 = "10000000DBx",
-+  adrp_2 = "90000000DBx",
-+  csel_4 = "1a800000DNMCg",
-+  csinc_4 = "1a800400DNMCg",
-+  csinv_4 = "5a800000DNMCg",
-+  csneg_4 = "5a800400DNMCg",
-+  cset_2 = "1a9f07e0Dcg",
-+  csetm_2 = "5a9f03e0Dcg",
-+  cinc_3 = "1a800400DNmcg",
-+  cinv_3 = "5a800000DNmcg",
-+  cneg_3 = "5a800400DNmcg",
-+  ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
-+  ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
-+  madd_4 = "1b000000DNMAg",
-+  msub_4 = "1b008000DNMAg",
-+  mul_3 = "1b007c00DNMg",
-+  mneg_3 = "1b00fc00DNMg",
-+  smaddl_4 = "9b200000DxNMwAx",
-+  smsubl_4 = "9b208000DxNMwAx",
-+  smull_3 = "9b207c00DxNMw",
-+  smnegl_3 = "9b20fc00DxNMw",
-+  smulh_3 = "9b407c00DNMx",
-+  umaddl_4 = "9ba00000DxNMwAx",
-+  umsubl_4 = "9ba08000DxNMwAx",
-+  umull_3 = "9ba07c00DxNMw",
-+  umnegl_3 = "9ba0fc00DxNMw",
-+  umulh_3 = "9bc07c00DNMx",
-+  udiv_3 = "1ac00800DNMg",
-+  sdiv_3 = "1ac00c00DNMg",
-+  -- Bit operations.
-+  sbfm_4 = "13000000DN12w|93400000DN12x",
-+  bfm_4 = "33000000DN12w|b3400000DN12x",
-+  ubfm_4 = "53000000DN12w|d3400000DN12x",
-+  extr_4 = "13800000DNM2w|93c00000DNM2x",
-+  sxtb_2 = "13001c00DNw|93401c00DNx",
-+  sxth_2 = "13003c00DNw|93403c00DNx",
-+  sxtw_2 = "93407c00DxNw",
-+  uxtb_2 = "53001c00DNw",
-+  uxth_2 = "53003c00DNw",
-+  sbfx_4 = op_alias("sbfm_4", alias_bfx),
-+  bfxil_4 = op_alias("bfm_4", alias_bfx),
-+  ubfx_4 = op_alias("ubfm_4", alias_bfx),
-+  sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
-+  bfi_4 = op_alias("bfm_4", alias_bfiz),
-+  ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
-+  lsl_3 = function(params, nparams)
-+    -- Byte 35 is '#': an immediate shift count selects the ubfm alias.
-+    if params and params[3]:byte() == 35 then
-+      return alias_lslimm(params, nparams)
-+    else
-+      return op_template(params, "1ac02000DNMg", nparams)
-+    end
-+  end,
-+  lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
-+  asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
-+  ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
-+  clz_2 = "5ac01000DNg",
-+  cls_2 = "5ac01400DNg",
-+  rbit_2 = "5ac00000DNg",
-+  rev_2 = "5ac00800DNw|dac00c00DNx",
-+  rev16_2 = "5ac00400DNg",
-+  rev32_2 = "dac00800DNx",
-+  -- Loads and stores.
-+  ["strb_*"] = "38000000DwL",
-+  ["ldrb_*"] = "38400000DwL",
-+  ["ldrsb_*"] = "38c00000DwL|38800000DxL",
-+  ["strh_*"] = "78000000DwL",
-+  ["ldrh_*"] = "78400000DwL",
-+  ["ldrsh_*"] = "78c00000DwL|78800000DxL",
-+  ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
-+  ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
-+  ["ldrsw_*"] = "98000000DxB|b8800000DxL",
-+  -- NOTE: ldur etc. are handled by ldr et al.
-+  ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
-+  ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
-+  ["ldpsw_*"] = "68400000DAxP",
-+  -- Branches.
-+  b_1 = "14000000B",
-+  bl_1 = "94000000B",
-+  blr_1 = "d63f0000Nx",
-+  br_1 = "d61f0000Nx",
-+  ret_0 = "d65f03c0",
-+  ret_1 = "d65f0000Nx",
-+  -- b.cond is added below.
-+  cbz_2 = "34000000DBg",
-+  cbnz_2 = "35000000DBg",
-+  tbz_3 = "36000000DTBw|36000000DTBx",
-+  tbnz_3 = "37000000DTBw|37000000DTBx",
-+  -- Miscellaneous instructions.
-+  -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
-+  -- TODO: sys, sysl, ic, dc, at, tlbi
-+  -- TODO: hint, yield, wfe, wfi, sev, sevl
-+  -- TODO: clrex, dsb, dmb, isb
-+  nop_0 = "d503201f",
-+  brk_0 = "d4200000",
-+  brk_1 = "d4200000W",
-+  -- Floating point instructions.
-+  fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
-+  fabs_2 = "1e20c000DNf",
-+  fneg_2 = "1e214000DNf",
-+  fsqrt_2 = "1e21c000DNf",
-+  fcvt_2 = "1e22c000DdNs|1e624000DsNd",
-+  -- TODO: half-precision and fixed-point conversions.
-+  fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
-+  fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
-+  fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
-+  fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
-+  fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
-+  fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
-+  fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
-+  fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
-+  fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
-+  fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
-+  scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
-+  ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
-+  frintn_2 = "1e244000DNf",
-+  frintp_2 = "1e24c000DNf",
-+  frintm_2 = "1e254000DNf",
-+  frintz_2 = "1e25c000DNf",
-+  frinta_2 = "1e264000DNf",
-+  frintx_2 = "1e274000DNf",
-+  frinti_2 = "1e27c000DNf",
-+  fadd_3 = "1e202800DNMf",
-+  fsub_3 = "1e203800DNMf",
-+  fmul_3 = "1e200800DNMf",
-+  fnmul_3 = "1e208800DNMf",
-+  fdiv_3 = "1e201800DNMf",
-+  fmadd_4 = "1f000000DNMAf",
-+  fmsub_4 = "1f008000DNMAf",
-+  fnmadd_4 = "1f200000DNMAf",
-+  fnmsub_4 = "1f208000DNMAf",
-+  fmax_3 = "1e204800DNMf",
-+  fmaxnm_3 = "1e206800DNMf",
-+  fmin_3 = "1e205800DNMf",
-+  fminnm_3 = "1e207800DNMf",
-+  fcmp_2 = "1e202000NMf|1e202008NZf",
-+  fcmpe_2 = "1e202010NMf|1e202018NZf",
-+  fccmp_4 = "1e200400NMVCf",
-+  fccmpe_4 = "1e200410NMVCf",
-+  fcsel_4 = "1e200c00DNMCf",
-+  -- TODO: crc32*, aes*, sha*, pmull
-+  -- TODO: SIMD instructions.
-+}
-+-- Register one "b<cond>_1" template per entry of map_cond: base opcode
-+-- 0x54000000 plus the condition code, rendered as hex, followed by the "B"
-+-- (label) operand character.
-+for cond,c in pairs(map_cond) do
-+ map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
-+end
-+------------------------------------------------------------------------------
-+-- Handle opcodes defined with template strings.
-+-- Assemble one instruction from a single template alternative.
-+--   params    operand strings, consumed left to right via index n
-+--   template  hex opcode digits followed by operand characters
-+--   nparams   operand count, forwarded to the load/store parsers
-+--   pos       buffer position that receives the finished opcode word
-+-- Errors raised by the operand parsers propagate to the caller, which uses
-+-- them to try the next alternative.
-+-- NOTE(review): the slicing below (digits 1..12 as opcode, operand characters
-+-- from position 17) fits the 16 digit s390x templates only; the 8 digit
-+-- templates still present in map_op are mis-sliced by it.
-+-- NOTE(review): no branch handles "D", "-", "a", "b", "c"-suffix letters,
-+-- "Y" or "E", so templates such as "...DNMg" or "...RX-a" currently end in
-+-- assert(false).
-+local function parse_template(params, template, nparams, pos)
-+ local op = tonumber(sub(template, 1, 12), 16) -- 13-16 ignored since those are trailing zeros added after the instruction
-+ -- 00000000005a0000 converts to 90
-+ local n,rs = 1,26
-+
-+ parse_reg_type = false
-+ -- Process each character. (if its RX-a==> 1st iteration gets R, 2nd==X and so on)
-+ for p in gmatch(sub(template, 17), ".") do
-+ local q = params[n]
-+ if p == "R" then
-+ op = op + parse_reg(q); n = n + 1
-+ elseif p == "N" then
-+ op = op + shl(parse_reg(q), 5); n = n + 1
-+ elseif p == "M" then
-+ op = op + shl(parse_reg(q), 16); n = n + 1
-+ elseif p == "A" then
-+ op = op + shl(parse_reg(q), 10); n = n + 1
-+ elseif p == "m" then
-+ -- Reuses the previous operand as a second register field; n is unchanged.
-+ op = op + shl(parse_reg(params[n-1]), 16)
-+ elseif p == "p" then
-+ if q == "sp" then params[n] = "@x31" end
-+ elseif p == "g" then
-+ -- General register size check: "x" sets the top bit, "w" adds nothing.
-+ if parse_reg_type == "x" then
-+ op = op + 0x80000000
-+ elseif parse_reg_type ~= "w" then
-+ werror("bad register type")
-+ end
-+ parse_reg_type = false
-+ elseif p == "f" then
-+ -- Floating point size check: "d" sets 0x00400000, "s" adds nothing.
-+ if parse_reg_type == "d" then
-+ op = op + 0x00400000
-+ elseif parse_reg_type ~= "s" then
-+ werror("bad register type")
-+ end
-+ parse_reg_type = false
-+ elseif p == "x" or p == "w" or p == "d" or p == "s" then
-+ -- Explicit size letter: the registers parsed so far must have this type.
-+ if parse_reg_type ~= p then
-+ werror("register size mismatch")
-+ end
-+ parse_reg_type = false
-+ elseif p == "L" then
-+ op = parse_load(params, nparams, n, op)
-+ elseif p == "P" then
-+ op = parse_load_pair(params, nparams, n, op)
-+ elseif p == "B" then
-+ local mode, v, s = parse_label(q, false); n = n + 1
-+ local m = branch_type(op)
-+ waction("REL_"..mode, v+m, s, 1)
-+ elseif p == "I" then
-+ op = op + parse_imm12(q); n = n + 1
-+ elseif p == "i" then
-+ op = op + parse_imm13(q); n = n + 1
-+ elseif p == "W" then
-+ op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
-+ elseif p == "T" then
-+ op = op + parse_imm6(q); n = n + 1
-+ elseif p == "1" then
-+ op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
-+ elseif p == "2" then
-+ op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
-+ elseif p == "5" then
-+ op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
-+ elseif p == "V" then
-+ op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
-+ elseif p == "F" then
-+ op = op + parse_fpimm(q); n = n + 1
-+ elseif p == "Z" then
-+ if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end
-+ n = n + 1
-+ elseif p == "S" then
-+ op = op + parse_shift(q); n = n + 1
-+ elseif p == "X" then
-+ op = op + parse_extend(q); n = n + 1
-+ -- NOTE(review): unreachable -- "R" is already claimed by the first branch
-+ -- of this chain, so parse_lslx16() is never called from here.
-+ elseif p == "R" then
-+ op = op + parse_lslx16(q); n = n + 1
-+ elseif p == "C" then
-+ op = op + parse_cond(q, 0); n = n + 1
-+ elseif p == "c" then
-+ op = op + parse_cond(q, 1); n = n + 1
-+ else
-+ assert(false)
-+ end
-+ end
-+ wputpos(pos, op)
-+end
-+-- Assemble an instruction from a template string with '|'-separated
-+-- alternatives. Each alternative is tried in order under pcall(); the first
-+-- one that parses wins. After a failed attempt the section position and up
-+-- to three action-list entries and action arguments are rolled back. If
-+-- every alternative fails, the last error is re-raised.
-+-- Called without params, returns the template with each run of eight hex
-+-- digits removed (plus gsub's replacement count as a second value).
-+-- This assigns the local "op_template" forward-declared next to map_op.
-+function op_template(params, template, nparams)
-+ if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
-+ -- Limit number of section buffer positions used by a single dasm_put().
-+ -- A single opcode needs a maximum of 3 positions.
-+ if secpos+3 > maxsecpos then wflush() end
-+ local pos = wpos()
-+ -- Snapshot of the emitter state used for rollback between alternatives.
-+ local lpos, apos, spos = #actlist, #actargs, secpos
-+ local ok, err
-+ for t in gmatch(template, "[^|]+") do
-+ ok, err = pcall(parse_template, params, t, nparams, pos)
-+ if ok then return end
-+ secpos = spos
-+ actlist[lpos+1] = nil
-+ actlist[lpos+2] = nil
-+ actlist[lpos+3] = nil
-+ actargs[apos+1] = nil
-+ actargs[apos+2] = nil
-+ actargs[apos+3] = nil
-+ end
-+ error(err, 0)
-+end
-+map_op[".template__"] = op_template
-+------------------------------------------------------------------------------
-+-- Pseudo-opcode: queue emission of the action list at this output position.
-+-- The single operand is the C variable name; it is passed on unchecked.
-+map_op[".actionlist_1"] = function(params)
-+  if not params then return "cvar" end
-+  local cvar = params[1]
-+  local function emit(out) writeactions(out, cvar) end
-+  wline(emit)
-+end
-+-- Pseudo-opcode: queue emission of the global enum at this output position.
-+-- The single operand is the enumerator prefix; it is passed on unchecked.
-+map_op[".globals_1"] = function(params)
-+  if not params then return "prefix" end
-+  local enum_prefix = params[1]
-+  local function emit(out) writeglobals(out, enum_prefix) end
-+  wline(emit)
-+end
-+-- Pseudo-opcode: queue emission of the global names at this output position.
-+-- The single operand is the C variable name; it is passed on unchecked.
-+map_op[".globalnames_1"] = function(params)
-+  if not params then return "cvar" end
-+  local cvar = params[1]
-+  local function emit(out) writeglobalnames(out, cvar) end
-+  wline(emit)
-+end
-+-- Pseudo-opcode: queue emission of the extern names at this output position.
-+-- The single operand is the C variable name; it is passed on unchecked.
-+map_op[".externnames_1"] = function(params)
-+  if not params then return "cvar" end
-+  local cvar = params[1]
-+  local function emit(out) writeexternnames(out, cvar) end
-+  wline(emit)
-+end
-+------------------------------------------------------------------------------
-+-- Label definition pseudo-opcode (the trailing colon form is converted to
-+-- this). Accepts a local label 1-9, a ->global or a =>pcexpr; extern labels
-+-- cannot be defined.
-+map_op[".label_1"] = function(params)
-+  if not params then return "[1-9] | ->global | =>pcexpr" end
-+  if secpos+1 > maxsecpos then wflush() end
-+  local kind, number, expr = parse_label(params[1], true)
-+  if kind == "EXT" then werror("bad label definition") end
-+  waction("LABEL_"..kind, number, expr, 1)
-+end
-+------------------------------------------------------------------------------
-+-- Data storage pseudo-opcode: emit every operand as one word. Operands must
-+-- be numeric literals; negative values are wrapped by adding 2^32.
-+map_op[".long_*"] = function(params)
-+  if not params then return "imm..." end
-+  for _, text in ipairs(params) do
-+    local word = tonumber(text)
-+    if not word then werror("bad immediate `"..text.."'") end
-+    if word < 0 then
-+      word = word + 2^32
-+    end
-+    wputw(word)
-+    -- Flush between words once the section buffer is nearly full.
-+    if secpos+2 > maxsecpos then wflush() end
-+  end
-+end
-+-- Alignment pseudo-opcode.
-+-- The operand must be a numeric literal that is a power of two from 2 to
-+-- 256; an ALIGN action carrying align-1 is emitted. Any other operand is
-+-- reported as "bad alignment".
-+map_op[".align_1"] = function(params)
-+ if not params then return "numpow2" end
-+ if secpos+1 > maxsecpos then wflush() end
-+ local align = tonumber(params[1])
-+ if align then
-+ local x = align
-+ -- Must be a power of 2 in the range (2 ... 256).
-+ -- Halve up to eight times; hitting exactly 1 proves align is 2^i.
-+ for i=1,8 do
-+ x = x / 2
-+ if x == 1 then
-+ waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
-+ return
-+ end
-+ end
-+ end
-+ werror("bad alignment")
-+end
-+------------------------------------------------------------------------------
-+-- Pseudo-opcode for (primitive) type definitions that map a name to a C type
-+-- and, optionally, to a default register. Registers the type in map_type,
-+-- adds a "#name" entry (sizeof of the C type) to map_archdef and emits a
-+-- DtN() offset macro for it.
-+map_op[".type_3"] = function(params, nparams)
-+  if not params then
-+    if nparams == 2 then return "name, ctype" end
-+    return "name, ctype, reg"
-+  end
-+  local name, ctype, reg = params[1], params[2], params[3]
-+  if not match(name, "^[%a_][%w_]*$") then
-+    werror("bad type name `"..name.."'")
-+  end
-+  if map_type[name] then
-+    werror("duplicate type `"..name.."'")
-+  end
-+  -- Add #type to defines. A bit unclean to put it in map_archdef.
-+  map_archdef["#"..name] = "sizeof("..ctype..")"
-+  -- Allocate the next type number, record the type, emit its macro.
-+  local index = ctypenum + 1
-+  local entry = {
-+    ctype = ctype,
-+    ctypefmt = format("Dt%X(%%s)", index),
-+    reg = reg,
-+  }
-+  map_type[name] = entry
-+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", index, ctype))
-+  ctypenum = index
-+end
-+map_op[".type_2"] = map_op[".type_3"]
-+-- Write all registered types to out, sorted by name, one per line with the
-+-- C type and the default register (blank when none). lvl is not used.
-+local function dumptypes(out, lvl)
-+  local names = {}
-+  for name in pairs(map_type) do names[#names+1] = name end
-+  sort(names)
-+  out:write("Type definitions:\n")
-+  for idx = 1, #names do
-+    local name = names[idx]
-+    local entry = map_type[name]
-+    out:write(format(" %-20s %-20s %s\n", name, entry.ctype, entry.reg or ""))
-+  end
-+  out:write("\n")
-+end
-+------------------------------------------------------------------------------
-+-- Switch to section number num: emit a SECTION action and flush right away.
-+function _M.section(num)
-+  waction("SECTION", num)
-+  -- SECTION is a terminal action.
-+  local terminal = true
-+  wflush(terminal)
-+end
-+------------------------------------------------------------------------------
-+-- Write the module banner (arch, version, release date) followed by the
-+-- action dump to out.
-+function _M.dumparch(out)
-+  local banner = format("DynASM %s version %s, released %s\n\n",
-+    _info.arch, _info.version, _info.release)
-+  out:write(banner)
-+  dumpactions(out)
-+end
-+-- Write every user-defined element to out: types, then globals, then
-+-- externs, each at verbosity lvl.
-+function _M.dumpdef(out, lvl)
-+  local dumpers = { dumptypes, dumpglobals, dumpexterns }
-+  for idx = 1, #dumpers do
-+    dumpers[idx](out, lvl)
-+  end
-+end
-+------------------------------------------------------------------------------
-+-- Receive the core's line/error/fatal/warning callbacks and hand back this
-+-- module's flush function.
-+function _M.passcb(wl, we, wf, ww)
-+  wline = wl
-+  werror = we
-+  wfatal = wf
-+  wwarn = ww
-+  return wflush
-+end
-+-- Record the architecture name and option table for the arch module.
-+function _M.setup(arch, opt)
-+  g_arch = arch
-+  g_opt = opt
-+end
-+-- Chain the maps through __index metatables: opcode lookups that miss in
-+-- map_op fall back to the core opcodes, and define lookups that miss in
-+-- map_def fall back to map_archdef. Returns map_op and map_def.
-+function _M.mergemaps(map_coreop, map_def)
-+  local op_fallback = { __index = map_coreop }
-+  local def_fallback = { __index = map_archdef }
-+  setmetatable(map_op, op_fallback)
-+  setmetatable(map_def, def_fallback)
-+  return map_op, map_def
-+end
-+return _M
-+------------------------------------------------------------------------------
-
-From 9583ba36deddbff4a8ffd734896c1b9787b1d2c8 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb@gmail.com>
-Date: Thu, 10 Nov 2016 19:00:51 +0530
-Subject: [PATCH 005/260] Created s390x header file
-
-Currently copy of ppc.h, which is same as arm64.h, and added the architecture definition
----
- dynasm/dasm_s390x.h | 418 ++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 418 insertions(+)
- create mode 100644 dynasm/dasm_s390x.h
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-new file mode 100644
-index 000000000..577920ac9
---- /dev/null
-+++ b/dynasm/dasm_s390x.h
-@@ -0,0 +1,418 @@
-+/*
-+** DynASM s390x encoding engine.
-+** Copyright (C) 2005-2016 Mike Pall. All rights reserved.
-+** Released under the MIT license. See dynasm.lua for full copyright notice.
-+*/
-+
-+#include <stddef.h>
-+#include <stdarg.h>
-+#include <string.h>
-+#include <stdlib.h>
-+
-+#define DASM_ARCH "s390"
-+
-+#ifndef DASM_EXTERN
-+#define DASM_EXTERN(a,b,c,d) 0
-+#endif
-+
-+/* Action definitions.
-+** The enumerators are grouped by what each action needs, as noted below;
-+** DASM__MAX is the count of action codes (one past the last one).
-+*/
-+enum {
-+ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
-+ /* The following actions need a buffer position. */
-+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
-+ /* The following actions also have an argument. */
-+ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
-+ DASM__MAX
-+};
-+
-+/* Maximum number of section buffer positions for a single dasm_put() call. */
-+#define DASM_MAXSECPOS 25
-+
-+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
-+#define DASM_S_OK 0x00000000
-+#define DASM_S_NOMEM 0x01000000
-+#define DASM_S_PHASE 0x02000000
-+#define DASM_S_MATCH_SEC 0x03000000
-+#define DASM_S_RANGE_I 0x11000000
-+#define DASM_S_RANGE_SEC 0x12000000
-+#define DASM_S_RANGE_LG 0x13000000
-+#define DASM_S_RANGE_PC 0x14000000
-+#define DASM_S_RANGE_REL 0x15000000
-+#define DASM_S_UNDEF_LG 0x21000000
-+#define DASM_S_UNDEF_PC 0x22000000
-+
-+/* Macros to convert positions (8 bit section + 24 bit index). */
-+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
-+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
-+#define DASM_SEC2POS(sec) ((sec)<<24)
-+#define DASM_POS2SEC(pos) ((pos)>>24)
-+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
-+
-+/* Action list type. */
-+typedef const unsigned int *dasm_ActList;
-+
-+/* Per-section structure. */
-+typedef struct dasm_Section {
-+ int *rbuf; /* Biased buffer pointer (negative section bias). */
-+ int *buf; /* True buffer pointer. */
-+ size_t bsize; /* Buffer size in bytes. */
-+ int pos; /* Biased buffer position. */
-+ int epos; /* End of biased buffer position - max single put. */
-+ int ofs; /* Byte offset into section. */
-+} dasm_Section;
-+
-+/* Core structure holding the DynASM encoding state.
-+** Allocated with room for maxsection entries in the trailing sections array
-+** (see DASM_PSZ).
-+*/
-+struct dasm_State {
-+ size_t psize; /* Allocated size of this structure. */
-+ dasm_ActList actionlist; /* Current actionlist pointer. */
-+ int *lglabels; /* Local/global chain/pos ptrs. */
-+ size_t lgsize; /* Size of lglabels in bytes. */
-+ int *pclabels; /* PC label chains/pos ptrs. */
-+ size_t pcsize; /* Size of pclabels in bytes. */
-+ void **globals; /* Array of globals (bias -10). */
-+ dasm_Section *section; /* Pointer to active section. */
-+ size_t codesize; /* Total size of all code sections. */
-+ int maxsection; /* 0 <= sectionidx < maxsection. */
-+ int status; /* Status code. */
-+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
-+};
-+
-+/* The size of the core structure depends on the max. number of sections.
-+** One dasm_Section is already part of dasm_State, hence the -1. The argument
-+** is parenthesized so that an expression such as `a+b` expands correctly.
-+*/
-+#define DASM_PSZ(ms) (sizeof(dasm_State)+((ms)-1)*sizeof(dasm_Section))
-+
-+
-+/* Initialize DynASM state.
-+** Allocates the state through DASM_M_GROW, sized for maxsection sections,
-+** stores it in Dst_REF and resets the label arrays, the globals pointer and
-+** every section buffer. Fields such as status, actionlist and the per-section
-+** pos/ofs are not touched here; dasm_setup() assigns them.
-+*/
-+void dasm_init(Dst_DECL, int maxsection)
-+{
-+ dasm_State *D;
-+ size_t psz = 0;
-+ int i;
-+ Dst_REF = NULL;
-+ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
-+ D = Dst_REF;
-+ D->psize = psz;
-+ D->lglabels = NULL;
-+ D->lgsize = 0;
-+ D->pclabels = NULL;
-+ D->pcsize = 0;
-+ D->globals = NULL;
-+ D->maxsection = maxsection;
-+ for (i = 0; i < maxsection; i++) {
-+ D->sections[i].buf = NULL; /* Need this for pass3. */
-+ /* NOTE(review): this is pointer arithmetic on a null pointer; it is only
-+ ** a placeholder until the buffer is grown and rbuf is recomputed. */
-+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-+ D->sections[i].bsize = 0;
-+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
-+ }
-+}
-+
-+/* Free DynASM state: every allocated section buffer, both label arrays
-+** (when present) and finally the state structure itself.
-+*/
-+void dasm_free(Dst_DECL)
-+{
-+  dasm_State *D = Dst_REF;
-+  int idx;
-+  for (idx = 0; idx < D->maxsection; idx++) {
-+    dasm_Section *sec = &D->sections[idx];
-+    if (sec->buf) {
-+      DASM_M_FREE(Dst, sec->buf, sec->bsize);
-+    }
-+  }
-+  if (D->pclabels) {
-+    DASM_M_FREE(Dst, D->pclabels, D->pcsize);
-+  }
-+  if (D->lglabels) {
-+    DASM_M_FREE(Dst, D->lglabels, D->lgsize);
-+  }
-+  DASM_M_FREE(Dst, D, D->psize);
-+}
-+
-+/* Setup global label array. Must be called before dasm_setup(). gl is the caller's array receiving global label addresses, maxgl the number of globals. */
-+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
-+{
-+ dasm_State *D = Dst_REF;
-+ D->globals = gl - 10; /* Negative bias to compensate for locals. */
-+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); /* 10 leading slots plus one slot per global; size is in bytes. */
-+}
-+
-+/* Grow PC label array. Can be called after dasm_setup(), too. maxpc is the requested number of PC labels. */
-+void dasm_growpc(Dst_DECL, unsigned int maxpc)
-+{
-+ dasm_State *D = Dst_REF;
-+ size_t osz = D->pcsize; /* Old size in bytes, taken before the array is grown. */
-+ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
-+ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); /* Zero only the newly added tail; existing labels are kept. */
-+}
-+
-+/* Setup encoder: bind the action list, clear the status and all labels, and rewind every section. */
-+void dasm_setup(Dst_DECL, const void *actionlist)
-+{
-+ dasm_State *D = Dst_REF;
-+ int i;
-+ D->actionlist = (dasm_ActList)actionlist;
-+ D->status = DASM_S_OK;
-+ D->section = &D->sections[0]; /* Section 0 is the active section until an action switches it. */
-+ memset((void *)D->lglabels, 0, D->lgsize); /* Not NULL-checked, unlike pclabels below: relies on dasm_setupglobal() having run. */
-+ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
-+ for (i = 0; i < D->maxsection; i++) {
-+ D->sections[i].pos = DASM_SEC2POS(i); /* Each section starts at its own biased position. */
-+ D->sections[i].ofs = 0;
-+ }
-+}
-+
-+
-+#ifdef DASM_CHECKS /* Pass 1 checks: on failure record the status in D->status and return from the enclosing void function. */
-+#define CK(x, st) \
-+ do { if (!(x)) { \
-+ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
-+#define CKPL(kind, st) \
-+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
-+ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
-+#else /* Without DASM_CHECKS both checks compile to no-ops. */
-+#define CK(x, st) ((void)0)
-+#define CKPL(kind, st) ((void)0)
-+#endif
-+
-+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. start is an index into the action list; the varargs supply one int per argument-taking action. */
-+void dasm_put(Dst_DECL, int start, ...)
-+{
-+ va_list ap;
-+ dasm_State *D = Dst_REF;
-+ dasm_ActList p = D->actionlist + start;
-+ dasm_Section *sec = D->section;
-+ int pos = sec->pos, ofs = sec->ofs;
-+ int *b;
-+
-+ if (pos >= sec->epos) { /* Buffer (nearly) exhausted: grow it, then recompute the biased pointer and the limit. */
-+ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
-+ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
-+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
-+ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
-+ }
-+
-+ b = sec->rbuf;
-+ b[pos++] = start; /* Record the action list start index; pass 2 and pass 3 replay from it. */
-+
-+ va_start(ap, start); /* NOTE(review): with DASM_CHECKS a failing CK()/CKPL() returns without reaching va_end(). */
-+ while (1) {
-+ unsigned int ins = *p++;
-+ unsigned int action = (ins >> 16);
-+ if (action >= DASM__MAX) { /* Not an action: a plain instruction word, counted as 4 bytes. */
-+ ofs += 4;
-+ } else {
-+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; /* Actions from DASM_REL_PC upward consume one vararg. */
-+ switch (action) {
-+ case DASM_STOP: goto stop;
-+ case DASM_SECTION:
-+ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
-+ D->section = &D->sections[n]; goto stop;
-+ case DASM_ESC: p++; ofs += 4; break; /* Skip the escaped word; it still occupies 4 bytes. */
-+ case DASM_REL_EXT: break;
-+ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; /* Assume maximum padding here; pass 2 shrinks it. */
-+ case DASM_REL_LG:
-+ n = (ins & 2047) - 10; pl = D->lglabels + n;
-+ /* Bkwd rel or global. */
-+ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
-+ pl += 10; n = *pl;
-+ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
-+ goto linkrel;
-+ case DASM_REL_PC:
-+ pl = D->pclabels + n; CKPL(pc, PC);
-+ putrel:
-+ n = *pl;
-+ if (n < 0) { /* Label exists. Get label pos and store it. */
-+ b[pos] = -n;
-+ } else {
-+ linkrel:
-+ b[pos] = n; /* Else link to rel chain, anchored at label. */
-+ *pl = pos;
-+ }
-+ pos++;
-+ break;
-+ case DASM_LABEL_LG:
-+ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
-+ case DASM_LABEL_PC:
-+ pl = D->pclabels + n; CKPL(pc, PC);
-+ putlabel:
-+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
-+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
-+ }
-+ *pl = -pos; /* Label exists now. */
-+ b[pos++] = ofs; /* Store pass1 offset estimate. */
-+ break;
-+ case DASM_IMM:
-+#ifdef DASM_CHECKS
-+ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
-+#endif
-+ n >>= ((ins>>10)&31); /* Drop the scale bits given by ins bits 10..14. */
-+#ifdef DASM_CHECKS
-+ if (ins & 0x8000)
-+ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
-+ else
-+ CK((n>>((ins>>5)&31)) == 0, RANGE_I);
-+#endif
-+ b[pos++] = n;
-+ break;
-+ case DASM_IMMSH:
-+ CK((n >> 6) == 0, RANGE_I); /* Value must fit in 6 bits. */
-+ b[pos++] = n;
-+ break;
-+ }
-+ }
-+ }
-+stop:
-+ va_end(ap);
-+ sec->pos = pos; /* Write the advanced position and offset estimate back to the section. */
-+ sec->ofs = ofs;
-+}
-+#undef CK
-+
-+/* Pass 2: Link sections, shrink aligns, fix label offsets. Stores the total code size in *szp and returns a status code (DASM_S_OK on success). */
-+int dasm_link(Dst_DECL, size_t *szp)
-+{
-+ dasm_State *D = Dst_REF;
-+ int secnum;
-+ int ofs = 0;
-+
-+#ifdef DASM_CHECKS
-+ *szp = 0;
-+ if (D->status != DASM_S_OK) return D->status;
-+ {
-+ int pc;
-+ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
-+ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
-+ }
-+#endif
-+
-+ { /* Handle globals not defined in this translation unit. */
-+ int idx;
-+ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { /* Globals are label numbers >= 20, stored at slot number-10, i.e. from slot 10 on. */
-+ int n = D->lglabels[idx];
-+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
-+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
-+ }
-+ }
-+
-+ /* Combine all code sections. No support for data sections (yet). */
-+ for (secnum = 0; secnum < D->maxsection; secnum++) {
-+ dasm_Section *sec = D->sections + secnum;
-+ int *b = sec->rbuf;
-+ int pos = DASM_SEC2POS(secnum);
-+ int lastpos = sec->pos;
-+
-+ while (pos != lastpos) { /* Replay every dasm_put() recorded for this section. */
-+ dasm_ActList p = D->actionlist + b[pos++];
-+ while (1) {
-+ unsigned int ins = *p++;
-+ unsigned int action = (ins >> 16);
-+ switch (action) {
-+ case DASM_STOP: case DASM_SECTION: goto stop;
-+ case DASM_ESC: p++; break;
-+ case DASM_REL_EXT: break;
-+ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; /* Give back padding that is not needed at the real offset. */
-+ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
-+ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; /* Turn the pass 1 estimate into the linked offset. */
-+ case DASM_IMM: case DASM_IMMSH: pos++; break;
-+ }
-+ }
-+ stop: (void)0;
-+ }
-+ ofs += sec->ofs; /* Next section starts right after current section. */
-+ }
-+
-+ D->codesize = ofs; /* Total size of all code sections */
-+ *szp = ofs;
-+ return DASM_S_OK;
-+}
-+
-+#ifdef DASM_CHECKS /* Pass 3 checks: on failure return the status code directly from the enclosing function. */
-+#define CK(x, st) \
-+ do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
-+#else /* Without DASM_CHECKS the check compiles to a no-op. */
-+#define CK(x, st) ((void)0)
-+#endif
-+
-+/* Pass 3: Encode sections. Writes the machine code for all sections into buffer and returns a status code. */
-+int dasm_encode(Dst_DECL, void *buffer)
-+{
-+ dasm_State *D = Dst_REF;
-+ char *base = (char *)buffer;
-+ unsigned int *cp = (unsigned int *)buffer;
-+ int secnum;
-+
-+ /* Encode all code sections. No support for data sections (yet). */
-+ for (secnum = 0; secnum < D->maxsection; secnum++) {
-+ dasm_Section *sec = D->sections + secnum;
-+ int *b = sec->buf;
-+ int *endb = sec->rbuf + sec->pos;
-+
-+ while (b != endb) {
-+ dasm_ActList p = D->actionlist + *b++;
-+ while (1) {
-+ unsigned int ins = *p++;
-+ unsigned int action = (ins >> 16);
-+ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; /* Fetch the value stored by pass 1 for actions from DASM_ALIGN upward. */
-+ switch (action) {
-+ case DASM_STOP: case DASM_SECTION: goto stop;
-+ case DASM_ESC: *cp++ = *p++; break; /* Emit the escaped word verbatim. */
-+ case DASM_REL_EXT:
-+ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
-+ goto patchrel;
-+ case DASM_ALIGN:
-+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; /* Pad with 0x60000000 words; presumably the PowerPC nop inherited from dasm_ppc.h, TODO confirm for this target. */
-+ break;
-+ case DASM_REL_LG:
-+ CK(n >= 0, UNDEF_LG); /* fallthrough */
-+ case DASM_REL_PC:
-+ CK(n >= 0, UNDEF_PC);
-+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); /* Label offset minus the current output offset. */
-+ patchrel:
-+ CK((n & 3) == 0 &&
-+ (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >>
-+ ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL);
-+ cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc)); /* Bit 2048 of ins selects the 16 bit field, otherwise the 26 bit field. */
-+ break;
-+ case DASM_LABEL_LG:
-+ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); /* Only label numbers >= 20 are published to the globals array. */
-+ break;
-+ case DASM_LABEL_PC: break;
-+ case DASM_IMM:
-+ cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); /* Mask to the field width (ins bits 5..9), shift to the field position (ins bits 0..4). */
-+ break;
-+ case DASM_IMMSH:
-+ cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
-+ break;
-+ default: *cp++ = ins; break; /* Plain instruction word. */
-+ }
-+ }
-+ stop: (void)0;
-+ }
-+ }
-+
-+ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
-+ return DASM_S_PHASE;
-+ return DASM_S_OK;
-+}
-+#undef CK
-+
-+/* Get PC label offset. Returns the value stored for a defined label, -1 for a referenced but undefined label, -2 if the label is unused or pc is out of range. */
-+int dasm_getpclabel(Dst_DECL, unsigned int pc)
-+{
-+ dasm_State *D = Dst_REF;
-+ if (pc*sizeof(int) < D->pcsize) { /* pcsize is in bytes, hence the scaling. */
-+ int pos = D->pclabels[pc];
-+ if (pos < 0) return *DASM_POS2PTR(D, -pos); /* Defined: the entry holds the negated buffer position of the label. */
-+ if (pos > 0) return -1; /* Undefined. */
-+ }
-+ return -2; /* Unused or out of range. */
-+}
-+
-+#ifdef DASM_CHECKS
-+/* Optional sanity checker to call between isolated encoding steps. Pass secmatch >= 0 to also require that section to be the active one; returns the (possibly updated) status. */
-+int dasm_checkstep(Dst_DECL, int secmatch)
-+{
-+ dasm_State *D = Dst_REF;
-+ if (D->status == DASM_S_OK) {
-+ int i;
-+ for (i = 1; i <= 9; i++) { /* Slots 1..9 of lglabels. */
-+ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } /* > 0: a reference chain is still pending, so the label was never defined. */
-+ D->lglabels[i] = 0; /* Forget the label so the next step starts clean. */
-+ }
-+ }
-+ if (D->status == DASM_S_OK && secmatch >= 0 &&
-+ D->section != &D->sections[secmatch])
-+ D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
-+ return D->status;
-+}
-+#endif
-
-From 633376f0cace757869a4b055c6b6f5a7070d5169 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Fri, 11 Nov 2016 12:04:51 +0530
-Subject: [PATCH 006/260] Update lj_arch.h
-
-changed S390 to S390x
----
- src/lj_arch.h | 20 +++++---------------
- 1 file changed, 5 insertions(+), 15 deletions(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 5155bf691..2638a9412 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -29,7 +29,7 @@
- #define LUAJIT_ARCH_mips32 6
- #define LUAJIT_ARCH_MIPS64 7
- #define LUAJIT_ARCH_mips64 7
--#define LUAJIT_ARCH_S390 8
-+#define LUAJIT_ARCH_S390x 8
-
- /* Target OS. */
- #define LUAJIT_OS_OTHER 0
-@@ -50,8 +50,8 @@
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM
- #elif defined(__aarch64__)
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM64
--#elif defined(__s390__) || defined(__s390) || defined(__S390__) || defined(__S390) || defined(S390)
--#define LUAJIT_TARGET LUAJIT_ARCH_S390
-+#elif defined(__s390x__) || defined(__s390x) || defined(__S390x__) || defined(__S390x) || defined(S390x)
-+#define LUAJIT_TARGET LUAJIT_ARCH_S390x
- #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
- #define LUAJIT_TARGET LUAJIT_ARCH_PPC
- #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
-@@ -235,20 +235,10 @@
-
- #elif LUAJIT_TARGET == LUAJIT_ARCH_S390
-
-- #define LJ_ARCH_NAME "s390"
-+ #define LJ_ARCH_NAME "s390x"
- #define LJ_ARCH_BITS 64
- #define LJ_ARCH_ENDIAN LUAJIT_BE
-- #if !defined(LJ_ARCH_HASFPU) && __SOFTFP__
-- #define LJ_ARCH_HASFPU 1
-- #endif
-- #define LJ_ABI_EABI 1
- #define LJ_TARGET_S390 1
-- #define LJ_TARGET_EHRETREG 0
-- #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
-- #define LJ_TARGET_MASKSHIFT 0
-- #define LJ_TARGET_MASKROT 1
-- #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
-- #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
-
- #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
-
-@@ -399,7 +389,7 @@
- #if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
- #error "Need at least Clang 3.5 or newer"
- #endif
--#elif LJ_TARGET_S390
-+#elif LJ_TARGET_S390x
- #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
- #error "Need at least GCC 4.2 or newer"
- #endif
-
-From d093cff04ade9b61a9a8e926387f81b40a3bebd7 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Fri, 11 Nov 2016 12:08:47 +0530
-Subject: [PATCH 007/260] Update Makefile
-
-changed S390 to S390x
----
- src/Makefile | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/Makefile b/src/Makefile
-index 9f7d28ce3..40cd10159 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -238,8 +238,8 @@ else
- ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= arm
- else
--ifneq (,$(findstring LJ_TARGET_S390 ,$(TARGET_TESTARCH)))
-- TARGET_LJARCH= s390
-+ifneq (,$(findstring LJ_TARGET_S390x ,$(TARGET_TESTARCH)))
-+ TARGET_LJARCH= s390x
- else
- ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= arm64
-
-From 9c3c87bc61d7aac050c4f9b0a43ec53e55e96590 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Fri, 11 Nov 2016 12:13:30 +0530
-Subject: [PATCH 009/260] Update lj_target_s390x.h
-
-Removed the ARM instructions that had not been replaced.
-Changed S390 to S390x.
----
- src/lj_target_s390x.h | 129 ++++--------------------------------------
- 1 file changed, 12 insertions(+), 117 deletions(-)
-
-diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
-index 7da2063d2..27bb34963 100644
---- a/src/lj_target_s390x.h
-+++ b/src/lj_target_s390x.h
-@@ -10,12 +10,15 @@
-
- #define GPRDEF(_) \
- _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
-- _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(15) \
-+ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _R(15) \
- #if LJ_SOFTFP
- #define FPRDEF(_)
- #else
- #define FPRDEF(_) \
-- _(F0) _(F2) _(F4) _(F6)
-+ _(F0) _(F1) _(F2) _(F3) \
-+ _(F4) _(F5) _(F6) _(F7) \
-+ _(F8) _(F9) _(F10) _(F11) \
-+ _(F12) _(F13) _(F14) _(F15)
- #endif
- #define VRIDDEF(_)
-
-@@ -150,54 +153,7 @@ typedef struct {
- #define ARMF_SH(sh, n) (((sh) << 5) | ((n) << 7))
- #define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
-
--typedef enum S390Ins {
--
-- // Unsupported in S390
-- #ARMI_LDRSB = 0xe01000d0,
-- #ARMI_S = 0x000100000,
-- #ARMI_LDRD = 0xe00000d0,
-- #ARMI_ADC = 0xe0a00000,
-- #ARMI_SBC = 0xe0c00000,
-- #ARMI_STRB = 0xe4400000,
-- #ARMI_STRH = 0xe00000b0,
-- #ARMI_STRD = 0xe00000f0,
-- #ARMI_BL = 0xeb000000,
-- #ARMI_BLX = 0xfa000000,
-- #ARMI_BLXr = 0xe12fff30,
-- #ARMI_BIC = 0xe1c00000,
-- #ARMI_ORR = 0xe1800000,
-- #ARMI_LDRB = 0xe4500000,
-- #ARMI_MVN = 0xe1e00000,
-- #ARMI_LDRSH = 0xe01000f0,
-- #ARMI_NOP = 0xe1a00000,
-- #ARMI_PUSH = 0xe92d0000,
-- #ARMI_RSB = 0xe0600000,
-- #ARMI_RSC = 0xe0e00000,
-- #ARMI_TEQ = 0xe1300000,
-- #ARMI_CCAL = 0xe0000000,
-- #ARMI_K12 = 0x02000000,
-- #ARMI_KNEG = 0x00200000,
-- #ARMI_LS_W = 0x00200000,
-- #ARMI_LS_U = 0x00800000,
-- #ARMI_LS_P = 0x01000000,
-- #ARMI_LS_R = 0x02000000,
-- #ARMI_LSX_I = 0x00400000,
--
--
-- #ARMI_SUB = 0xe0400000,
-- #ARMI_ADD = 0xe0800000,
-- #ARMI_AND = 0xe0000000,
-- #ARMI_EOR = 0xe0200000,
-- #ARMI_MUL = 0xe0000090,
-- #ARMI_LDR = 0xe4100000,
-- #ARMI_CMP = 0xe1500000,
-- #ARMI_LDRH = 0xe01000b0,
-- #ARMI_B = 0xea000000,
-- #ARMI_MOV = 0xe1a00000,
-- #ARMI_STR = 0xe4000000,
-- #ARMI_TST = 0xe1100000,
-- #ARMI_SMULL = 0xe0c00090,
-- #ARMI_CMN = 0xe1700000,
-+typedef enum S390xIns {
- S390I_SR = 0x1B000000,
- S390I_AR = 0x1A000000,
- S390I_NR = 0x14000000,
-@@ -212,76 +168,15 @@ typedef enum S390Ins {
- S390I_TM = 0x91000000,
- S390I_MP = 0xbd000090,
- S390I_CLR = 0x15000000,
-+} S390xIns;
-
-- /* ARMv6 */
-- #ARMI_REV = 0xe6bf0f30,
-- #ARMI_SXTB = 0xe6af0070,
-- #ARMI_SXTH = 0xe6bf0070,
-- #ARMI_UXTB = 0xe6ef0070,
-- #ARMI_UXTH = 0xe6ff0070,
--
-- /* ARMv6T2 */
-- #ARMI_MOVW = 0xe3000000,
-- #ARMI_MOVT = 0xe3400000,
--
-- /* VFP */
-- ARMI_VMOV_D = 0xeeb00b40,
-- ARMI_VMOV_S = 0xeeb00a40,
-- ARMI_VMOVI_D = 0xeeb00b00,
--
-- ARMI_VMOV_R_S = 0xee100a10,
-- ARMI_VMOV_S_R = 0xee000a10,
-- ARMI_VMOV_RR_D = 0xec500b10,
-- ARMI_VMOV_D_RR = 0xec400b10,
--
-- ARMI_VADD_D = 0xee300b00,
-- ARMI_VSUB_D = 0xee300b40,
-- ARMI_VMUL_D = 0xee200b00,
-- ARMI_VMLA_D = 0xee000b00,
-- ARMI_VMLS_D = 0xee000b40,
-- ARMI_VNMLS_D = 0xee100b00,
-- ARMI_VDIV_D = 0xee800b00,
--
-- ARMI_VABS_D = 0xeeb00bc0,
-- ARMI_VNEG_D = 0xeeb10b40,
-- ARMI_VSQRT_D = 0xeeb10bc0,
--
-- ARMI_VCMP_D = 0xeeb40b40,
-- ARMI_VCMPZ_D = 0xeeb50b40,
--
-- ARMI_VMRS = 0xeef1fa10,
--
-- ARMI_VCVT_S32_F32 = 0xeebd0ac0,
-- ARMI_VCVT_S32_F64 = 0xeebd0bc0,
-- ARMI_VCVT_U32_F32 = 0xeebc0ac0,
-- ARMI_VCVT_U32_F64 = 0xeebc0bc0,
-- ARMI_VCVTR_S32_F32 = 0xeebd0a40,
-- ARMI_VCVTR_S32_F64 = 0xeebd0b40,
-- ARMI_VCVTR_U32_F32 = 0xeebc0a40,
-- ARMI_VCVTR_U32_F64 = 0xeebc0b40,
-- ARMI_VCVT_F32_S32 = 0xeeb80ac0,
-- ARMI_VCVT_F64_S32 = 0xeeb80bc0,
-- ARMI_VCVT_F32_U32 = 0xeeb80a40,
-- ARMI_VCVT_F64_U32 = 0xeeb80b40,
-- ARMI_VCVT_F32_F64 = 0xeeb70bc0,
-- ARMI_VCVT_F64_F32 = 0xeeb70ac0,
--
-- ARMI_VLDR_S = 0xed100a00,
-- ARMI_VLDR_D = 0xed100b00,
-- ARMI_VSTR_S = 0xed000a00,
-- ARMI_VSTR_D = 0xed000b00,
--} S390Ins;
--
--typedef enum S390Shift {
-+typedef enum S390xShift {
- S390SH_SLL, S390SH_SRL, S390SH_SRA
-- # Adjustment needed for ROR
--} S390Shift;
-+} S390xShift;
-
- /* ARM condition codes. */
--typedef enum ARMCC {
-- CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
-- CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
-- CC_HS = CC_CS, CC_LO = CC_CC
--} ARMCC;
-+typedef enum S390xCC {
-+
-+} S390xCC;
-
- #endif
-
-From 96afe3e331fc6aef12d3479ad2e8ae495bb7fc12 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Tue, 15 Nov 2016 10:42:11 +0530
-Subject: [PATCH 010/260] Create vm_s390x.dasc
-
-Created the vm_s390x.dasc file.
-It is a copy of vm_x86.dasc.
-Work to adapt it specifically to s390x is ongoing.
----
- src/vm_s390x.dasc | 5779 +++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 5779 insertions(+)
- create mode 100644 src/vm_s390x.dasc
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-new file mode 100644
-index 000000000..d7d618d3b
---- /dev/null
-+++ b/src/vm_s390x.dasc
-@@ -0,0 +1,5779 @@
-+|// Low-level VM code for x86 CPUs.
-+|// Bytecode interpreter, fast functions and helper functions.
-+|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
-+|
-+|.if P64
-+|.arch x64
-+|.else
-+|.arch x86
-+|.endif
-+|.section code_op, code_sub
-+|
-+|.actionlist build_actionlist
-+|.globals GLOB_
-+|.globalnames globnames
-+|.externnames extnames
-+|
-+|//-----------------------------------------------------------------------
-+|
-+|.if P64
-+|.define X64, 1
-+|.if WIN
-+|.define X64WIN, 1
-+|.endif
-+|.endif
-+|
-+|// Fixed register assignments for the interpreter.
-+|// This is very fragile and has many dependencies. Caveat emptor.
-+|.define BASE, edx // Not C callee-save, refetched anyway.
-+|.if not X64
-+|.define KBASE, edi // Must be C callee-save.
-+|.define KBASEa, KBASE
-+|.define PC, esi // Must be C callee-save.
-+|.define PCa, PC
-+|.define DISPATCH, ebx // Must be C callee-save.
-+|.elif X64WIN
-+|.define KBASE, edi // Must be C callee-save.
-+|.define KBASEa, rdi
-+|.define PC, esi // Must be C callee-save.
-+|.define PCa, rsi
-+|.define DISPATCH, ebx // Must be C callee-save.
-+|.else
-+|.define KBASE, r15d // Must be C callee-save.
-+|.define KBASEa, r15
-+|.define PC, ebx // Must be C callee-save.
-+|.define PCa, rbx
-+|.define DISPATCH, r14d // Must be C callee-save.
-+|.endif
-+|
-+|.define RA, ecx
-+|.define RAH, ch
-+|.define RAL, cl
-+|.define RB, ebp // Must be ebp (C callee-save).
-+|.define RC, eax // Must be eax.
-+|.define RCW, ax
-+|.define RCH, ah
-+|.define RCL, al
-+|.define OP, RB
-+|.define RD, RC
-+|.define RDW, RCW
-+|.define RDL, RCL
-+|.if X64
-+|.define RAa, rcx
-+|.define RBa, rbp
-+|.define RCa, rax
-+|.define RDa, rax
-+|.else
-+|.define RAa, RA
-+|.define RBa, RB
-+|.define RCa, RC
-+|.define RDa, RD
-+|.endif
-+|
-+|.if not X64
-+|.define FCARG1, ecx // x86 fastcall arguments.
-+|.define FCARG2, edx
-+|.elif X64WIN
-+|.define CARG1, rcx // x64/WIN64 C call arguments.
-+|.define CARG2, rdx
-+|.define CARG3, r8
-+|.define CARG4, r9
-+|.define CARG1d, ecx
-+|.define CARG2d, edx
-+|.define CARG3d, r8d
-+|.define CARG4d, r9d
-+|.define FCARG1, CARG1d // Upwards compatible to x86 fastcall.
-+|.define FCARG2, CARG2d
-+|.else
-+|.define CARG1, rdi // x64/POSIX C call arguments.
-+|.define CARG2, rsi
-+|.define CARG3, rdx
-+|.define CARG4, rcx
-+|.define CARG5, r8
-+|.define CARG6, r9
-+|.define CARG1d, edi
-+|.define CARG2d, esi
-+|.define CARG3d, edx
-+|.define CARG4d, ecx
-+|.define CARG5d, r8d
-+|.define CARG6d, r9d
-+|.define FCARG1, CARG1d // Simulate x86 fastcall.
-+|.define FCARG2, CARG2d
-+|.endif
-+|
-+|// Type definitions. Some of these are only used for documentation.
-+|.type L, lua_State
-+|.type GL, global_State
-+|.type TVALUE, TValue
-+|.type GCOBJ, GCobj
-+|.type STR, GCstr
-+|.type TAB, GCtab
-+|.type LFUNC, GCfuncL
-+|.type CFUNC, GCfuncC
-+|.type PROTO, GCproto
-+|.type UPVAL, GCupval
-+|.type NODE, Node
-+|.type NARGS, int
-+|.type TRACE, GCtrace
-+|.type SBUF, SBuf
-+|
-+|// Stack layout while in interpreter. Must match with lj_frame.h.
-+|//-----------------------------------------------------------------------
-+|.if not X64 // x86 stack layout.
-+|
-+|.if WIN
-+|
-+|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
-+|.macro saveregs_
-+| push edi; push esi; push ebx
-+| push extern lj_err_unwind_win
-+| fs; push dword [0]
-+| fs; mov [0], esp
-+| sub esp, CFRAME_SPACE
-+|.endmacro
-+|.macro restoreregs
-+| add esp, CFRAME_SPACE
-+| fs; pop dword [0]
-+| pop edi // Short for esp += 4.
-+| pop ebx; pop esi; pop edi; pop ebp
-+|.endmacro
-+|
-+|.else
-+|
-+|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
-+|.macro saveregs_
-+| push edi; push esi; push ebx
-+| sub esp, CFRAME_SPACE
-+|.endmacro
-+|.macro restoreregs
-+| add esp, CFRAME_SPACE
-+| pop ebx; pop esi; pop edi; pop ebp
-+|.endmacro
-+|
-+|.endif
-+|
-+|.macro saveregs
-+| push ebp; saveregs_
-+|.endmacro
-+|
-+|.if WIN
-+|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
-+|.define SAVE_NRES, aword [esp+aword*18]
-+|.define SAVE_CFRAME, aword [esp+aword*17]
-+|.define SAVE_L, aword [esp+aword*16]
-+|//----- 16 byte aligned, ^^^ arguments from C caller
-+|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
-+|.define SAVE_R4, aword [esp+aword*14]
-+|.define SAVE_R3, aword [esp+aword*13]
-+|.define SAVE_R2, aword [esp+aword*12]
-+|//----- 16 byte aligned
-+|.define SAVE_R1, aword [esp+aword*11]
-+|.define SEH_FUNC, aword [esp+aword*10]
-+|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
-+|.define UNUSED2, aword [esp+aword*8]
-+|//----- 16 byte aligned
-+|.define UNUSED1, aword [esp+aword*7]
-+|.define SAVE_PC, aword [esp+aword*6]
-+|.define TMP2, aword [esp+aword*5]
-+|.define TMP1, aword [esp+aword*4]
-+|//----- 16 byte aligned
-+|.define ARG4, aword [esp+aword*3]
-+|.define ARG3, aword [esp+aword*2]
-+|.define ARG2, aword [esp+aword*1]
-+|.define ARG1, aword [esp] //<-- esp while in interpreter.
-+|//----- 16 byte aligned, ^^^ arguments for C callee
-+|.else
-+|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
-+|.define SAVE_NRES, aword [esp+aword*14]
-+|.define SAVE_CFRAME, aword [esp+aword*13]
-+|.define SAVE_L, aword [esp+aword*12]
-+|//----- 16 byte aligned, ^^^ arguments from C caller
-+|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
-+|.define SAVE_R4, aword [esp+aword*10]
-+|.define SAVE_R3, aword [esp+aword*9]
-+|.define SAVE_R2, aword [esp+aword*8]
-+|//----- 16 byte aligned
-+|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
-+|.define SAVE_PC, aword [esp+aword*6]
-+|.define TMP2, aword [esp+aword*5]
-+|.define TMP1, aword [esp+aword*4]
-+|//----- 16 byte aligned
-+|.define ARG4, aword [esp+aword*3]
-+|.define ARG3, aword [esp+aword*2]
-+|.define ARG2, aword [esp+aword*1]
-+|.define ARG1, aword [esp] //<-- esp while in interpreter.
-+|//----- 16 byte aligned, ^^^ arguments for C callee
-+|.endif
-+|
-+|// FPARGx overlaps ARGx and ARG(x+1) on x86.
-+|.define FPARG3, qword [esp+qword*1]
-+|.define FPARG1, qword [esp]
-+|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ).
-+|.define TMPQ, qword [esp+aword*4]
-+|.define TMP3, ARG4
-+|.define ARG5, TMP1
-+|.define TMPa, TMP1
-+|.define MULTRES, TMP2
-+|
-+|// Arguments for vm_call and vm_pcall.
-+|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME!
-+|
-+|// Arguments for vm_cpcall.
-+|.define INARG_CP_CALL, SAVE_ERRF
-+|.define INARG_CP_UD, SAVE_NRES
-+|.define INARG_CP_FUNC, SAVE_CFRAME
-+|
-+|//-----------------------------------------------------------------------
-+|.elif X64WIN // x64/Windows stack layout
-+|
-+|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
-+|.macro saveregs_
-+| push rdi; push rsi; push rbx
-+| sub rsp, CFRAME_SPACE
-+|.endmacro
-+|.macro saveregs
-+| push rbp; saveregs_
-+|.endmacro
-+|.macro restoreregs
-+| add rsp, CFRAME_SPACE
-+| pop rbx; pop rsi; pop rdi; pop rbp
-+|.endmacro
-+|
-+|.define SAVE_CFRAME, aword [rsp+aword*13]
-+|.define SAVE_PC, dword [rsp+dword*25]
-+|.define SAVE_L, dword [rsp+dword*24]
-+|.define SAVE_ERRF, dword [rsp+dword*23]
-+|.define SAVE_NRES, dword [rsp+dword*22]
-+|.define TMP2, dword [rsp+dword*21]
-+|.define TMP1, dword [rsp+dword*20]
-+|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
-+|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
-+|.define SAVE_R4, aword [rsp+aword*8]
-+|.define SAVE_R3, aword [rsp+aword*7]
-+|.define SAVE_R2, aword [rsp+aword*6]
-+|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
-+|.define ARG5, aword [rsp+aword*4]
-+|.define CSAVE_4, aword [rsp+aword*3]
-+|.define CSAVE_3, aword [rsp+aword*2]
-+|.define CSAVE_2, aword [rsp+aword*1]
-+|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
-+|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
-+|
-+|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
-+|.define TMPQ, qword [rsp+aword*10]
-+|.define MULTRES, TMP2
-+|.define TMPa, ARG5
-+|.define ARG5d, dword [rsp+aword*4]
-+|.define TMP3, ARG5d
-+|
-+|//-----------------------------------------------------------------------
-+|.else // x64/POSIX stack layout
-+|
-+|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
-+|.macro saveregs_
-+| push rbx; push r15; push r14
-+|.if NO_UNWIND
-+| push r13; push r12
-+|.endif
-+| sub rsp, CFRAME_SPACE
-+|.endmacro
-+|.macro saveregs
-+| push rbp; saveregs_
-+|.endmacro
-+|.macro restoreregs
-+| add rsp, CFRAME_SPACE
-+|.if NO_UNWIND
-+| pop r12; pop r13
-+|.endif
-+| pop r14; pop r15; pop rbx; pop rbp
-+|.endmacro
-+|
-+|//----- 16 byte aligned,
-+|.if NO_UNWIND
-+|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
-+|.define SAVE_R4, aword [rsp+aword*10]
-+|.define SAVE_R3, aword [rsp+aword*9]
-+|.define SAVE_R2, aword [rsp+aword*8]
-+|.define SAVE_R1, aword [rsp+aword*7]
-+|.define SAVE_RU2, aword [rsp+aword*6]
-+|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
-+|.else
-+|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
-+|.define SAVE_R4, aword [rsp+aword*8]
-+|.define SAVE_R3, aword [rsp+aword*7]
-+|.define SAVE_R2, aword [rsp+aword*6]
-+|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
-+|.endif
-+|.define SAVE_CFRAME, aword [rsp+aword*4]
-+|.define SAVE_PC, dword [rsp+dword*7]
-+|.define SAVE_L, dword [rsp+dword*6]
-+|.define SAVE_ERRF, dword [rsp+dword*5]
-+|.define SAVE_NRES, dword [rsp+dword*4]
-+|.define TMPa, aword [rsp+aword*1]
-+|.define TMP2, dword [rsp+dword*1]
-+|.define TMP1, dword [rsp] //<-- rsp while in interpreter.
-+|//----- 16 byte aligned
-+|
-+|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
-+|.define TMPQ, qword [rsp]
-+|.define TMP3, dword [rsp+aword*1]
-+|.define MULTRES, TMP2
-+|
-+|.endif
-+|
-+|//-----------------------------------------------------------------------
-+|
-+|// Instruction headers.
-+|.macro ins_A; .endmacro
-+|.macro ins_AD; .endmacro
-+|.macro ins_AJ; .endmacro
-+|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro
-+|.macro ins_AB_; movzx RB, RCH; .endmacro
-+|.macro ins_A_C; movzx RC, RCL; .endmacro
-+|.macro ins_AND; not RDa; .endmacro
-+|
-+|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
-+|.macro ins_NEXT
-+| mov RC, [PC] // Load the 32 bit instruction word.
-+| movzx RA, RCH // RA = bits 8..15 of the word.
-+| movzx OP, RCL // OP = low byte of the word.
-+| add PC, 4 // PC now points past the fetched instruction.
-+| shr RC, 16 // RC (= RD) = upper 16 bits of the word.
-+|.if X64
-+| jmp aword [DISPATCH+OP*8] // Dispatch table entries are 8 bytes on x64.
-+|.else
-+| jmp aword [DISPATCH+OP*4] // Dispatch table entries are 4 bytes on x86.
-+|.endif
-+|.endmacro
-+|
-+|// Instruction footer.
-+|.if 1
-+| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
-+| .define ins_next, ins_NEXT
-+| .define ins_next_, ins_NEXT
-+|.else
-+| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
-+| // Affects only certain kinds of benchmarks (and only with -j off).
-+| // Around 10%-30% slower on Core2, a lot more slower on P4.
-+| .macro ins_next
-+| jmp ->ins_next
-+| .endmacro
-+| .macro ins_next_
-+| ->ins_next:
-+| ins_NEXT
-+| .endmacro
-+|.endif
-+|
-+|// Call decode and dispatch.
-+|.macro ins_callt
-+| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
-+| mov PC, LFUNC:RB->pc // Load the callee's pc field.
-+| mov RA, [PC] // Load the 32 bit instruction word at PC.
-+| movzx OP, RAL // OP = low byte; must be extracted before RA is overwritten.
-+| movzx RA, RAH // RA = bits 8..15 of the word.
-+| add PC, 4
-+|.if X64
-+| jmp aword [DISPATCH+OP*8]
-+|.else
-+| jmp aword [DISPATCH+OP*4]
-+|.endif
-+|.endmacro
-+|
-+|.macro ins_call
-+| // BASE = new base, RB = LFUNC, RD = nargs+1
-+| mov [BASE-4], PC // Store the current PC at [BASE-4], then continue as ins_callt.
-+| ins_callt
-+|.endmacro
-+|
-+|//-----------------------------------------------------------------------
-+|
-+|// Macros to test operand types.
-+|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro
-+|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro
-+|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro
-+|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro
-+|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro
-+|
-+|// These operands must be used with movzx.
-+|.define PC_OP, byte [PC-4]
-+|.define PC_RA, byte [PC-3]
-+|.define PC_RB, byte [PC-1]
-+|.define PC_RC, byte [PC-2]
-+|.define PC_RD, word [PC-2]
-+|
-+|.macro branchPC, reg
-+| lea PC, [PC+reg*4-BCBIAS_J*4]
-+|.endmacro
-+|
-+|// Assumes DISPATCH is relative to GL.
-+#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
-+#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
-+|
-+#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
-+|
-+|// Decrement hashed hotcount and trigger trace recorder if zero.
-+|.macro hotloop, reg
-+| mov reg, PC // reg is clobbered: it becomes the hotcount table offset.
-+| shr reg, 1
-+| and reg, HOTCOUNT_PCMASK // Hash: (PC >> 1) masked to the table range.
-+| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP // 16 bit counter, located relative to DISPATCH.
-+| jb ->vm_hotloop // Taken when the subtraction borrows (counter underflow).
-+|.endmacro
-+|
-+|.macro hotcall, reg // Same scheme as hotloop, but subtracts HOTCOUNT_CALL and branches to vm_hotcall.
-+| mov reg, PC // reg is clobbered: it becomes the hotcount table offset.
-+| shr reg, 1
-+| and reg, HOTCOUNT_PCMASK // Hash: (PC >> 1) masked to the table range.
-+| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
-+| jb ->vm_hotcall // Taken when the subtraction borrows (counter underflow).
-+|.endmacro
-+|
-+|// Set current VM state.
-+|.macro set_vmstate, st
-+| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
-+|.endmacro
-+|
-+|// x87 compares.
-+|.macro fcomparepp // Compare and pop st0 >< st1.
-+| fucomip st1
-+| fpop
-+|.endmacro
-+|
-+|.macro fpop1; fstp st1; .endmacro
-+|
-+|// Synthesize SSE FP constants.
-+|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
-+|.if X64
-+| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
-+|.else
-+| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1
-+|.endif
-+|.endmacro
-+|
-+|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
-+|.if X64
-+| mov64 tmp, U64x(val,00000000); movd reg, tmp
-+|.else
-+| mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51
-+|.endif
-+|.endmacro
-+|
-+|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
-+| sseconst_hi reg, tmp, 80000000
-+|.endmacro
-+|.macro sseconst_1, reg, tmp // Synthesize 1.0.
-+| sseconst_hi reg, tmp, 3ff00000
-+|.endmacro
-+|.macro sseconst_m1, reg, tmp // Synthesize -1.0.
-+| sseconst_hi reg, tmp, bff00000
-+|.endmacro
-+|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
-+| sseconst_hi reg, tmp, 43300000
-+|.endmacro
-+|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
-+| sseconst_hi reg, tmp, 43380000
-+|.endmacro
-+|
-+|// Move table write barrier back. Overwrites reg.
-+|.macro barrierback, tab, reg
-+| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
-+| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = current head of the gc.grayagain list.
-+| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // tab becomes the new list head.
-+| mov tab->gclist, reg // Link tab to the previous head.
-+|.endmacro
-+|
-+|//-----------------------------------------------------------------------
-+
-+/* Generate subroutines used by opcodes and other parts of the VM. */
-+/* The .code_sub section should be last to help static branch prediction. */
-+static void build_subroutines(BuildCtx *ctx)
-+{
-+ |.code_sub
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Return handling ----------------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->vm_returnp:
-+ | test PC, FRAME_P
-+ | jz ->cont_dispatch
-+ |
-+ | // Return from pcall or xpcall fast func.
-+ | and PC, -8
-+ | sub BASE, PC // Restore caller base.
-+ | lea RAa, [RA+PC-8] // Rebase RA and prepend one result.
-+ | mov PC, [BASE-4] // Fetch PC of previous frame.
-+ | // Prepending may overwrite the pcall frame, so do it at the end.
-+ | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
-+ |
-+ |->vm_returnc:
-+ | add RD, 1 // RD = nresults+1
-+ | jz ->vm_unwind_yield
-+ | mov MULTRES, RD
-+ | test PC, FRAME_TYPE
-+ | jz ->BC_RET_Z // Handle regular return to Lua.
-+ |
-+ |->vm_return:
-+ | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
-+ | xor PC, FRAME_C
-+ | test PC, FRAME_TYPE
-+ | jnz ->vm_returnp
-+ |
-+ | // Return to C.
-+ | set_vmstate C
-+ | and PC, -8
-+ | sub PC, BASE
-+ | neg PC // Previous base = BASE - delta.
-+ |
-+ | sub RD, 1
-+ | jz >2
-+ |1: // Move results down.
-+ |.if X64
-+ | mov RBa, [BASE+RA]
-+ | mov [BASE-8], RBa
-+ |.else
-+ | mov RB, [BASE+RA]
-+ | mov [BASE-8], RB
-+ | mov RB, [BASE+RA+4]
-+ | mov [BASE-4], RB
-+ |.endif
-+ | add BASE, 8
-+ | sub RD, 1
-+ | jnz <1
-+ |2:
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, PC
-+ |3:
-+ | mov RD, MULTRES
-+ | mov RA, SAVE_NRES // RA = wanted nresults+1
-+ |4:
-+ | cmp RA, RD
-+ | jne >6 // More/less results wanted?
-+ |5:
-+ | sub BASE, 8
-+ | mov L:RB->top, BASE
-+ |
-+ |->vm_leave_cp:
-+ | mov RAa, SAVE_CFRAME // Restore previous C frame.
-+ | mov L:RB->cframe, RAa
-+ | xor eax, eax // Ok return status for vm_pcall.
-+ |
-+ |->vm_leave_unw:
-+ | restoreregs
-+ | ret
-+ |
-+ |6:
-+ | jb >7 // Less results wanted?
-+ | // More results wanted. Check stack size and fill up results with nil.
-+ | cmp BASE, L:RB->maxstack
-+ | ja >8
-+ | mov dword [BASE-4], LJ_TNIL
-+ | add BASE, 8
-+ | add RD, 1
-+ | jmp <4
-+ |
-+ |7: // Less results wanted.
-+ | test RA, RA
-+ | jz <5 // But check for LUA_MULTRET+1.
-+ | sub RA, RD // Negative result!
-+ | lea BASE, [BASE+RA*8] // Correct top.
-+ | jmp <5
-+ |
-+ |8: // Corner case: need to grow stack for filling up results.
-+ | // This can happen if:
-+ | // - A C function grows the stack (a lot).
-+ | // - The GC shrinks the stack in between.
-+ | // - A return back from a lua_call() with (high) nresults adjustment.
-+ | mov L:RB->top, BASE // Save current top held in BASE (yes).
-+ | mov MULTRES, RD // Need to fill only remainder with nil.
-+ | mov FCARG2, RA
-+ | mov FCARG1, L:RB
-+ | call extern lj_state_growstack@8 // (lua_State *L, int n)
-+ | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
-+ | jmp <3
-+ |
-+ |->vm_unwind_yield: // Reached from vm_returnc when RD+1 wraps to zero.
-+ | mov al, LUA_YIELD // Yield status goes into al (only the low byte of eax is written).
-+ | jmp ->vm_unwind_c_eh // Share the landing pad that sets vmstate to C and leaves.
-+ |
-+ |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall.
-+ | // (void *cframe, int errcode)
-+ |.if X64
-+ | mov eax, CARG2d // Error return status for vm_pcall.
-+ | mov rsp, CARG1
-+ |.else
-+ | mov eax, FCARG2 // Error return status for vm_pcall.
-+ | mov esp, FCARG1
-+ |.if WIN
-+ | lea FCARG1, SEH_NEXT
-+ | fs; mov [0], FCARG1
-+ |.endif
-+ |.endif
-+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
-+ | mov L:RB, SAVE_L
-+ | mov GL:RB, L:RB->glref
-+ | mov dword GL:RB->vmstate, ~LJ_VMST_C
-+ | jmp ->vm_leave_unw
-+ |
-+ |->vm_unwind_rethrow:
-+ |.if X64 and not X64WIN
-+ | mov FCARG1, SAVE_L
-+ | mov FCARG2, eax
-+ | restoreregs
-+ | jmp extern lj_err_throw@8 // (lua_State *L, int errcode)
-+ |.endif
-+ |
-+ |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall.
-+ | // (void *cframe)
-+ |.if X64
-+ | and CARG1, CFRAME_RAWMASK
-+ | mov rsp, CARG1
-+ |.else
-+ | and FCARG1, CFRAME_RAWMASK
-+ | mov esp, FCARG1
-+ |.if WIN
-+ | lea FCARG1, SEH_NEXT
-+ | fs; mov [0], FCARG1
-+ |.endif
-+ |.endif
-+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
-+ | mov L:RB, SAVE_L
-+ | mov RAa, -8 // Results start at BASE+RA = BASE-8.
-+ | mov RD, 1+1 // Really 1+2 results, incr. later.
-+ | mov BASE, L:RB->base
-+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-+ | add DISPATCH, GG_G2DISP
-+ | mov PC, [BASE-4] // Fetch PC of previous frame.
-+ | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
-+ | set_vmstate INTERP
-+ | jmp ->vm_returnc // Increments RD/MULTRES and returns.
-+ |
-+ |.if WIN and not X64
-+ |->vm_rtlunwind@16: // Thin layer around RtlUnwind.
-+ | // (void *cframe, void *excptrec, void *unwinder, int errcode)
-+ | mov [esp], FCARG1 // Return value for RtlUnwind.
-+ | push FCARG2 // Exception record for RtlUnwind.
-+ | push 0 // Ignored by RtlUnwind.
-+ | push dword [FCARG1+CFRAME_OFS_SEH]
-+ | call extern RtlUnwind@16 // Violates ABI (clobbers too much).
-+ | mov FCARG1, eax
-+ | mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
-+ | ret // Jump to unwinder.
-+ |.endif
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Grow stack for calls -----------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->vm_growstack_c: // Grow stack for C function.
-+ | mov FCARG2, LUA_MINSTACK // n = LUA_MINSTACK.
-+ | jmp >2 // Skips the L->base/L->top/SAVE_PC stores done after label 1.
-+ |
-+ |->vm_growstack_v: // Grow stack for vararg Lua function.
-+ | sub RD, 8 // Adjust RD, then share the fixarg path from label 1 on.
-+ | jmp >1
-+ |
-+ |->vm_growstack_f: // Grow stack for fixarg Lua function.
-+ | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
-+ | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8; stored as L->top below.
-+ |1:
-+ | movzx RA, byte [PC-4+PC2PROTO(framesize)] // RA = framesize byte read relative to PC.
-+ | add PC, 4 // Must point after first instruction.
-+ | mov L:RB->base, BASE
-+ | mov L:RB->top, RD
-+ | mov SAVE_PC, PC
-+ | mov FCARG2, RA // n = framesize.
-+ |2:
-+ | // RB = L, L->base = new base, L->top = top
-+ | mov FCARG1, L:RB
-+ | call extern lj_state_growstack@8 // (lua_State *L, int n)
-+ | mov BASE, L:RB->base // Reload base and top from L after the call.
-+ | mov RD, L:RB->top
-+ | mov LFUNC:RB, [BASE-8] // Function object is read from the slot below BASE.
-+ | sub RD, BASE
-+ | shr RD, 3 // Byte distance to slot count (8 byte slots).
-+ | add NARGS:RD, 1 // RD = (top-base)/8 + 1 = nargs+1.
-+ | // BASE = new base, RB = LFUNC, RD = nargs+1
-+ | ins_callt // Just retry the call.
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Entry points into the assembler VM ---------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->vm_resume: // Setup C frame and resume thread.
-+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
-+ | saveregs
-+ |.if X64
-+ | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
-+ | mov SAVE_L, CARG1d
-+ | mov RA, CARG2d
-+ |.else
-+ | mov L:RB, SAVE_L
-+ | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
-+ |.endif
-+ | mov PC, FRAME_CP
-+ | xor RD, RD
-+ | lea KBASEa, [esp+CFRAME_RESUME]
-+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-+ | add DISPATCH, GG_G2DISP
-+ | mov SAVE_PC, RD // Any value outside of bytecode is ok.
-+ | mov SAVE_CFRAME, RDa
-+ |.if X64
-+ | mov SAVE_NRES, RD
-+ | mov SAVE_ERRF, RD
-+ |.endif
-+ | mov L:RB->cframe, KBASEa
-+ | cmp byte L:RB->status, RDL
-+ | je >2 // Initial resume (like a call).
-+ |
-+ | // Resume after yield (like a return).
-+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-+ | set_vmstate INTERP
-+ | mov byte L:RB->status, RDL
-+ | mov BASE, L:RB->base
-+ | mov RD, L:RB->top
-+ | sub RD, RA
-+ | shr RD, 3
-+ | add RD, 1 // RD = nresults+1
-+ | sub RA, BASE // RA = resultofs
-+ | mov PC, [BASE-4]
-+ | mov MULTRES, RD
-+ | test PC, FRAME_TYPE
-+ | jz ->BC_RET_Z
-+ | jmp ->vm_return
-+ |
-+ |->vm_pcall: // Setup protected C frame and enter VM.
-+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
-+ | saveregs
-+ | mov PC, FRAME_CP
-+ |.if X64
-+ | mov SAVE_ERRF, CARG4d
-+ |.endif
-+ | jmp >1
-+ |
-+ |->vm_call: // Setup C frame and enter VM.
-+ | // (lua_State *L, TValue *base, int nres1)
-+ | saveregs
-+ | mov PC, FRAME_C
-+ |
-+ |1: // Entry point for vm_pcall above (PC = ftype).
-+ |.if X64
-+ | mov SAVE_NRES, CARG3d
-+ | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
-+ | mov SAVE_L, CARG1d
-+ | mov RA, CARG2d
-+ |.else
-+ | mov L:RB, SAVE_L
-+ | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
-+ |.endif
-+ |
-+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-+ | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
-+ | mov SAVE_CFRAME, KBASEa
-+ | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
-+ | add DISPATCH, GG_G2DISP
-+ |.if X64
-+ | mov L:RB->cframe, rsp
-+ |.else
-+ | mov L:RB->cframe, esp
-+ |.endif
-+ |
-+ |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
-+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-+ | set_vmstate INTERP
-+ | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
-+ | add PC, RA
-+ | sub PC, BASE // PC = frame delta + frame type
-+ |
-+ | mov RD, L:RB->top
-+ | sub RD, RA
-+ | shr NARGS:RD, 3
-+ | add NARGS:RD, 1 // RD = nargs+1
-+ |
-+ |->vm_call_dispatch:
-+ | mov LFUNC:RB, [RA-8]
-+ | cmp dword [RA-4], LJ_TFUNC
-+ | jne ->vmeta_call // Ensure KBASE defined and != BASE.
-+ |
-+ |->vm_call_dispatch_f:
-+ | mov BASE, RA
-+ | ins_call
-+ | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
-+ |
-+ |->vm_cpcall: // Setup protected C frame, call C.
-+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
-+ | saveregs
-+ |.if X64
-+ | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
-+ | mov SAVE_L, CARG1d
-+ |.else
-+ | mov L:RB, SAVE_L
-+ | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap!
-+ | mov RC, INARG_CP_UD // Get args before they are overwritten.
-+ | mov RA, INARG_CP_FUNC
-+ | mov BASE, INARG_CP_CALL
-+ |.endif
-+ | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
-+ |
-+ | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
-+ | sub KBASE, L:RB->top
-+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
-+ | mov SAVE_ERRF, 0 // No error function.
-+ | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
-+ | add DISPATCH, GG_G2DISP
-+ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
-+ |
-+ |.if X64
-+ | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
-+ | mov SAVE_CFRAME, KBASEa
-+ | mov L:RB->cframe, rsp
-+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-+ |
-+ | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
-+ |.else
-+ | mov ARG3, RC // Have to copy args downwards.
-+ | mov ARG2, RA
-+ | mov ARG1, L:RB
-+ |
-+ | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
-+ | mov SAVE_CFRAME, KBASE
-+ | mov L:RB->cframe, esp
-+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-+ |
-+ | call BASE // (lua_State *L, lua_CFunction func, void *ud)
-+ |.endif
-+ | // TValue * (new base) or NULL returned in eax (RC).
-+ | test RC, RC
-+ | jz ->vm_leave_cp // No base? Just remove C frame.
-+ | mov RA, RC
-+ | mov PC, FRAME_CP
-+ | jmp <2 // Else continue with the call.
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Metamethod handling ------------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |//-- Continuation dispatch ----------------------------------------------
-+ |
-+ |->cont_dispatch:
-+ | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
-+ | add RA, BASE
-+ | and PC, -8
-+ | mov RB, BASE
-+ | sub BASE, PC // Restore caller BASE.
-+ | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg.
-+ | mov RC, RA // ... in [RC]
-+ | mov PC, [RB-12] // Restore PC from [cont|PC].
-+ |.if X64
-+ | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug.
-+ |.if FFI
-+ | cmp RA, 1
-+ | jbe >1
-+ |.endif
-+ | lea KBASEa, qword [=>0]
-+ | add RAa, KBASEa
-+ |.else
-+ | mov RA, dword [RB-16]
-+ |.if FFI
-+ | cmp RA, 1
-+ | jbe >1
-+ |.endif
-+ |.endif
-+ | mov LFUNC:KBASE, [BASE-8]
-+ | mov KBASE, LFUNC:KBASE->pc
-+ | mov KBASE, [KBASE+PC2PROTO(k)]
-+ | // BASE = base, RC = result, RB = meta base
-+ | jmp RAa // Jump to continuation.
-+ |
-+ |.if FFI
-+ |1:
-+ | je ->cont_ffi_callback // cont = 1: return from FFI callback.
-+ | // cont = 0: Tail call from C function.
-+ | sub RB, BASE
-+ | shr RB, 3
-+ | lea RD, [RB-1]
-+ | jmp ->vm_call_tail
-+ |.endif
-+ |
-+ |->cont_cat: // BASE = base, RC = result, RB = mbase
-+ | movzx RA, PC_RB
-+ | sub RB, 16
-+ | lea RA, [BASE+RA*8]
-+ | sub RA, RB
-+ | je ->cont_ra
-+ | neg RA
-+ | shr RA, 3
-+ |.if X64WIN
-+ | mov CARG3d, RA
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE
-+ | mov RCa, [RC]
-+ | mov [RB], RCa
-+ | mov CARG2d, RB
-+ |.elif X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE
-+ | mov CARG3d, RA
-+ | mov RAa, [RC]
-+ | mov [RB], RAa
-+ | mov CARG2d, RB
-+ |.else
-+ | mov ARG3, RA
-+ | mov RA, [RC+4]
-+ | mov RC, [RC]
-+ | mov [RB+4], RA
-+ | mov [RB], RC
-+ | mov ARG2, RB
-+ |.endif
-+ | jmp ->BC_CAT_Z
-+ |
-+ |//-- Table indexing metamethods -----------------------------------------
-+ |
-+ |->vmeta_tgets:
-+ | mov TMP1, RC // RC = GCstr *
-+ | mov TMP2, LJ_TSTR
-+ | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
-+ | cmp PC_OP, BC_GGET
-+ | jne >1
-+ | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
-+ | mov [RA], TAB:RB // RB = GCtab *
-+ | mov dword [RA+4], LJ_TTAB
-+ | mov RB, RA
-+ | jmp >2
-+ |
-+ |->vmeta_tgetb:
-+ | movzx RC, PC_RC
-+ |.if DUALNUM
-+ | mov TMP2, LJ_TISNUM
-+ | mov TMP1, RC
-+ |.else
-+ | cvtsi2sd xmm0, RC
-+ | movsd TMPQ, xmm0
-+ |.endif
-+ | lea RCa, TMPQ // Store temp. TValue in TMPQ.
-+ | jmp >1
-+ |
-+ |->vmeta_tgetv:
-+ | movzx RC, PC_RC // Reload TValue *k from RC.
-+ | lea RC, [BASE+RC*8]
-+ |1:
-+ | movzx RB, PC_RB // Reload TValue *t from RB.
-+ | lea RB, [BASE+RB*8]
-+ |2:
-+ |.if X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-+ | mov CARG2d, RB
-+ | mov CARG3, RCa // May be 64 bit ptr to stack.
-+ | mov L:RB, L:CARG1d
-+ |.else
-+ | mov ARG2, RB
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, RC
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
-+ | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | test RC, RC
-+ | jz >3
-+ |->cont_ra: // BASE = base, RC = result
-+ | movzx RA, PC_RA
-+ |.if X64
-+ | mov RBa, [RC]
-+ | mov [BASE+RA*8], RBa
-+ |.else
-+ | mov RB, [RC+4]
-+ | mov RC, [RC]
-+ | mov [BASE+RA*8+4], RB
-+ | mov [BASE+RA*8], RC
-+ |.endif
-+ | ins_next
-+ |
-+ |3: // Call __index metamethod.
-+ | // BASE = base, L->top = new base, stack = cont/func/t/k
-+ | mov RA, L:RB->top
-+ | mov [RA-12], PC // [cont|PC]
-+ | lea PC, [RA+FRAME_CONT]
-+ | sub PC, BASE
-+ | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
-+ | mov NARGS:RD, 2+1 // 2 args for func(t, k).
-+ | jmp ->vm_call_dispatch_f
-+ |
-+ |->vmeta_tgetr: // Look up integer key RC in table RB via lj_tab_getinth.
-+ | mov FCARG1, TAB:RB
-+ | mov RB, BASE // Save BASE.
-+ | mov FCARG2, RC // Caveat: FCARG2 == BASE
-+ | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
-+ | // cTValue * or NULL returned in eax (RC).
-+ | movzx RA, PC_RA // Reload the destination slot index.
-+ | mov BASE, RB // Restore BASE.
-+ | test RC, RC
-+ | jnz ->BC_TGETR_Z // Non-NULL result: continue at BC_TGETR_Z with RC = result pointer.
-+ | mov dword [BASE+RA*8+4], LJ_TNIL // NULL result: write the nil tag into the type word of slot RA.
-+ | jmp ->BC_TGETR2_Z
-+ |
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->vmeta_tsets:
-+ | mov TMP1, RC // RC = GCstr *
-+ | mov TMP2, LJ_TSTR
-+ | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
-+ | cmp PC_OP, BC_GSET
-+ | jne >1
-+ | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
-+ | mov [RA], TAB:RB // RB = GCtab *
-+ | mov dword [RA+4], LJ_TTAB
-+ | mov RB, RA
-+ | jmp >2
-+ |
-+ |->vmeta_tsetb:
-+ | movzx RC, PC_RC
-+ |.if DUALNUM
-+ | mov TMP2, LJ_TISNUM
-+ | mov TMP1, RC
-+ |.else
-+ | cvtsi2sd xmm0, RC
-+ | movsd TMPQ, xmm0
-+ |.endif
-+ | lea RCa, TMPQ // Store temp. TValue in TMPQ.
-+ | jmp >1
-+ |
-+ |->vmeta_tsetv:
-+ | movzx RC, PC_RC // Reload TValue *k from RC.
-+ | lea RC, [BASE+RC*8]
-+ |1:
-+ | movzx RB, PC_RB // Reload TValue *t from RB.
-+ | lea RB, [BASE+RB*8]
-+ |2:
-+ |.if X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-+ | mov CARG2d, RB
-+ | mov CARG3, RCa // May be 64 bit ptr to stack.
-+ | mov L:RB, L:CARG1d
-+ |.else
-+ | mov ARG2, RB
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, RC
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
-+ | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | test RC, RC
-+ | jz >3
-+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
-+ | movzx RA, PC_RA
-+ |.if X64
-+ | mov RBa, [BASE+RA*8]
-+ | mov [RC], RBa
-+ |.else
-+ | mov RB, [BASE+RA*8+4]
-+ | mov RA, [BASE+RA*8]
-+ | mov [RC+4], RB
-+ | mov [RC], RA
-+ |.endif
-+ |->cont_nop: // BASE = base, (RC = result)
-+ | ins_next
-+ |
-+ |3: // Call __newindex metamethod.
-+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
-+ | mov RA, L:RB->top
-+ | mov [RA-12], PC // [cont|PC]
-+ | movzx RC, PC_RA
-+ | // Copy value to third argument.
-+ |.if X64
-+ | mov RBa, [BASE+RC*8]
-+ | mov [RA+16], RBa
-+ |.else
-+ | mov RB, [BASE+RC*8+4]
-+ | mov RC, [BASE+RC*8]
-+ | mov [RA+20], RB
-+ | mov [RA+16], RC
-+ |.endif
-+ | lea PC, [RA+FRAME_CONT]
-+ | sub PC, BASE
-+ | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
-+ | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
-+ | jmp ->vm_call_dispatch_f
-+ |
-+ |->vmeta_tsetr:
-+ |.if X64WIN
-+ | mov L:CARG1d, SAVE_L
-+ | mov CARG3d, RC
-+ | mov L:CARG1d->base, BASE
-+ | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
-+ |.elif X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov CARG2d, TAB:RB
-+ | mov L:CARG1d->base, BASE
-+ | mov RB, BASE // Save BASE.
-+ | mov CARG3d, RC // Caveat: CARG3d == BASE.
-+ |.else
-+ | mov L:RA, SAVE_L
-+ | mov ARG2, TAB:RB
-+ | mov RB, BASE // Save BASE.
-+ | mov ARG3, RC
-+ | mov ARG1, L:RA
-+ | mov L:RA->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
-+ | // TValue * returned in eax (RC).
-+ | movzx RA, PC_RA
-+ | mov BASE, RB // Restore BASE.
-+ | jmp ->BC_TSETR_Z
-+ |
-+ |//-- Comparison metamethods ---------------------------------------------
-+ |
-+ |->vmeta_comp:
-+ |.if X64
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE.
-+ |.if X64WIN
-+ | lea CARG3d, [BASE+RD*8]
-+ | lea CARG2d, [BASE+RA*8]
-+ |.else
-+ | lea CARG2d, [BASE+RA*8]
-+ | lea CARG3d, [BASE+RD*8]
-+ |.endif
-+ | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA.
-+ | movzx CARG4d, PC_OP
-+ |.else
-+ | movzx RB, PC_OP
-+ | lea RD, [BASE+RD*8]
-+ | lea RA, [BASE+RA*8]
-+ | mov ARG4, RB
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, RD
-+ | mov ARG2, RA
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
-+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
-+ |3:
-+ | mov BASE, L:RB->base
-+ | cmp RC, 1
-+ | ja ->vmeta_binop
-+ |4:
-+ | lea PC, [PC+4]
-+ | jb >6
-+ |5:
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |6:
-+ | ins_next
-+ |
-+ |->cont_condt: // BASE = base, RC = result
-+ | add PC, 4
-+ | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true.
-+ | jb <5
-+ | jmp <6
-+ |
-+ |->cont_condf: // BASE = base, RC = result
-+ | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false.
-+ | jmp <4
-+ |
-+ |->vmeta_equal:
-+ | sub PC, 4
-+ |.if X64WIN
-+ | mov CARG3d, RD
-+ | mov CARG4d, RB
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
-+ | mov CARG2d, RA
-+ | mov CARG1d, L:RB // Caveat: CARG1d == RA.
-+ |.elif X64
-+ | mov CARG2d, RA
-+ | mov CARG4d, RB // Caveat: CARG4d == RA.
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG3d == BASE.
-+ | mov CARG3d, RD
-+ | mov CARG1d, L:RB
-+ |.else
-+ | mov ARG4, RB
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, RD
-+ | mov ARG2, RA
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
-+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
-+ | jmp <3
-+ |
-+ |->vmeta_equal_cd:
-+ |.if FFI
-+ | sub PC, 4
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov FCARG1, L:RB
-+ | mov FCARG2, dword [PC-4]
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins)
-+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
-+ | jmp <3
-+ |.endif
-+ |
-+ |->vmeta_istype:
-+ |.if X64
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-+ | mov CARG2d, RA
-+ | movzx CARG3d, PC_RD
-+ | mov L:CARG1d, L:RB
-+ |.else
-+ | movzx RD, PC_RD
-+ | mov ARG2, RA
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, RD
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
-+ | mov BASE, L:RB->base
-+ | jmp <6
-+ |
-+ |//-- Arithmetic metamethods ---------------------------------------------
-+ |
-+ |->vmeta_arith_vno:
-+ |.if DUALNUM
-+ | movzx RB, PC_RB
-+ |.endif
-+ |->vmeta_arith_vn:
-+ | lea RC, [KBASE+RC*8]
-+ | jmp >1
-+ |
-+ |->vmeta_arith_nvo:
-+ |.if DUALNUM
-+ | movzx RC, PC_RC
-+ |.endif
-+ |->vmeta_arith_nv:
-+ | lea RC, [KBASE+RC*8]
-+ | lea RB, [BASE+RB*8]
-+ | xchg RB, RC
-+ | jmp >2
-+ |
-+ |->vmeta_unm:
-+ | lea RC, [BASE+RD*8]
-+ | mov RB, RC
-+ | jmp >2
-+ |
-+ |->vmeta_arith_vvo:
-+ |.if DUALNUM
-+ | movzx RB, PC_RB
-+ |.endif
-+ |->vmeta_arith_vv:
-+ | lea RC, [BASE+RC*8]
-+ |1:
-+ | lea RB, [BASE+RB*8]
-+ |2:
-+ | lea RA, [BASE+RA*8]
-+ |.if X64WIN
-+ | mov CARG3d, RB
-+ | mov CARG4d, RC
-+ | movzx RC, PC_OP
-+ | mov ARG5d, RC
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
-+ | mov CARG2d, RA
-+ | mov CARG1d, L:RB // Caveat: CARG1d == RA.
-+ |.elif X64
-+ | movzx CARG5d, PC_OP
-+ | mov CARG2d, RA
-+ | mov CARG4d, RC // Caveat: CARG4d == RA.
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE.
-+ | mov CARG3d, RB
-+ | mov L:RB, L:CARG1d
-+ |.else
-+ | mov ARG3, RB
-+ | mov L:RB, SAVE_L
-+ | mov ARG4, RC
-+ | movzx RC, PC_OP
-+ | mov ARG2, RA
-+ | mov ARG5, RC
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
-+ | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | test RC, RC
-+ | jz ->cont_nop
-+ |
-+ | // Call metamethod for binary op. Also entered from vmeta_comp and vmeta_len.
-+ |->vmeta_binop:
-+ | // BASE = base, RC = new base, stack = cont/func/o1/o2
-+ | mov RA, RC // RA = new base (as expected by vm_call_dispatch).
-+ | sub RC, BASE // RC = byte delta from base to new base.
-+ | mov [RA-12], PC // [cont|PC]
-+ | lea PC, [RC+FRAME_CONT] // PC = frame delta + FRAME_CONT.
-+ | mov NARGS:RD, 2+1 // 2 args for func(o1, o2).
-+ | jmp ->vm_call_dispatch
-+ |
-+ |->vmeta_len:
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE
-+ | mov L:FCARG1, L:RB
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_len@8 // (lua_State *L, TValue *o)
-+ | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
-+ | mov BASE, L:RB->base
-+#if LJ_52
-+ | test RC, RC
-+ | jne ->vmeta_binop // Binop call for compatibility.
-+ | movzx RD, PC_RD
-+ | mov TAB:FCARG1, [BASE+RD*8]
-+ | jmp ->BC_LEN_Z
-+#else
-+ | jmp ->vmeta_binop // Binop call for compatibility.
-+#endif
-+ |
-+ |//-- Call metamethod ----------------------------------------------------
-+ |
-+ |->vmeta_call_ra: // Variant taking a slot index in RA instead of an address.
-+ | lea RA, [BASE+RA*8+8] // RA = address of the slot after slot RA, used as new base.
-+ |->vmeta_call: // Resolve and call __call metamethod.
-+ | // BASE = old base, RA = new base, RC = nargs+1 (accessed as NARGS:RD below), PC = return
-+ | mov TMP2, RA // Save RA, RC for us.
-+ | mov TMP1, NARGS:RD
-+ | sub RA, 8 // RA = new base - 8, passed as the func argument.
-+ |.if X64
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-+ | mov CARG2d, RA
-+ | lea CARG3d, [RA+NARGS:RD*8] // top argument = func + (nargs+1)*8.
-+ | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
-+ |.else
-+ | lea RC, [RA+NARGS:RD*8] // top argument = func + (nargs+1)*8.
-+ | mov L:RB, SAVE_L
-+ | mov ARG2, RA
-+ | mov ARG3, RC
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE // This is the base of the caller!
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
-+ | mov BASE, L:RB->base
-+ | mov RA, TMP2 // Restore the new base and nargs+1 saved above.
-+ | mov NARGS:RD, TMP1
-+ | mov LFUNC:RB, [RA-8] // Reload the function slot after the call.
-+ | add NARGS:RD, 1 // One more argument than before the call.
-+ | // This is fragile. L->base must not move, KBASE must always be defined.
-+ | cmp KBASE, BASE // Continue with CALLT if flag set.
-+ | je ->BC_CALLT_Z
-+ | mov BASE, RA
-+ | ins_call // Otherwise call resolved metamethod.
-+ |
-+ |//-- Argument coercion for 'for' statement ------------------------------
-+ |
-+ |->vmeta_for: // Call lj_meta_for on the loop slots, then re-dispatch the instruction at PC-4.
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov FCARG2, RA // Caveat: FCARG2 == BASE
-+ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_for@8 // (lua_State *L, TValue *base)
-+ | mov BASE, L:RB->base
-+ | mov RC, [PC-4] // Reload the 32 bit instruction word preceding PC.
-+ | movzx RA, RCH // RA = RCH (presumably the second byte of the instruction; confirm register aliases).
-+ | movzx OP, RCL // OP = RCL (presumably the low byte, i.e. the opcode).
-+ | shr RC, 16 // RC = upper 16 bits of the instruction.
-+ |.if X64
-+ | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
-+ |.else
-+ | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI.
-+ |.endif
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Fast functions -----------------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |.macro .ffunc, name // Emit the global label ff_<name> with no argument count check.
-+ |->ff_ .. name:
-+ |.endmacro
-+ |
-+ |.macro .ffunc_1, name // Emit label ff_<name>; fall back unless at least 1 argument was passed.
-+ |->ff_ .. name:
-+ | cmp NARGS:RD, 1+1; jb ->fff_fallback // RD = nargs+1. Flags of this compare are still live after the macro (ff next tests them).
-+ |.endmacro
-+ |
-+ |.macro .ffunc_2, name // Emit label ff_<name>; fall back unless at least 2 arguments were passed.
-+ |->ff_ .. name:
-+ | cmp NARGS:RD, 2+1; jb ->fff_fallback // RD = nargs+1.
-+ |.endmacro
-+ |
-+ |.macro .ffunc_nsse, name, op // ff_<name> taking 1 number argument, loaded into xmm0 with instruction op.
-+ | .ffunc_1 name
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback // Fall back unless the type word at BASE+4 is below LJ_TISNUM.
-+ | op xmm0, qword [BASE] // Load the 8 byte value of the first argument.
-+ |.endmacro
-+ |
-+ |.macro .ffunc_nsse, name // One-parameter form of the same macro name: uses movsd as the load instruction.
-+ | .ffunc_nsse name, movsd
-+ |.endmacro
-+ |
-+ |.macro .ffunc_nnsse, name // ff_<name> taking 2 number arguments, loaded into xmm0 and xmm1.
-+ | .ffunc_2 name
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback // Type word of argument 1 must be below LJ_TISNUM.
-+ | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback // Type word of argument 2 must be below LJ_TISNUM.
-+ | movsd xmm0, qword [BASE] // xmm0 = argument 1.
-+ | movsd xmm1, qword [BASE+8] // xmm1 = argument 2.
-+ |.endmacro
-+ |
-+ |.macro .ffunc_nnr, name // ff_<name> taking 2 number arguments, pushed onto the x87 stack.
-+ | .ffunc_2 name
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback // Type word of argument 1 must be below LJ_TISNUM.
-+ | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback // Type word of argument 2 must be below LJ_TISNUM.
-+ | fld qword [BASE+8] // Push argument 2 first, so it ends up in st1.
-+ | fld qword [BASE] // Push argument 1 last, so it ends up in st0.
-+ |.endmacro
-+ |
-+ |// Inlined GC threshold check: calls fff_gcstep unless gc.total is below gc.threshold. Overwrites RB. Caveat: uses label 1.
-+ |.macro ffgccheck
-+ | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
-+ | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
-+ | jb >1 // Below the threshold (unsigned compare): skip the step.
-+ | call ->fff_gcstep
-+ |1:
-+ |.endmacro
-+ |
-+ |//-- Base library: checks -----------------------------------------------
-+ |
-+ |.ffunc_1 assert // Fast path: all arguments are returned as results, shifted down one slot.
-+ | mov RB, [BASE+4] // Type word of the first argument.
-+ | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback // Fall back unless the tag is below LJ_TISTRUECOND.
-+ | mov PC, [BASE-4] // Fetch PC before the slot at BASE-4 is overwritten.
-+ | mov MULTRES, RD // Keep nargs+1; RD serves as loop counter below.
-+ | mov [BASE-4], RB // Copy the first argument (tag, then value) down to BASE-8.
-+ | mov RB, [BASE]
-+ | mov [BASE-8], RB
-+ | sub RD, 2 // Number of arguments after the first.
-+ | jz >2
-+ | mov RA, BASE
-+ |1:
-+ | add RA, 8 // Advance to the next argument and copy it one slot down.
-+ |.if X64
-+ | mov RBa, [RA]
-+ | mov [RA-8], RBa
-+ |.else
-+ | mov RB, [RA+4]
-+ | mov [RA-4], RB
-+ | mov RB, [RA]
-+ | mov [RA-8], RB
-+ |.endif
-+ | sub RD, 1
-+ | jnz <1
-+ |2:
-+ | mov RD, MULTRES // Restore nargs+1 as the result count.
-+ | jmp ->fff_res_
-+ |
-+ |.ffunc_1 type
-+ | mov RB, [BASE+4]
-+ |.if X64
-+ | mov RA, RB
-+ | sar RA, 15
-+ | cmp RA, -2
-+ | je >3
-+ |.endif
-+ | mov RC, ~LJ_TNUMX
-+ | not RB
-+ | cmp RC, RB
-+ | cmova RC, RB
-+ |2:
-+ | mov CFUNC:RB, [BASE-8]
-+ | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
-+ | mov PC, [BASE-4]
-+ | mov dword [BASE-4], LJ_TSTR
-+ | mov [BASE-8], STR:RC
-+ | jmp ->fff_res1
-+ |.if X64
-+ |3:
-+ | mov RC, ~LJ_TLIGHTUD
-+ | jmp <2
-+ |.endif
-+ |
-+ |//-- Base library: getters and setters ---------------------------------
-+ |
-+ |.ffunc_1 getmetatable
-+ | mov RB, [BASE+4]
-+ | mov PC, [BASE-4]
-+ | cmp RB, LJ_TTAB; jne >6
-+ |1: // Field metatable must be at same offset for GCtab and GCudata!
-+ | mov TAB:RB, [BASE]
-+ | mov TAB:RB, TAB:RB->metatable
-+ |2:
-+ | test TAB:RB, TAB:RB
-+ | mov dword [BASE-4], LJ_TNIL
-+ | jz ->fff_res1
-+ | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)]
-+ | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
-+ | mov [BASE-8], TAB:RB
-+ | mov RA, TAB:RB->hmask
-+ | and RA, STR:RC->hash
-+ | imul RA, #NODE
-+ | add NODE:RA, TAB:RB->node
-+ |3: // Rearranged logic, because we expect _not_ to find the key.
-+ | cmp dword NODE:RA->key.it, LJ_TSTR
-+ | jne >4
-+ | cmp dword NODE:RA->key.gcr, STR:RC
-+ | je >5
-+ |4:
-+ | mov NODE:RA, NODE:RA->next
-+ | test NODE:RA, NODE:RA
-+ | jnz <3
-+ | jmp ->fff_res1 // Not found, keep default result.
-+ |5:
-+ | mov RB, [RA+4]
-+ | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
-+ | mov RC, [RA]
-+ | mov [BASE-4], RB // Return value of mt.__metatable.
-+ | mov [BASE-8], RC
-+ | jmp ->fff_res1
-+ |
-+ |6:
-+ | cmp RB, LJ_TUDATA; je <1
-+ |.if X64
-+ | cmp RB, LJ_TNUMX; ja >8
-+ | cmp RB, LJ_TISNUM; jbe >7
-+ | mov RB, LJ_TLIGHTUD
-+ | jmp >8
-+ |7:
-+ |.else
-+ | cmp RB, LJ_TISNUM; ja >8
-+ |.endif
-+ | mov RB, LJ_TNUMX
-+ |8:
-+ | not RB
-+ | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
-+ | jmp <2
-+ |
-+ |.ffunc_2 setmetatable
-+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
-+ | // Fast path: no mt for table yet and not clearing the mt.
-+ | mov TAB:RB, [BASE]
-+ | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
-+ | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback
-+ | mov TAB:RC, [BASE+8]
-+ | mov TAB:RB->metatable, TAB:RC
-+ | mov PC, [BASE-4]
-+ | mov dword [BASE-4], LJ_TTAB // Return original table.
-+ | mov [BASE-8], TAB:RB
-+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-+ | jz >1
-+ | // Possible write barrier. Table is black, but skip iswhite(mt) check.
-+ | barrierback TAB:RB, RC
-+ |1:
-+ | jmp ->fff_res1
-+ |
-+ |.ffunc_2 rawget
-+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
-+ |.if X64WIN
-+ | mov RB, BASE // Save BASE.
-+ | lea CARG3d, [BASE+8]
-+ | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
-+ | mov CARG1d, SAVE_L
-+ |.elif X64
-+ | mov RB, BASE // Save BASE.
-+ | mov CARG2d, [BASE]
-+ | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
-+ | mov CARG1d, SAVE_L
-+ |.else
-+ | mov TAB:RD, [BASE]
-+ | mov L:RB, SAVE_L
-+ | mov ARG2, TAB:RD
-+ | mov ARG1, L:RB
-+ | mov RB, BASE // Save BASE.
-+ | add BASE, 8
-+ | mov ARG3, BASE
-+ |.endif
-+ | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
-+ | // cTValue * returned in eax (RD).
-+ | mov BASE, RB // Restore BASE.
-+ | // Copy table slot.
-+ |.if X64
-+ | mov RBa, [RD]
-+ | mov PC, [BASE-4]
-+ | mov [BASE-8], RBa
-+ |.else
-+ | mov RB, [RD]
-+ | mov RD, [RD+4]
-+ | mov PC, [BASE-4]
-+ | mov [BASE-8], RB
-+ | mov [BASE-4], RD
-+ |.endif
-+ | jmp ->fff_res1
-+ |
-+ |//-- Base library: conversions ------------------------------------------
-+ |
-+ |.ffunc tonumber
-+ | // Only handles the number case inline (without a base argument).
-+ | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
-+ | cmp dword [BASE+4], LJ_TISNUM // Flags of this compare are consumed by the branches below.
-+ |.if DUALNUM
-+ | jne >1
-+ | mov RB, dword [BASE]; jmp ->fff_resi // Tag equals LJ_TISNUM: return the low word via fff_resi.
-+ |1:
-+ | ja ->fff_fallback // Tag above LJ_TISNUM: not handled inline.
-+ |.else
-+ | jae ->fff_fallback // Tag at or above LJ_TISNUM: not handled inline.
-+ |.endif
-+ | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 // Tag below LJ_TISNUM: return the 8 byte value via fff_resxmm0.
-+ |
-+ |.ffunc_1 tostring // Fast function: tostring for a string or number argument.
-+ | // Only handles the string or number case inline.
-+ | mov PC, [BASE-4] // Load PC from [BASE-4]; that slot is overwritten with the result tag below.
-+ | cmp dword [BASE+4], LJ_TSTR; jne >3 // Argument tag is not LJ_TSTR: try the number path.
-+ | // A __tostring method in the string base metatable is ignored.
-+ | mov STR:RD, [BASE]
-+ |2:
-+ | mov dword [BASE-4], LJ_TSTR // Result tag goes to [BASE-4], the string pointer to [BASE-8].
-+ | mov [BASE-8], STR:RD
-+ | jmp ->fff_res1
-+ |3: // Handle numbers inline, unless a number base metatable is present.
-+ | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
-+ | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
-+ | jne ->fff_fallback // The GCROOT_BASEMT_NUM root is non-zero: use the fallback.
-+ | ffgccheck // Caveat: uses label 1.
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Add frame since C call can throw.
-+ | mov SAVE_PC, PC // Redundant (but a defined value).
-+ |.if X64 and not X64WIN
-+ | mov FCARG2, BASE // Otherwise: FCARG2 == BASE
-+ |.endif
-+ | mov L:FCARG1, L:RB
-+ |.if DUALNUM
-+ | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
-+ |.else
-+ | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
-+ |.endif
-+ | // GCstr returned in eax (RD).
-+ | mov BASE, L:RB->base // Reload BASE from L->base after the C call.
-+ | jmp <2 // Store the new string as the single result.
-+ |
-+ |//-- Base library: iterators -------------------------------------------
-+ |
-+ |.ffunc_1 next // Fast function: next(t [, key]) via lj_tab_next.
-+ | je >2 // Missing 2nd arg?
-+ |1:
-+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback // Argument 1 must carry the table tag.
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Add frame since C call can throw.
-+ | mov L:RB->top, BASE // Dummy frame length is ok.
-+ | mov PC, [BASE-4]
-+ |.if X64WIN
-+ | lea CARG3d, [BASE+8]
-+ | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
-+ | mov CARG1d, L:RB
-+ |.elif X64
-+ | mov CARG2d, [BASE]
-+ | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
-+ | mov CARG1d, L:RB
-+ |.else
-+ | mov TAB:RD, [BASE]
-+ | mov ARG2, TAB:RD
-+ | mov ARG1, L:RB
-+ | add BASE, 8 // BASE now points at the key slot; it is reloaded from L->base after the call.
-+ | mov ARG3, BASE
-+ |.endif
-+ | mov SAVE_PC, PC // Needed for ITERN fallback.
-+ | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
-+ | // Flag returned in eax (RD).
-+ | mov BASE, L:RB->base
-+ | test RD, RD; jz >3 // End of traversal?
-+ | // Copy key and value to results.
-+ |.if X64
-+ | mov RBa, [BASE+8] // Slot passed as the key pointer.
-+ | mov RDa, [BASE+16] // Slot following the key.
-+ | mov [BASE-8], RBa
-+ | mov [BASE], RDa
-+ |.else
-+ | mov RB, [BASE+8]
-+ | mov RD, [BASE+12]
-+ | mov [BASE-8], RB
-+ | mov [BASE-4], RD
-+ | mov RB, [BASE+16]
-+ | mov RD, [BASE+20]
-+ | mov [BASE], RB
-+ | mov [BASE+4], RD
-+ |.endif
-+ |->fff_res2: // Shared exit: return the two results at [BASE-8] and [BASE].
-+ | mov RD, 1+2 // RD = nresults+1.
-+ | jmp ->fff_res
-+ |2: // Set missing 2nd arg to nil.
-+ | mov dword [BASE+12], LJ_TNIL
-+ | jmp <1
-+ |3: // End of traversal: return nil.
-+ | mov dword [BASE-4], LJ_TNIL
-+ | jmp ->fff_res1
-+ |
-+ |.ffunc_1 pairs // Fast function: pairs(t) returns upvalue[0] of this function, t and nil.
-+ | mov TAB:RB, [BASE]
-+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback // Argument 1 must carry the table tag.
-+#if LJ_52
-+ | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback // Tables with a metatable take the fallback.
-+#endif
-+ | mov CFUNC:RB, [BASE-8] // The function object of this call sits at [BASE-8].
-+ | mov CFUNC:RD, CFUNC:RB->upvalue[0]
-+ | mov PC, [BASE-4] // Load PC before the slot is overwritten with the result tag.
-+ | mov dword [BASE-4], LJ_TFUNC // Result 1: the upvalue, tagged as function.
-+ | mov [BASE-8], CFUNC:RD
-+ | mov dword [BASE+12], LJ_TNIL // Result 3: nil. Result 2 is the table already at [BASE].
-+ | mov RD, 1+3 // RD = nresults+1.
-+ | jmp ->fff_res
-+ |
-+ |.ffunc_2 ipairs_aux // Fast function: ipairs iterator step (t, i) -> i+1, t[i+1] or nothing.
-+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback // Argument 1 must carry the table tag.
-+ | cmp dword [BASE+12], LJ_TISNUM // Check the tag of the index argument.
-+ |.if DUALNUM
-+ | jne ->fff_fallback
-+ |.else
-+ | jae ->fff_fallback
-+ |.endif
-+ | mov PC, [BASE-4]
-+ |.if DUALNUM
-+ | mov RD, dword [BASE+8]
-+ | add RD, 1 // RD = index + 1.
-+ | mov dword [BASE-4], LJ_TISNUM // Result 1: the incremented index.
-+ | mov dword [BASE-8], RD
-+ |.else
-+ | movsd xmm0, qword [BASE+8]
-+ | sseconst_1 xmm1, RBa // Presumably loads the constant 1.0 into xmm1 (macro defined elsewhere).
-+ | addsd xmm0, xmm1
-+ | cvttsd2si RD, xmm0 // RD = truncated integer value of the incremented index.
-+ | movsd qword [BASE-8], xmm0 // Result 1: the incremented index as a double.
-+ |.endif
-+ | mov TAB:RB, [BASE]
-+ | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
-+ | shl RD, 3 // Scale the index by the 8 byte slot size.
-+ | add RD, TAB:RB->array // RD = address of the array slot.
-+ |1:
-+ | cmp dword [RD+4], LJ_TNIL; je ->fff_res0 // A nil slot ends the iteration.
-+ | // Copy array slot.
-+ |.if X64
-+ | mov RBa, [RD]
-+ | mov [BASE], RBa
-+ |.else
-+ | mov RB, [RD]
-+ | mov RD, [RD+4]
-+ | mov [BASE], RB
-+ | mov [BASE+4], RD
-+ |.endif
-+ | jmp ->fff_res2
-+ |2: // Check for empty hash part first. Otherwise call C function.
-+ | cmp dword TAB:RB->hmask, 0; je ->fff_res0
-+ | mov FCARG1, TAB:RB
-+ | mov RB, BASE // Save BASE.
-+ | mov FCARG2, RD // Caveat: FCARG2 == BASE
-+ | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
-+ | // cTValue * or NULL returned in eax (RD).
-+ | mov BASE, RB // Restore BASE.
-+ | test RD, RD
-+ | jnz <1 // Non-NULL slot pointer: check and copy it like an array slot.
-+ |->fff_res0: // Shared exit: return no results.
-+ | mov RD, 1+0 // RD = nresults+1.
-+ | jmp ->fff_res
-+ |
-+ |.ffunc_1 ipairs // Fast function: ipairs(t) returns upvalue[0] of this function, t and 0.
-+ | mov TAB:RB, [BASE]
-+ | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback // Argument 1 must carry the table tag.
-+#if LJ_52
-+ | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback // Tables with a metatable take the fallback.
-+#endif
-+ | mov CFUNC:RB, [BASE-8] // The function object of this call sits at [BASE-8].
-+ | mov CFUNC:RD, CFUNC:RB->upvalue[0]
-+ | mov PC, [BASE-4] // Load PC before the slot is overwritten with the result tag.
-+ | mov dword [BASE-4], LJ_TFUNC // Result 1: the upvalue, tagged as function.
-+ | mov [BASE-8], CFUNC:RD
-+ |.if DUALNUM
-+ | mov dword [BASE+12], LJ_TISNUM // Result 3: integer 0.
-+ | mov dword [BASE+8], 0
-+ |.else
-+ | xorps xmm0, xmm0
-+ | movsd qword [BASE+8], xmm0 // Result 3: +0.0 as a double.
-+ |.endif
-+ | mov RD, 1+3 // RD = nresults+1.
-+ | jmp ->fff_res
-+ |
-+ |//-- Base library: catch errors ----------------------------------------
-+ |
-+ |.ffunc_1 pcall // Fast function: pcall; builds a FRAME_PCALL frame word and dispatches the call.
-+ | lea RA, [BASE+8] // RA = address one slot above BASE.
-+ | sub NARGS:RD, 1 // One argument less; presumably the called function itself is not counted.
-+ | mov PC, 8+FRAME_PCALL // Frame word: offset 8 combined with the FRAME_PCALL type.
-+ |1:
-+ | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)]
-+ | shr RB, HOOK_ACTIVE_SHIFT
-+ | and RB, 1 // RB = the hook-active bit of hookmask (0 or 1).
-+ | add PC, RB // Remember active hook before pcall.
-+ | jmp ->vm_call_dispatch
-+ |
-+ |.ffunc_2 xpcall // Fast function: xpcall; swaps its two arguments, then shares the pcall tail.
-+ | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback // Argument 2 must carry the function tag.
-+ | mov RB, [BASE+4] // Swap function and traceback.
-+ | mov [BASE+12], RB // Tag of argument 1 moves to slot 2.
-+ | mov dword [BASE+4], LJ_TFUNC // Slot 1 now holds a function (checked above).
-+ | mov LFUNC:RB, [BASE]
-+ | mov PC, [BASE+8] // PC is used as a scratch register for the value swap.
-+ | mov [BASE+8], LFUNC:RB
-+ | mov [BASE], PC
-+ | lea RA, [BASE+16] // RA = address two slots above BASE.
-+ | sub NARGS:RD, 2 // Two arguments less.
-+ | mov PC, 16+FRAME_PCALL // Frame word: offset 16 combined with the FRAME_PCALL type.
-+ | jmp <1 // Continue at label 1 of pcall (adds the hook-active bit and dispatches).
-+ |
-+ |//-- Coroutine library --------------------------------------------------
-+ |
-+ |.macro coroutine_resume_wrap, resume // Shared body: resume=1 emits coroutine_resume, resume=0 emits coroutine_wrap_aux.
-+ |.if resume
-+ |.ffunc_1 coroutine_resume
-+ | mov L:RB, [BASE] // The coroutine is argument 1 (its tag is checked below).
-+ |.else
-+ |.ffunc coroutine_wrap_aux
-+ | mov CFUNC:RB, [BASE-8]
-+ | mov L:RB, CFUNC:RB->upvalue[0].gcr // The coroutine is upvalue[0] of the called function.
-+ |.endif
-+ | mov PC, [BASE-4]
-+ | mov SAVE_PC, PC
-+ |.if X64
-+ | mov TMP1, L:RB // Keep the coroutine pointer in TMP1; it is reloaded after the call.
-+ |.else
-+ | mov ARG1, L:RB // First argument for vm_resume; also reloaded from ARG1 after the call.
-+ |.endif
-+ |.if resume
-+ | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback // Argument 1 must carry the thread tag.
-+ |.endif
-+ | cmp aword L:RB->cframe, 0; jne ->fff_fallback // Coroutine has a non-zero cframe: fallback.
-+ | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback // Status above LUA_YIELD: fallback.
-+ | mov RA, L:RB->top // mov does not change the flags of the status compare above.
-+ | je >1 // Status != LUA_YIELD (i.e. 0)?
-+ | cmp RA, L:RB->base // Check for presence of initial func.
-+ | je ->fff_fallback
-+ |1:
-+ |.if resume
-+ | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
-+ |.else
-+ | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
-+ |.endif
-+ | cmp PC, L:RB->maxstack; ja ->fff_fallback
-+ | mov L:RB->top, PC // New top of the coroutine stack after the argument move.
-+ |
-+ | mov L:RB, SAVE_L // From here on L:RB is the calling lua_State again.
-+ | mov L:RB->base, BASE
-+ |.if resume
-+ | add BASE, 8 // Keep resumed thread in stack for GC.
-+ |.endif
-+ | mov L:RB->top, BASE
-+ |.if resume
-+ | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
-+ |.else
-+ | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
-+ |.endif
-+ | sub RBa, PCa // Relative to PC.
-+ |
-+ | cmp PC, RA
-+ | je >3 // Nothing to move.
-+ |2: // Move args to coroutine.
-+ |.if X64
-+ | mov RCa, [PC+RB]
-+ | mov [PC-8], RCa
-+ |.else
-+ | mov RC, [PC+RB+4]
-+ | mov [PC-4], RC
-+ | mov RC, [PC+RB]
-+ | mov [PC-8], RC
-+ |.endif
-+ | sub PC, 8 // Copy runs downwards, one 8 byte slot per iteration.
-+ | cmp PC, RA
-+ | jne <2
-+ |3:
-+ |.if X64
-+ | mov CARG2d, RA
-+ | mov CARG1d, TMP1
-+ |.else
-+ | mov ARG2, RA
-+ | xor RA, RA
-+ | mov ARG4, RA // Arguments 3 and 4 are zero.
-+ | mov ARG3, RA
-+ |.endif
-+ | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
-+ |
-+ | mov L:RB, SAVE_L
-+ |.if X64
-+ | mov L:PC, TMP1
-+ |.else
-+ | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
-+ |.endif
-+ | mov BASE, L:RB->base
-+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB // The caller is the current lua_State again.
-+ | set_vmstate INTERP
-+ |
-+ | cmp eax, LUA_YIELD
-+ | ja >8 // Status above LUA_YIELD: error path.
-+ |4:
-+ | mov RA, L:PC->base
-+ | mov KBASE, L:PC->top
-+ | mov L:PC->top, RA // Clear coroutine stack.
-+ | mov PC, KBASE
-+ | sub PC, RA // PC = size in bytes of the results (top - base).
-+ | je >6 // No results?
-+ | lea RD, [BASE+PC]
-+ | shr PC, 3 // PC = number of results.
-+ | cmp RD, L:RB->maxstack
-+ | ja >9 // Need to grow stack?
-+ |
-+ | mov RB, BASE
-+ | sub RBa, RAa // RB = offset from the coroutine slots to the caller's slots.
-+ |5: // Move results from coroutine.
-+ |.if X64
-+ | mov RDa, [RA]
-+ | mov [RA+RB], RDa
-+ |.else
-+ | mov RD, [RA]
-+ | mov [RA+RB], RD
-+ | mov RD, [RA+4]
-+ | mov [RA+RB+4], RD
-+ |.endif
-+ | add RA, 8
-+ | cmp RA, KBASE
-+ | jne <5
-+ |6:
-+ |.if resume
-+ | lea RD, [PC+2] // nresults+1 = 1 + true + results.
-+ | mov dword [BASE-4], LJ_TTRUE // Prepend true to results.
-+ |.else
-+ | lea RD, [PC+1] // nresults+1 = 1 + results.
-+ |.endif
-+ |7:
-+ | mov PC, SAVE_PC
-+ | mov MULTRES, RD
-+ |.if resume
-+ | mov RAa, -8 // Results start at BASE-8 (the prepended status).
-+ |.else
-+ | xor RA, RA // Results start at BASE.
-+ |.endif
-+ | test PC, FRAME_TYPE
-+ | jz ->BC_RET_Z // No FRAME_TYPE bits set in PC: continue in BC_RET_Z.
-+ | jmp ->vm_return
-+ |
-+ |8: // Coroutine returned with error (at co->top-1).
-+ |.if resume
-+ | mov dword [BASE-4], LJ_TFALSE // Prepend false to results.
-+ | mov RA, L:PC->top
-+ | sub RA, 8
-+ | mov L:PC->top, RA // Clear error from coroutine stack.
-+ | // Copy error message.
-+ |.if X64
-+ | mov RDa, [RA]
-+ | mov [BASE], RDa
-+ |.else
-+ | mov RD, [RA]
-+ | mov [BASE], RD
-+ | mov RD, [RA+4]
-+ | mov [BASE+4], RD
-+ |.endif
-+ | mov RD, 1+2 // nresults+1 = 1 + false + error.
-+ | jmp <7
-+ |.else
-+ | mov FCARG2, L:PC
-+ | mov FCARG1, L:RB
-+ | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co)
-+ | // Error function does not return.
-+ |.endif
-+ |
-+ |9: // Handle stack expansion on return from yield.
-+ |.if X64
-+ | mov L:RA, TMP1
-+ |.else
-+ | mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
-+ |.endif
-+ | mov L:RA->top, KBASE // Undo coroutine stack clearing.
-+ | mov FCARG2, PC // PC holds the number of results at this point.
-+ | mov FCARG1, L:RB
-+ | call extern lj_state_growstack@8 // (lua_State *L, int n)
-+ |.if X64
-+ | mov L:PC, TMP1
-+ |.else
-+ | mov L:PC, ARG1
-+ |.endif
-+ | mov BASE, L:RB->base
-+ | jmp <4 // Retry the stack move.
-+ |.endmacro
-+ |
-+ | coroutine_resume_wrap 1 // coroutine.resume
-+ | coroutine_resume_wrap 0 // coroutine.wrap
-+ |
-+ |.ffunc coroutine_yield // Fast function: coroutine.yield; marks the state as yielded and leaves the VM.
-+ | mov L:RB, SAVE_L
-+ | test aword L:RB->cframe, CFRAME_RESUME
-+ | jz ->fff_fallback // CFRAME_RESUME bit not set in L->cframe: use the fallback.
-+ | mov L:RB->base, BASE
-+ | lea RD, [BASE+NARGS:RD*8-8]
-+ | mov L:RB->top, RD // L->top = BASE + 8*nargs (RD held nargs+1).
-+ | xor RD, RD
-+ | mov aword L:RB->cframe, RDa // Clear L->cframe.
-+ | mov al, LUA_YIELD // With the upper bits cleared by the xor above.
-+ | mov byte L:RB->status, al // L->status = LUA_YIELD.
-+ | jmp ->vm_leave_unw
-+ |
-+ |//-- Math library -------------------------------------------------------
-+ |
-+ |.if not DUALNUM
-+ |->fff_resi: // Dummy.
-+ |.endif
-+ |
-+ |->fff_resn: // Return one number result taken from the top of the x87 stack.
-+ | mov PC, [BASE-4]
-+ | fstp qword [BASE-8] // Pop st0 into the result slot as a double.
-+ | jmp ->fff_res1
-+ |
-+ | .ffunc_1 math_abs // Fast function: math.abs, followed by the shared fff_res* return tail.
-+ |.if DUALNUM
-+ | cmp dword [BASE+4], LJ_TISNUM; jne >2
-+ | mov RB, dword [BASE]
-+ | cmp RB, 0; jns ->fff_resi // Non-negative integer: return it unchanged.
-+ | neg RB; js >1 // Still negative after negation: result does not fit, see label 1.
-+ |->fff_resbit:
-+ |->fff_resi: // Return the 32 bit integer in RB as the single result.
-+ | mov PC, [BASE-4]
-+ | mov dword [BASE-4], LJ_TISNUM
-+ | mov dword [BASE-8], RB
-+ | jmp ->fff_res1
-+ |1:
-+ | mov PC, [BASE-4]
-+ | mov dword [BASE-4], 0x41e00000 // 2^31.
-+ | mov dword [BASE-8], 0 // Low word of the double 2^31.
-+ | jmp ->fff_res1
-+ |2:
-+ | ja ->fff_fallback
-+ |.else
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ |.endif
-+ | movsd xmm0, qword [BASE]
-+ | sseconst_abs xmm1, RDa // Presumably loads a mask with the sign bit clear (macro defined elsewhere).
-+ | andps xmm0, xmm1
-+ |->fff_resxmm0: // Return the double in xmm0 as the single result.
-+ | mov PC, [BASE-4]
-+ | movsd qword [BASE-8], xmm0
-+ | // fallthrough
-+ |
-+ |->fff_res1: // Return exactly one result (already stored at [BASE-8]).
-+ | mov RD, 1+1
-+ |->fff_res: // Return with RD = nresults+1; results start at [BASE-8].
-+ | mov MULTRES, RD
-+ |->fff_res_:
-+ | test PC, FRAME_TYPE
-+ | jnz >7 // Some FRAME_TYPE bits are set in PC: non-standard return.
-+ |5:
-+ | cmp PC_RB, RDL // More results expected?
-+ | ja >6
-+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
-+ | movzx RA, PC_RA
-+ | not RAa // Note: ~RA = -(RA+1)
-+ | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
-+ | ins_next
-+ |
-+ |6: // Fill up results with nil.
-+ | mov dword [BASE+RD*8-12], LJ_TNIL
-+ | add RD, 1
-+ | jmp <5
-+ |
-+ |7: // Non-standard return case.
-+ | mov RAa, -8 // Results start at BASE+RA = BASE-8.
-+ | jmp ->vm_return
-+ |
-+ |.if X64 // fff_resfp: return label used after external math calls (xmm0 result on x64, x87 st0 otherwise).
-+ |.define fff_resfp, fff_resxmm0
-+ |.else
-+ |.define fff_resfp, fff_resn
-+ |.endif
-+ |
-+ |.macro math_round, func // Emits the fast function math_<func> on top of the helper vm_<func>_sse.
-+ | .ffunc math_ .. func
-+ |.if DUALNUM
-+ | cmp dword [BASE+4], LJ_TISNUM; jne >1
-+ | mov RB, dword [BASE]; jmp ->fff_resi // Integer argument: return it unchanged.
-+ |1:
-+ | ja ->fff_fallback
-+ |.else
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ |.endif
-+ | movsd xmm0, qword [BASE]
-+ | call ->vm_ .. func .. _sse
-+ |.if DUALNUM
-+ | cvttsd2si RB, xmm0
-+ | cmp RB, 0x80000000 // cvttsd2si yields 0x80000000 when the value does not fit in 32 bits.
-+ | jne ->fff_resi
-+ | cvtsi2sd xmm1, RB // Distinguish a genuine -2^31 from the overflow marker.
-+ | ucomisd xmm0, xmm1
-+ | jp ->fff_resxmm0 // Unordered compare (NaN): return as a double.
-+ | je ->fff_resi // Exactly -2^31: return as an integer.
-+ |.endif
-+ | jmp ->fff_resxmm0
-+ |.endmacro
-+ |
-+ | math_round floor
-+ | math_round ceil
-+ |
-+ |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 // math.sqrt: one sqrtsd, result returned from xmm0.
-+ |
-+ |.ffunc math_log // Fast function: math.log with one number argument, via the C function log.
-+ | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
-+ | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
-+ | movsd xmm0, qword [BASE]
-+ |.if not X64
-+ | movsd FPARG1, xmm0 // Non-x64: the argument is passed through the FPARG1 slot.
-+ |.endif
-+ | mov RB, BASE // Save BASE in RB across the call.
-+ | call extern log
-+ | mov BASE, RB // Restore BASE.
-+ | jmp ->fff_resfp
-+ |
-+ |.macro math_extern, func // Emits the fast function math_<func> as a call to the external one-argument function <func>.
-+ | .ffunc_nsse math_ .. func
-+ |.if not X64
-+ | movsd FPARG1, xmm0 // Non-x64: the argument is passed through the FPARG1 slot.
-+ |.endif
-+ | mov RB, BASE // Save BASE in RB across the call.
-+ | call extern func
-+ | mov BASE, RB // Restore BASE.
-+ | jmp ->fff_resfp
-+ |.endmacro
-+ |
-+ |.macro math_extern2, func // Emits the fast function math_<func> as a call to the external two-argument function <func>.
-+ | .ffunc_nnsse math_ .. func
-+ |.if not X64
-+ | movsd FPARG1, xmm0 // Non-x64: first argument goes to the FPARG1 slot.
-+ | movsd FPARG3, xmm1 // Second argument goes to FPARG3.
-+ |.endif
-+ | mov RB, BASE // Save BASE in RB across the call.
-+ | call extern func
-+ | mov BASE, RB // Restore BASE.
-+ | jmp ->fff_resfp
-+ |.endmacro
-+ |
-+ | math_extern log10 // One-argument math functions forwarded to external functions of the same name.
-+ | math_extern exp
-+ | math_extern sin
-+ | math_extern cos
-+ | math_extern tan
-+ | math_extern asin
-+ | math_extern acos
-+ | math_extern atan
-+ | math_extern sinh
-+ | math_extern cosh
-+ | math_extern tanh
-+ | math_extern2 pow // Two-argument math functions forwarded the same way.
-+ | math_extern2 atan2
-+ | math_extern2 fmod
-+ |
-+ |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn // math.ldexp: x87 fscale, result returned from st0.
-+ |
-+ |.ffunc_1 math_frexp // Fast function: math.frexp, done with integer operations on the double's bits.
-+ | mov RB, [BASE+4] // RB = high word of the argument (also its type tag).
-+ | cmp RB, LJ_TISNUM; jae ->fff_fallback
-+ | mov PC, [BASE-4]
-+ | mov RC, [BASE] // RC = low word of the argument.
-+ | mov [BASE-4], RB; mov [BASE-8], RC // Result 1 starts out as a copy of the argument.
-+ | shl RB, 1; cmp RB, 0xffe00000; jae >3 // Shift out the sign; all-ones exponent goes to label 3.
-+ | or RC, RB; jz >3 // All remaining bits zero goes to label 3.
-+ | mov RC, 1022 // Exponent bias used for normal numbers.
-+ | cmp RB, 0x00200000; jb >4 // Exponent field is zero: denormal, see label 4.
-+ |1:
-+ | shr RB, 21; sub RB, RC // Extract and unbias exponent.
-+ | cvtsi2sd xmm0, RB // Result 2: the exponent as a double.
-+ | mov RB, [BASE-4]
-+ | and RB, 0x800fffff // Mask off exponent.
-+ | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
-+ | mov [BASE-4], RB
-+ |2:
-+ | movsd qword [BASE], xmm0
-+ | mov RD, 1+2 // RD = nresults+1.
-+ | jmp ->fff_res
-+ |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
-+ | xorps xmm0, xmm0; jmp <2
-+ |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
-+ | movsd xmm0, qword [BASE]
-+ | sseconst_hi xmm1, RBa, 43500000 // 2^54.
-+ | mulsd xmm0, xmm1
-+ | movsd qword [BASE-8], xmm0 // Result 1 now holds the scaled value.
-+ | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 // 1076 = 1022 + 54 compensates for the scaling.
-+ |
-+ |.ffunc_nsse math_modf // Fast function: math.modf; returns the truncated value and the remainder.
-+ | mov RB, [BASE+4]
-+ | mov PC, [BASE-4]
-+ | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
-+ | movaps xmm4, xmm0 // Keep a copy of the argument in xmm4.
-+ | call ->vm_trunc_sse
-+ | subsd xmm4, xmm0 // xmm4 = argument - truncated value.
-+ |1:
-+ | movsd qword [BASE-8], xmm0 // Result 1.
-+ | movsd qword [BASE], xmm4 // Result 2.
-+ | mov RC, [BASE-4]; mov RB, [BASE+4]
-+ | xor RC, RB; js >3 // Need to adjust sign?
-+ |2:
-+ | mov RD, 1+2 // RD = nresults+1.
-+ | jmp ->fff_res
-+ |3:
-+ | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
-+ | jmp <2
-+ |4:
-+ | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
-+ |
-+ |.macro math_minmax, name, cmovop, sseop // Emits fast function <name>: folds all arguments with cmovop (integers) or sseop (doubles).
-+ | .ffunc name
-+ | mov RA, 2 // RA = index (nargs+1 style) of the next argument to look at.
-+ | cmp dword [BASE+4], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne >4
-+ | mov RB, dword [BASE] // RB = running integer result.
-+ |1: // Handle integers.
-+ | cmp RA, RD; jae ->fff_resi // All arguments consumed: return the integer result.
-+ | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3
-+ | cmp RB, dword [BASE+RA*8-8]
-+ | cmovop RB, dword [BASE+RA*8-8] // Conditionally replace the running result with this argument.
-+ | add RA, 1
-+ | jmp <1
-+ |3:
-+ | ja ->fff_fallback
-+ | // Convert intermediate result to number and continue below.
-+ | cvtsi2sd xmm0, RB
-+ | jmp >6
-+ |4:
-+ | ja ->fff_fallback
-+ |.else
-+ | jae ->fff_fallback
-+ |.endif
-+ |
-+ | movsd xmm0, qword [BASE] // xmm0 = running double result.
-+ |5: // Handle numbers or integers.
-+ | cmp RA, RD; jae ->fff_resxmm0 // All arguments consumed: return the double result.
-+ | cmp dword [BASE+RA*8-4], LJ_TISNUM
-+ |.if DUALNUM
-+ | jb >6
-+ | ja ->fff_fallback
-+ | cvtsi2sd xmm1, dword [BASE+RA*8-8] // Integer argument: convert it to a double first.
-+ | jmp >7
-+ |.else
-+ | jae ->fff_fallback
-+ |.endif
-+ |6:
-+ | movsd xmm1, qword [BASE+RA*8-8]
-+ |7:
-+ | sseop xmm0, xmm1
-+ | add RA, 1
-+ | jmp <5
-+ |.endmacro
-+ |
-+ | math_minmax math_min, cmovg, minsd
-+ | math_minmax math_max, cmovl, maxsd
-+ |
-+ |//-- String library -----------------------------------------------------
-+ |
-+ |.ffunc string_byte // Only handle the 1-arg case here.
-+ | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
-+ | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback // Argument 1 must carry the string tag.
-+ | mov STR:RB, [BASE]
-+ | mov PC, [BASE-4]
-+ | cmp dword STR:RB->len, 1
-+ | jb ->fff_res0 // Return no results for empty string.
-+ | movzx RB, byte STR:RB[1] // Load the first byte of the string data (addressed as element 1 of the STR type).
-+ |.if DUALNUM
-+ | jmp ->fff_resi
-+ |.else
-+ | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
-+ |.endif
-+ |
-+ |.ffunc string_char // Only handle the 1-arg case here.
-+ | ffgccheck
-+ | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
-+ | cmp dword [BASE+4], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne ->fff_fallback
-+ | mov RB, dword [BASE]
-+ | cmp RB, 255; ja ->fff_fallback // Unsigned compare: values outside 0..255 take the fallback.
-+ | mov TMP2, RB
-+ |.else
-+ | jae ->fff_fallback
-+ | cvttsd2si RB, qword [BASE]
-+ | cmp RB, 255; ja ->fff_fallback // Unsigned compare: values outside 0..255 take the fallback.
-+ | mov TMP2, RB
-+ |.endif
-+ |.if X64
-+ | mov TMP3, 1 // String length 1.
-+ |.else
-+ | mov ARG3, 1 // String length 1.
-+ |.endif
-+ | lea RDa, TMP2 // Points to stack. Little-endian.
-+ |->fff_newstr: // Create a string from pointer RD and the length in TMP3 (x64) or ARG3, then return it.
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ |.if X64
-+ | mov CARG3d, TMP3 // Zero-extended to size_t.
-+ | mov CARG2, RDa // May be 64 bit ptr to stack.
-+ | mov CARG1d, L:RB
-+ |.else
-+ | mov ARG2, RD
-+ | mov ARG1, L:RB
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_str_new // (lua_State *L, char *str, size_t l)
-+ |->fff_resstr: // Return the GCstr in RD as the single result.
-+ | // GCstr * returned in eax (RD).
-+ | mov BASE, L:RB->base
-+ | mov PC, [BASE-4]
-+ | mov dword [BASE-4], LJ_TSTR
-+ | mov [BASE-8], STR:RD
-+ | jmp ->fff_res1
-+ |
-+ |.ffunc string_sub // Fast function: string.sub(s, start [, end]); clamps the range and creates the substring.
-+ | ffgccheck
-+ | mov TMP2, -1 // Default end = -1 when the third argument is absent.
-+ | cmp NARGS:RD, 1+2; jb ->fff_fallback // At least two arguments are required.
-+ | jna >1 // Exactly two arguments: keep the default end.
-+ | cmp dword [BASE+20], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne ->fff_fallback
-+ | mov RB, dword [BASE+16]
-+ | mov TMP2, RB
-+ |.else
-+ | jae ->fff_fallback
-+ | cvttsd2si RB, qword [BASE+16]
-+ | mov TMP2, RB
-+ |.endif
-+ |1:
-+ | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback // Argument 1 must carry the string tag.
-+ | cmp dword [BASE+12], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne ->fff_fallback
-+ |.else
-+ | jae ->fff_fallback
-+ |.endif
-+ | mov STR:RB, [BASE]
-+ | mov TMP3, STR:RB // Keep the string pointer; RB is reused for the length.
-+ | mov RB, STR:RB->len
-+ |.if DUALNUM
-+ | mov RA, dword [BASE+8]
-+ |.else
-+ | cvttsd2si RA, qword [BASE+8]
-+ |.endif
-+ | mov RC, TMP2 // RA = start, RB = len, RC = end.
-+ | cmp RB, RC // len < end? (unsigned compare)
-+ | jb >5
-+ |2:
-+ | test RA, RA // start <= 0?
-+ | jle >7
-+ |3:
-+ | mov STR:RB, TMP3
-+ | sub RC, RA // start > end?
-+ | jl ->fff_emptystr
-+ | lea RB, [STR:RB+RA+#STR-1] // RB = address of the character at position start.
-+ | add RC, 1 // RC = end - start + 1 = length of the substring.
-+ |4:
-+ |.if X64
-+ | mov TMP3, RC
-+ |.else
-+ | mov ARG3, RC
-+ |.endif
-+ | mov RD, RB
-+ | jmp ->fff_newstr
-+ |
-+ |5: // Negative end or overflow.
-+ | jl >6 // Flags are still those of the cmp RB, RC above.
-+ | lea RC, [RC+RB+1] // end = end+(len+1)
-+ | jmp <2
-+ |6: // Overflow.
-+ | mov RC, RB // end = len
-+ | jmp <2
-+ |
-+ |7: // Negative start or underflow.
-+ | je >8 // Flags are still those of the test RA, RA above.
-+ | add RA, RB // start = start+(len+1)
-+ | add RA, 1
-+ | jg <3 // start > 0?
-+ |8: // Underflow.
-+ | mov RA, 1 // start = 1
-+ | jmp <3
-+ |
-+ |->fff_emptystr: // Range underflow.
-+ | xor RC, RC // Zero length. Any ptr in RB is ok.
-+ | jmp <4
-+ |
-+ |.macro ffstring_op, name // Emits fast function string_<name> on top of lj_buf_putstr_<name> and lj_buf_tostr.
-+ | .ffunc_1 string_ .. name
-+ | ffgccheck
-+ | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback // Argument 1 must carry the string tag.
-+ | mov L:RB, SAVE_L
-+ | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] // First argument: the global tmpbuf.
-+ | mov L:RB->base, BASE
-+ | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
-+ | mov RC, SBUF:FCARG1->b
-+ | mov SBUF:FCARG1->L, L:RB
-+ | mov SBUF:FCARG1->p, RC // Set the buffer's p field to its b field.
-+ | mov SAVE_PC, PC
-+ | call extern lj_buf_putstr_ .. name .. @8
-+ | mov FCARG1, eax // The first call's return value becomes the argument of the second call.
-+ | call extern lj_buf_tostr@4
-+ | jmp ->fff_resstr
-+ |.endmacro
-+ |
-+ |ffstring_op reverse
-+ |ffstring_op lower
-+ |ffstring_op upper
-+ |
-+ |//-- Bit library --------------------------------------------------------
-+ |
-+ |.macro .ffunc_bit, name, kind, fdef // Common prologue of the bit.* fast functions: leaves argument 1 as a 32 bit integer in RB.
-+ | fdef name
-+ |.if kind == 2
-+ | sseconst_tobit xmm1, RBa // kind 2: load the conversion constant up front; xmm1 is reused by the caller's loop.
-+ |.endif
-+ | cmp dword [BASE+4], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne >1
-+ | mov RB, dword [BASE] // Integer argument: take it as is.
-+ |.if kind > 0
-+ | jmp >2
-+ |.else
-+ | jmp ->fff_resbit
-+ |.endif
-+ |1:
-+ | ja ->fff_fallback
-+ |.else
-+ | jae ->fff_fallback
-+ |.endif
-+ | movsd xmm0, qword [BASE]
-+ |.if kind < 2
-+ | sseconst_tobit xmm1, RBa
-+ |.endif
-+ | addsd xmm0, xmm1 // Adding the constant presumably leaves the integer value in the low 32 bits of the double.
-+ | movd RB, xmm0 // RB = low 32 bits of xmm0.
-+ |2:
-+ |.endmacro
-+ |
-+ |.macro .ffunc_bit, name, kind // Two-argument form: defaults fdef to .ffunc_1.
-+ | .ffunc_bit name, kind, .ffunc_1
-+ |.endmacro
-+ |
-+ |.ffunc_bit bit_tobit, 0 // bit.tobit: the prologue already leaves the converted value in RB.
-+ | jmp ->fff_resbit
-+ |
-+ |.macro .ffunc_bit_op, name, ins // Emits a variadic bit operation: folds all arguments into RB with instruction ins.
-+ | .ffunc_bit name, 2
-+ | mov TMP2, NARGS:RD // Save for fallback.
-+ | lea RD, [BASE+NARGS:RD*8-16] // RD = address of the last argument.
-+ |1:
-+ | cmp RD, BASE
-+ | jbe ->fff_resbit // Walked down to argument 1: done.
-+ | cmp dword [RD+4], LJ_TISNUM
-+ |.if DUALNUM
-+ | jne >2
-+ | ins RB, dword [RD] // Integer argument: combine directly from memory.
-+ | sub RD, 8
-+ | jmp <1
-+ |2:
-+ | ja ->fff_fallback_bit_op
-+ |.else
-+ | jae ->fff_fallback_bit_op
-+ |.endif
-+ | movsd xmm0, qword [RD]
-+ | addsd xmm0, xmm1 // xmm1 still holds the constant loaded by the prologue.
-+ | movd RA, xmm0
-+ | ins RB, RA
-+ | sub RD, 8 // Step down to the previous argument.
-+ | jmp <1
-+ |.endmacro
-+ |
-+ |.ffunc_bit_op bit_band, and
-+ |.ffunc_bit_op bit_bor, or
-+ |.ffunc_bit_op bit_bxor, xor
-+ |
-+ |.ffunc_bit bit_bswap, 1 // bit.bswap: byte-swap the 32 bit value in RB.
-+ | bswap RB
-+ | jmp ->fff_resbit
-+ |
-+ |.ffunc_bit bit_bnot, 1 // bit.bnot: bitwise complement of the 32 bit value in RB.
-+ | not RB
-+ |.if DUALNUM
-+ | jmp ->fff_resbit
-+ |.else
-+ |->fff_resbit: // Non-DUALNUM: bit results are converted to a double and returned via xmm0.
-+ | cvtsi2sd xmm0, RB
-+ | jmp ->fff_resxmm0
-+ |.endif
-+ |
-+ |->fff_fallback_bit_op: // Fallback entry for the variadic bit operations, which clobber RD.
-+ | mov NARGS:RD, TMP2 // Restore for fallback
-+ | jmp ->fff_fallback
-+ |
-+ |.macro .ffunc_bit_sh, name, ins // Emits a shift/rotate fast function: applies ins to argument 1 with the count from argument 2.
-+ |.if DUALNUM
-+ | .ffunc_bit name, 1, .ffunc_2
-+ | // Note: no inline conversion from number for 2nd argument!
-+ | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
-+ | mov RA, dword [BASE+8] // RA = shift count.
-+ |.else
-+ | .ffunc_nnsse name
-+ | sseconst_tobit xmm2, RBa
-+ | addsd xmm0, xmm2 // Convert both arguments with the same constant.
-+ | addsd xmm1, xmm2
-+ | movd RB, xmm0 // RB = value.
-+ | movd RA, xmm1 // RA = shift count.
-+ |.endif
-+ | ins RB, cl // Assumes RA is ecx.
-+ | jmp ->fff_resbit
-+ |.endmacro
-+ |
-+ |.ffunc_bit_sh bit_lshift, shl
-+ |.ffunc_bit_sh bit_rshift, shr
-+ |.ffunc_bit_sh bit_arshift, sar
-+ |.ffunc_bit_sh bit_rol, rol
-+ |.ffunc_bit_sh bit_ror, ror
-+ |
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->fff_fallback_2: // Fallback entry that forces nargs to 2.
-+ | mov NARGS:RD, 1+2 // Other args are ignored, anyway.
-+ | jmp ->fff_fallback
-+ |->fff_fallback_1: // Fallback entry that forces nargs to 1.
-+ | mov NARGS:RD, 1+1 // Other args are ignored, anyway.
-+ |->fff_fallback: // Call fast function fallback handler.
-+ | // BASE = new base, RD = nargs+1
-+ | mov L:RB, SAVE_L
-+ | mov PC, [BASE-4] // Fallback may overwrite PC.
-+ | mov SAVE_PC, PC // Redundant (but a defined value).
-+ | mov L:RB->base, BASE
-+ | lea RD, [BASE+NARGS:RD*8-8] // RD = address just past the last argument.
-+ | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
-+ | mov L:RB->top, RD
-+ | mov CFUNC:RD, [BASE-8] // The called function object sits at [BASE-8].
-+ | cmp RA, L:RB->maxstack
-+ | ja >5 // Need to grow stack.
-+ |.if X64
-+ | mov CARG1d, L:RB
-+ |.else
-+ | mov ARG1, L:RB
-+ |.endif
-+ | call aword CFUNC:RD->f // (lua_State *L)
-+ | mov BASE, L:RB->base
-+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
-+ | test RD, RD; jg ->fff_res // Returned nresults+1?
-+ |1:
-+ | mov RA, L:RB->top
-+ | sub RA, BASE
-+ | shr RA, 3 // RA = number of slots between BASE and L->top.
-+ | test RD, RD
-+ | lea NARGS:RD, [RA+1] // lea leaves the flags of the test above intact for the jne below.
-+ | mov LFUNC:RB, [BASE-8]
-+ | jne ->vm_call_tail // Returned -1?
-+ | ins_callt // Returned 0: retry fast path.
-+ |
-+ |// Reconstruct previous base for vmeta_call during tailcall.
-+ |->vm_call_tail:
-+ | mov RA, BASE
-+ | test PC, FRAME_TYPE
-+ | jnz >3 // Some FRAME_TYPE bits are set in PC: use the delta stored in PC.
-+ | movzx RB, PC_RA
-+ | not RBa // Note: ~RB = -(RB+1)
-+ | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8
-+ | jmp ->vm_call_dispatch // Resolve again for tailcall.
-+ |3:
-+ | mov RB, PC
-+ | and RB, -8 // Mask off the low three bits of PC, leaving the frame delta.
-+ | sub BASE, RB
-+ | jmp ->vm_call_dispatch // Resolve again for tailcall.
-+ |
-+ |5: // Grow stack for fallback handler.
-+ | mov FCARG2, LUA_MINSTACK
-+ | mov FCARG1, L:RB
-+ | call extern lj_state_growstack@8 // (lua_State *L, int n)
-+ | mov BASE, L:RB->base
-+ | xor RD, RD // Simulate a return 0.
-+ | jmp <1 // Dumb retry (goes through ff first).
-+ |
-+ |->fff_gcstep: // Call GC step function.
-+ | // BASE = new base, RD = nargs+1
-+ | pop RBa // Must keep stack at same level.
-+ | mov TMPa, RBa // Save return address
-+ | mov L:RB, SAVE_L
-+ | mov SAVE_PC, PC // Redundant (but a defined value).
-+ | mov L:RB->base, BASE
-+ | lea RD, [BASE+NARGS:RD*8-8] // RD = address just past the last argument.
-+ | mov FCARG1, L:RB
-+ | mov L:RB->top, RD
-+ | call extern lj_gc_step@4 // (lua_State *L)
-+ | mov BASE, L:RB->base // Reload BASE and recompute nargs+1 from L->top after the call.
-+ | mov RD, L:RB->top
-+ | sub RD, BASE
-+ | shr RD, 3
-+ | add NARGS:RD, 1
-+ | mov RBa, TMPa
-+ | push RBa // Restore return address.
-+ | ret
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Special dispatch targets -------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->vm_record: // Dispatch target for recording phase.
-+ |.if JIT
-+ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
-+ | test RDL, HOOK_VMEVENT // No recording while in vmevent.
-+ | jnz >5
-+ | // Decrement the hookcount for consistency, but always do the call.
-+ | test RDL, HOOK_ACTIVE
-+ | jnz >1
-+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
-+ | jz >1
-+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
-+ | jmp >1
-+ |.endif
-+ |
-+ |->vm_rethook: // Dispatch target for return hooks.
-+ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
-+ | test RDL, HOOK_ACTIVE // Hook already active?
-+ | jnz >5
-+ | jmp >1
-+ |
-+ |->vm_inshook: // Dispatch target for instr/line hooks.
-+ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
-+ | test RDL, HOOK_ACTIVE // Hook already active?
-+ | jnz >5
-+ |
-+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
-+ | jz >5 // Neither line nor count hook is set: just re-dispatch.
-+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
-+ | jz >1 // hookcount reached zero: call the dispatcher.
-+ | test RDL, LUA_MASKLINE
-+ | jz >5 // No line hook: just re-dispatch.
-+ |1:
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov FCARG2, PC // Caveat: FCARG2 == BASE
-+ | mov FCARG1, L:RB
-+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
-+ | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
-+ |3:
-+ | mov BASE, L:RB->base // Reload BASE after the C call.
-+ |4:
-+ | movzx RA, PC_RA // Re-decode the operands of the current instruction.
-+ |5:
-+ | movzx OP, PC_OP
-+ | movzx RD, PC_RD
-+ |.if X64
-+ | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
-+ |.else
-+ | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins.
-+ |.endif
-+ |
-+ |->cont_hook: // Continue from hook yield.
-+ | add PC, 4 // Advance PC by one 4 byte instruction.
-+ | mov RA, [RB-24]
-+ | mov MULTRES, RA // Restore MULTRES for *M ins.
-+ | jmp <4
-+ |
-+ |->vm_hotloop: // Hot loop counter underflow.
-+ |.if JIT
-+ | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L).
-+ | mov RB, LFUNC:RB->pc
-+ | movzx RD, byte [RB+PC2PROTO(framesize)]
-+ | lea RD, [BASE+RD*8] // RD = BASE + framesize slots.
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov L:RB->top, RD
-+ | mov FCARG2, PC
-+ | lea FCARG1, [DISPATCH+GG_DISP2J] // First argument: the jit_State located at DISPATCH+GG_DISP2J.
-+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa // Store L in the jit_State.
-+ | mov SAVE_PC, PC
-+ | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
-+ | jmp <3 // Reload BASE and re-dispatch via the preceding local label 3.
-+ |.endif
-+ |
-+ |->vm_callhook: // Dispatch target for call hooks.
-+ | mov SAVE_PC, PC
-+ |.if JIT
-+ | jmp >1 // Skip the hot call marker below.
-+ |.endif
-+ |
-+ |->vm_hotcall: // Hot call counter underflow.
-+ |.if JIT
-+ | mov SAVE_PC, PC
-+ | or PC, 1 // Marker for hot call.
-+ |1:
-+ |.endif
-+ | lea RD, [BASE+NARGS:RD*8-8] // RD = address just past the last argument.
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov L:RB->top, RD
-+ | mov FCARG2, PC
-+ | mov FCARG1, L:RB
-+ | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc)
-+ | // ASMFunction returned in eax/rax (RDa).
-+ | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
-+ |.if JIT
-+ | and PC, -2 // Clear the hot call marker bit again.
-+ |.endif
-+ | mov BASE, L:RB->base
-+ | mov RAa, RDa // Park the returned function pointer in RA while RD is recomputed.
-+ | mov RD, L:RB->top
-+ | sub RD, BASE
-+ | mov RBa, RAa // Move the function pointer to RB; RA is needed for the operand decode.
-+ | movzx RA, PC_RA
-+ | shr RD, 3
-+ | add NARGS:RD, 1 // RD = nargs+1 again.
-+ | jmp RBa // Continue at the returned ASMFunction.
-+ |
-+ |->cont_stitch: // Trace stitching.
-+ |.if JIT
-+ | // BASE = base, RC = result, RB = mbase
-+ | mov TRACE:RA, [RB-24] // Save previous trace.
-+ | mov TMP1, TRACE:RA
-+ | mov TMP3, DISPATCH // Need one more register.
-+ | mov DISPATCH, MULTRES // DISPATCH is used as the result counter below.
-+ | movzx RA, PC_RA
-+ | lea RA, [BASE+RA*8] // Call base.
-+ | sub DISPATCH, 1 // MULTRES holds nresults+1.
-+ | jz >2 // No results to move.
-+ |1: // Move results down.
-+ |.if X64
-+ | mov RBa, [RC]
-+ | mov [RA], RBa
-+ |.else
-+ | mov RB, [RC]
-+ | mov [RA], RB
-+ | mov RB, [RC+4]
-+ | mov [RA+4], RB
-+ |.endif
-+ | add RC, 8
-+ | add RA, 8
-+ | sub DISPATCH, 1
-+ | jnz <1
-+ |2:
-+ | movzx RC, PC_RA
-+ | movzx RB, PC_RB
-+ | add RC, RB
-+ | lea RC, [BASE+RC*8-8] // RC = BASE + (RA operand + RB operand - 1)*8: end of the wanted results.
-+ |3:
-+ | cmp RC, RA
-+ | ja >9 // More results wanted?
-+ |
-+ | mov DISPATCH, TMP3 // Restore DISPATCH.
-+ | mov TRACE:RD, TMP1 // Get previous trace.
-+ | movzx RB, word TRACE:RD->traceno
-+ | movzx RD, word TRACE:RD->link
-+ | cmp RD, RB
-+ | je ->cont_nop // Blacklisted.
-+ | test RD, RD
-+ | jne =>BC_JLOOP // Jump to stitched trace.
-+ |
-+ | // Stitch a new trace to the previous trace.
-+ | mov [DISPATCH+DISPATCH_J(exitno)], RB // RB holds the previous trace's traceno here.
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov FCARG2, PC
-+ | lea FCARG1, [DISPATCH+GG_DISP2J]
-+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
-+ | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
-+ | mov BASE, L:RB->base
-+ | jmp ->cont_nop
-+ |
-+ |9: // Fill up results with nil.
-+ | mov dword [RA+4], LJ_TNIL
-+ | add RA, 8
-+ | jmp <3
-+ |.endif
-+ |
-+ |->vm_profhook: // Dispatch target for profiler hook.
-+#if LJ_HASPROFILE
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov FCARG2, PC // Caveat: FCARG2 == BASE
-+ | mov FCARG1, L:RB
-+ | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
-+ | mov BASE, L:RB->base // Reload BASE after the C call.
-+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
-+ | sub PC, 4 // Step PC back by one 4 byte instruction.
-+ | jmp ->cont_nop
-+#endif
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Trace exit handler -------------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |// Called from an exit stub with the exit number on the stack.
-+ |// The 16 bit exit number is stored with two (sign-extended) push imm8.
-+ |->vm_exit_handler: // Saves the machine state, calls lj_trace_exit and prepares the return to the interpreter.
-+ |.if JIT
-+ |.if X64
-+ | push r13; push r12
-+ | push r11; push r10; push r9; push r8
-+ | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp
-+ | push rbx; push rdx; push rcx; push rax
-+ | movzx RC, byte [rbp-8] // Reconstruct exit number.
-+ | mov RCH, byte [rbp-16] // Second byte of the exit number.
-+ | mov [rbp-8], r15; mov [rbp-16], r14 // Reuse the two exit number slots to save r15 and r14.
-+ |.else
-+ | push ebp; lea ebp, [esp+12]; push ebp
-+ | push ebx; push edx; push ecx; push eax
-+ | movzx RC, byte [ebp-4] // Reconstruct exit number.
-+ | mov RCH, byte [ebp-8] // Second byte of the exit number.
-+ | mov [ebp-4], edi; mov [ebp-8], esi // Reuse the two exit number slots to save edi and esi.
-+ |.endif
-+ | // Caveat: DISPATCH is ebx.
-+ | mov DISPATCH, [ebp]
-+ | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
-+ | set_vmstate EXIT
-+ | mov [DISPATCH+DISPATCH_J(exitno)], RC
-+ | mov [DISPATCH+DISPATCH_J(parent)], RA
-+ |.if X64
-+ |.if X64WIN
-+ | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
-+ |.else
-+ | sub rsp, 16*8 // Room for SSE regs.
-+ |.endif
-+ | add rbp, -128 // Rebase rbp so the stores below fit the offsets -8..-128.
-+ | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
-+ | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
-+ | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
-+ | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
-+ | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
-+ | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
-+ | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
-+ | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
-+ |.else
-+ | sub esp, 8*8+16 // Room for SSE regs + args.
-+ | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6
-+ | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4
-+ | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2
-+ | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
-+ |.endif
-+ | // Caveat: RB is ebp.
-+ | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
-+ | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
-+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
-+ | mov L:RB->base, BASE
-+ |.if X64WIN
-+ | lea CARG2, [rsp+4*8] // Second argument: the saved state, above the 4*8 byte save area.
-+ |.elif X64
-+ | mov CARG2, rsp // Second argument: the saved state at the stack pointer.
-+ |.else
-+ | lea FCARG2, [esp+16] // Second argument: the saved state, above the 16 bytes reserved for args.
-+ |.endif
-+ | lea FCARG1, [DISPATCH+GG_DISP2J]
-+ | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 // Clear jit_base.
-+ | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
-+ | // MULTRES or negated error code returned in eax (RD).
-+ | mov RAa, L:RB->cframe
-+ | and RAa, CFRAME_RAWMASK
-+ |.if X64WIN
-+ | // Reposition stack later.
-+ |.elif X64
-+ | mov rsp, RAa // Reposition stack to C frame.
-+ |.else
-+ | mov esp, RAa // Reposition stack to C frame.
-+ |.endif
-+ | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
-+ | mov BASE, L:RB->base
-+ | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC.
-+ |.if X64
-+ | jmp >1 // Jump forward to the next local label 1, which follows this routine.
-+ |.endif
-+ |.endif
-+ |->vm_exit_interp:
-+ | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
-+ |.if JIT
-+ |.if X64
-+ | // Restore additional callee-save registers only used in compiled code.
-+ |.if X64WIN
-+ | lea RAa, [rsp+9*16+4*8]
-+ |1:
-+ | movdqa xmm15, [RAa-9*16]
-+ | movdqa xmm14, [RAa-8*16]
-+ | movdqa xmm13, [RAa-7*16]
-+ | movdqa xmm12, [RAa-6*16]
-+ | movdqa xmm11, [RAa-5*16]
-+ | movdqa xmm10, [RAa-4*16]
-+ | movdqa xmm9, [RAa-3*16]
-+ | movdqa xmm8, [RAa-2*16]
-+ | movdqa xmm7, [RAa-1*16]
-+ | mov rsp, RAa // Reposition stack to C frame.
-+ | movdqa xmm6, [RAa]
-+ | mov r15, CSAVE_3
-+ | mov r14, CSAVE_4
-+ |.else
-+ | add rsp, 16 // Reposition stack to C frame.
-+ |1:
-+ |.endif
-+ | mov r13, TMPa
-+ | mov r12, TMPQ
-+ |.endif
-+ | test RD, RD; js >9 // Check for error from exit.
-+ | mov L:RB, SAVE_L
-+ | mov MULTRES, RD
-+ | mov LFUNC:KBASE, [BASE-8]
-+ | mov KBASE, LFUNC:KBASE->pc
-+ | mov KBASE, [KBASE+PC2PROTO(k)]
-+ | mov L:RB->base, BASE
-+ | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
-+ | set_vmstate INTERP
-+ | // Modified copy of ins_next which handles function header dispatch, too.
-+ | mov RC, [PC]
-+ | movzx RA, RCH
-+ | movzx OP, RCL
-+ | add PC, 4
-+ | shr RC, 16
-+ | cmp OP, BC_FUNCF // Function header?
-+ | jb >3
-+ | cmp OP, BC_FUNCC+2 // Fast function?
-+ | jae >4
-+ |2:
-+ | mov RC, MULTRES // RC/RD holds nres+1.
-+ |3:
-+ |.if X64
-+ | jmp aword [DISPATCH+OP*8]
-+ |.else
-+ | jmp aword [DISPATCH+OP*4]
-+ |.endif
-+ |
-+ |4: // Check frame below fast function.
-+ | mov RC, [BASE-4]
-+ | test RC, FRAME_TYPE
-+ | jnz <2 // Trace stitching continuation?
-+ | // Otherwise set KBASE for Lua function below fast function.
-+ | movzx RC, byte [RC-3]
-+ | not RCa
-+ | mov LFUNC:KBASE, [BASE+RC*8-8]
-+ | mov KBASE, LFUNC:KBASE->pc
-+ | mov KBASE, [KBASE+PC2PROTO(k)]
-+ | jmp <2
-+ |
-+ |9: // Rethrow error from the right C frame.
-+ | neg RD
-+ | mov FCARG1, L:RB
-+ | mov FCARG2, RD
-+ | call extern lj_err_throw@8 // (lua_State *L, int errcode)
-+ |.endif
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Math helper functions ----------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |// FP value rounding. Called by math.floor/math.ceil fast functions
-+ |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
-+ |.macro vm_round, name, mode, cond
-+ |->name:
-+ |.if not X64 and cond
-+ | movsd xmm0, qword [esp+4]
-+ | call ->name .. _sse
-+ | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
-+ | fld qword [esp+4]
-+ | ret
-+ |.endif
-+ |
-+ |->name .. _sse:
-+ | sseconst_abs xmm2, RDa
-+ | sseconst_2p52 xmm3, RDa
-+ | movaps xmm1, xmm0
-+ | andpd xmm1, xmm2 // |x|
-+ | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
-+ | jbe >1
-+ | andnpd xmm2, xmm0 // Isolate sign bit.
-+ |.if mode == 2 // trunc(x)?
-+ | movaps xmm0, xmm1
-+ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
-+ | subsd xmm1, xmm3
-+ | sseconst_1 xmm3, RDa
-+ | cmpsd xmm0, xmm1, 1 // |x| < result?
-+ | andpd xmm0, xmm3
-+ | subsd xmm1, xmm0 // If yes, subtract -1.
-+ | orpd xmm1, xmm2 // Merge sign bit back in.
-+ |.else
-+ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
-+ | subsd xmm1, xmm3
-+ | orpd xmm1, xmm2 // Merge sign bit back in.
-+ | .if mode == 1 // ceil(x)?
-+ | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0.
-+ | cmpsd xmm0, xmm1, 6 // x > result?
-+ | .else // floor(x)?
-+ | sseconst_1 xmm2, RDa
-+ | cmpsd xmm0, xmm1, 1 // x < result?
-+ | .endif
-+ | andpd xmm0, xmm2
-+ | subsd xmm1, xmm0 // If yes, subtract +-1.
-+ |.endif
-+ | movaps xmm0, xmm1
-+ |1:
-+ | ret
-+ |.endmacro
-+ |
-+ | vm_round vm_floor, 0, 1
-+ | vm_round vm_ceil, 1, JIT
-+ | vm_round vm_trunc, 2, JIT
-+ |
-+ |// FP modulo x%y. Called by BC_MOD* and vm_arith.
-+ |->vm_mod:
-+ |// Args in xmm0/xmm1, return value in xmm0.
-+ |// Caveat: xmm0-xmm5 and RC (eax) modified!
-+ | movaps xmm5, xmm0
-+ | divsd xmm0, xmm1
-+ | sseconst_abs xmm2, RDa
-+ | sseconst_2p52 xmm3, RDa
-+ | movaps xmm4, xmm0
-+ | andpd xmm4, xmm2 // |x/y|
-+ | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
-+ | jbe >1
-+ | andnpd xmm2, xmm0 // Isolate sign bit.
-+ | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
-+ | subsd xmm4, xmm3
-+ | orpd xmm4, xmm2 // Merge sign bit back in.
-+ | sseconst_1 xmm2, RDa
-+ | cmpsd xmm0, xmm4, 1 // x/y < result?
-+ | andpd xmm0, xmm2
-+ | subsd xmm4, xmm0 // If yes, subtract 1.0.
-+ | movaps xmm0, xmm5
-+ | mulsd xmm1, xmm4
-+ | subsd xmm0, xmm1
-+ | ret
-+ |1:
-+ | mulsd xmm1, xmm0
-+ | movaps xmm0, xmm5
-+ | subsd xmm0, xmm1
-+ | ret
-+ |
-+ |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
-+ |->vm_powi_sse:
-+ | cmp eax, 1; jle >6 // i<=1?
-+ | // Now 1 < (unsigned)i <= 0x80000000.
-+ |1: // Handle leading zeros.
-+ | test eax, 1; jnz >2
-+ | mulsd xmm0, xmm0
-+ | shr eax, 1
-+ | jmp <1
-+ |2:
-+ | shr eax, 1; jz >5
-+ | movaps xmm1, xmm0
-+ |3: // Handle trailing bits.
-+ | mulsd xmm0, xmm0
-+ | shr eax, 1; jz >4
-+ | jnc <3
-+ | mulsd xmm1, xmm0
-+ | jmp <3
-+ |4:
-+ | mulsd xmm0, xmm1
-+ |5:
-+ | ret
-+ |6:
-+ | je <5 // x^1 ==> x
-+ | jb >7 // x^0 ==> 1
-+ | neg eax
-+ | call <1
-+ | sseconst_1 xmm1, RDa
-+ | divsd xmm1, xmm0
-+ | movaps xmm0, xmm1
-+ | ret
-+ |7:
-+ | sseconst_1 xmm0, RDa
-+ | ret
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Miscellaneous functions --------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
-+ |->vm_cpuid:
-+ |.if X64
-+ | mov eax, CARG1d
-+ | .if X64WIN; push rsi; mov rsi, CARG2; .endif
-+ | push rbx
-+ | xor ecx, ecx
-+ | cpuid
-+ | mov [rsi], eax
-+ | mov [rsi+4], ebx
-+ | mov [rsi+8], ecx
-+ | mov [rsi+12], edx
-+ | pop rbx
-+ | .if X64WIN; pop rsi; .endif
-+ | ret
-+ |.else
-+ | pushfd
-+ | pop edx
-+ | mov ecx, edx
-+ | xor edx, 0x00200000 // Toggle ID bit in flags.
-+ | push edx
-+ | popfd
-+ | pushfd
-+ | pop edx
-+ | xor eax, eax // Zero means no features supported.
-+ | cmp ecx, edx
-+ | jz >1 // No ID toggle means no CPUID support.
-+ | mov eax, [esp+4] // Argument 1 is function number.
-+ | push edi
-+ | push ebx
-+ | xor ecx, ecx
-+ | cpuid
-+ | mov edi, [esp+16] // Argument 2 is result area.
-+ | mov [edi], eax
-+ | mov [edi+4], ebx
-+ | mov [edi+8], ecx
-+ | mov [edi+12], edx
-+ | pop ebx
-+ | pop edi
-+ |1:
-+ | ret
-+ |.endif
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- Assertions ---------------------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |->assert_bad_for_arg_type:
-+#ifdef LUA_USE_ASSERT
-+ | int3
-+#endif
-+ | int3
-+ |
-+ |//-----------------------------------------------------------------------
-+ |//-- FFI helper functions -----------------------------------------------
-+ |//-----------------------------------------------------------------------
-+ |
-+ |// Handler for callback functions. Callback slot number in ah/al.
-+ |->vm_ffi_callback:
-+ |.if FFI
-+ |.type CTSTATE, CTState, PC
-+ |.if not X64
-+ | sub esp, 16 // Leave room for SAVE_ERRF etc.
-+ |.endif
-+ | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
-+ | lea DISPATCH, [ebp+GG_G2DISP]
-+ | mov CTSTATE, GL:ebp->ctype_state
-+ | movzx eax, ax
-+ | mov CTSTATE->cb.slot, eax
-+ |.if X64
-+ | mov CTSTATE->cb.gpr[0], CARG1
-+ | mov CTSTATE->cb.gpr[1], CARG2
-+ | mov CTSTATE->cb.gpr[2], CARG3
-+ | mov CTSTATE->cb.gpr[3], CARG4
-+ | movsd qword CTSTATE->cb.fpr[0], xmm0
-+ | movsd qword CTSTATE->cb.fpr[1], xmm1
-+ | movsd qword CTSTATE->cb.fpr[2], xmm2
-+ | movsd qword CTSTATE->cb.fpr[3], xmm3
-+ |.if X64WIN
-+ | lea rax, [rsp+CFRAME_SIZE+4*8]
-+ |.else
-+ | lea rax, [rsp+CFRAME_SIZE]
-+ | mov CTSTATE->cb.gpr[4], CARG5
-+ | mov CTSTATE->cb.gpr[5], CARG6
-+ | movsd qword CTSTATE->cb.fpr[4], xmm4
-+ | movsd qword CTSTATE->cb.fpr[5], xmm5
-+ | movsd qword CTSTATE->cb.fpr[6], xmm6
-+ | movsd qword CTSTATE->cb.fpr[7], xmm7
-+ |.endif
-+ | mov CTSTATE->cb.stack, rax
-+ | mov CARG2, rsp
-+ |.else
-+ | lea eax, [esp+CFRAME_SIZE+16]
-+ | mov CTSTATE->cb.gpr[0], FCARG1
-+ | mov CTSTATE->cb.gpr[1], FCARG2
-+ | mov CTSTATE->cb.stack, eax
-+ | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp.
-+ | mov FCARG2, [esp+CFRAME_SIZE+8]
-+ | mov SAVE_RET, FCARG1
-+ | mov SAVE_R4, FCARG2
-+ | mov FCARG2, esp
-+ |.endif
-+ | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
-+ | mov FCARG1, CTSTATE
-+ | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf)
-+ | // lua_State * returned in eax (RD).
-+ | set_vmstate INTERP
-+ | mov BASE, L:RD->base
-+ | mov RD, L:RD->top
-+ | sub RD, BASE
-+ | mov LFUNC:RB, [BASE-8]
-+ | shr RD, 3
-+ | add RD, 1
-+ | ins_callt
-+ |.endif
-+ |
-+ |->cont_ffi_callback: // Return from FFI callback.
-+ |.if FFI
-+ | mov L:RA, SAVE_L
-+ | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
-+ | mov aword CTSTATE->L, L:RAa
-+ | mov L:RA->base, BASE
-+ | mov L:RA->top, RB
-+ | mov FCARG1, CTSTATE
-+ | mov FCARG2, RC
-+ | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o)
-+ |.if X64
-+ | mov rax, CTSTATE->cb.gpr[0]
-+ | movsd xmm0, qword CTSTATE->cb.fpr[0]
-+ | jmp ->vm_leave_unw
-+ |.else
-+ | mov L:RB, SAVE_L
-+ | mov eax, CTSTATE->cb.gpr[0]
-+ | mov edx, CTSTATE->cb.gpr[1]
-+ | cmp dword CTSTATE->cb.gpr[2], 1
-+ | jb >7
-+ | je >6
-+ | fld qword CTSTATE->cb.fpr[0].d
-+ | jmp >7
-+ |6:
-+ | fld dword CTSTATE->cb.fpr[0].f
-+ |7:
-+ | mov ecx, L:RB->top
-+ | movzx ecx, word [ecx+6] // Get stack adjustment and copy up.
-+ | mov SAVE_L, ecx // Must be one slot above SAVE_RET
-+ | restoreregs
-+ | pop ecx // Move return addr from SAVE_RET.
-+ | add esp, [esp] // Adjust stack.
-+ | add esp, 16
-+ | push ecx
-+ | ret
-+ |.endif
-+ |.endif
-+ |
-+ |->vm_ffi_call@4: // Call C function via FFI.
-+ | // Caveat: needs special frame unwinding, see below.
-+ |.if FFI
-+ |.if X64
-+ | .type CCSTATE, CCallState, rbx
-+ | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1
-+ |.else
-+ | .type CCSTATE, CCallState, ebx
-+ | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1
-+ |.endif
-+ |
-+ | // Readjust stack.
-+ |.if X64
-+ | mov eax, CCSTATE->spadj
-+ | sub rsp, rax
-+ |.else
-+ | sub esp, CCSTATE->spadj
-+ |.if WIN
-+ | mov CCSTATE->spadj, esp
-+ |.endif
-+ |.endif
-+ |
-+ | // Copy stack slots.
-+ | movzx ecx, byte CCSTATE->nsp
-+ | sub ecx, 1
-+ | js >2
-+ |1:
-+ |.if X64
-+ | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
-+ | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
-+ |.else
-+ | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
-+ | mov [esp+ecx*4], eax
-+ |.endif
-+ | sub ecx, 1
-+ | jns <1
-+ |2:
-+ |
-+ |.if X64
-+ | movzx eax, byte CCSTATE->nfpr
-+ | mov CARG1, CCSTATE->gpr[0]
-+ | mov CARG2, CCSTATE->gpr[1]
-+ | mov CARG3, CCSTATE->gpr[2]
-+ | mov CARG4, CCSTATE->gpr[3]
-+ |.if not X64WIN
-+ | mov CARG5, CCSTATE->gpr[4]
-+ | mov CARG6, CCSTATE->gpr[5]
-+ |.endif
-+ | test eax, eax; jz >5
-+ | movaps xmm0, CCSTATE->fpr[0]
-+ | movaps xmm1, CCSTATE->fpr[1]
-+ | movaps xmm2, CCSTATE->fpr[2]
-+ | movaps xmm3, CCSTATE->fpr[3]
-+ |.if not X64WIN
-+ | cmp eax, 4; jbe >5
-+ | movaps xmm4, CCSTATE->fpr[4]
-+ | movaps xmm5, CCSTATE->fpr[5]
-+ | movaps xmm6, CCSTATE->fpr[6]
-+ | movaps xmm7, CCSTATE->fpr[7]
-+ |.endif
-+ |5:
-+ |.else
-+ | mov FCARG1, CCSTATE->gpr[0]
-+ | mov FCARG2, CCSTATE->gpr[1]
-+ |.endif
-+ |
-+ | call aword CCSTATE->func
-+ |
-+ |.if X64
-+ | mov CCSTATE->gpr[0], rax
-+ | movaps CCSTATE->fpr[0], xmm0
-+ |.if not X64WIN
-+ | mov CCSTATE->gpr[1], rdx
-+ | movaps CCSTATE->fpr[1], xmm1
-+ |.endif
-+ |.else
-+ | mov CCSTATE->gpr[0], eax
-+ | mov CCSTATE->gpr[1], edx
-+ | cmp byte CCSTATE->resx87, 1
-+ | jb >7
-+ | je >6
-+ | fstp qword CCSTATE->fpr[0].d[0]
-+ | jmp >7
-+ |6:
-+ | fstp dword CCSTATE->fpr[0].f[0]
-+ |7:
-+ |.if WIN
-+ | sub CCSTATE->spadj, esp
-+ |.endif
-+ |.endif
-+ |
-+ |.if X64
-+ | mov rbx, [rbp-8]; leave; ret
-+ |.else
-+ | mov ebx, [ebp-4]; leave; ret
-+ |.endif
-+ |.endif
-+ |// Note: vm_ffi_call must be the last function in this object file!
-+ |
-+ |//-----------------------------------------------------------------------
-+}
-+
-+/* Generate the code for a single instruction. */
-+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-+{
-+ int vk = 0;
-+ |// Note: aligning all instructions does not pay off.
-+ |=>defop:
-+
-+ switch (op) {
-+
-+ /* -- Comparison ops ---------------------------------------------------- */
-+
-+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
-+
-+ |.macro jmp_comp, lt, ge, le, gt, target
-+ ||switch (op) {
-+ ||case BC_ISLT:
-+ | lt target
-+ ||break;
-+ ||case BC_ISGE:
-+ | ge target
-+ ||break;
-+ ||case BC_ISLE:
-+ | le target
-+ ||break;
-+ ||case BC_ISGT:
-+ | gt target
-+ ||break;
-+ ||default: break; /* Shut up GCC. */
-+ ||}
-+ |.endmacro
-+
-+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
-+ | // RA = src1, RD = src2, JMP with RD = target
-+ | ins_AD
-+ |.if DUALNUM
-+ | checkint RA, >7
-+ | checkint RD, >8
-+ | mov RB, dword [BASE+RA*8]
-+ | add PC, 4
-+ | cmp RB, dword [BASE+RD*8]
-+ | jmp_comp jge, jl, jg, jle, >9
-+ |6:
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |9:
-+ | ins_next
-+ |
-+ |7: // RA is not an integer.
-+ | ja ->vmeta_comp
-+ | // RA is a number.
-+ | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
-+ | // RA is a number, RD is an integer.
-+ | cvtsi2sd xmm0, dword [BASE+RD*8]
-+ | jmp >2
-+ |
-+ |8: // RA is an integer, RD is not an integer.
-+ | ja ->vmeta_comp
-+ | // RA is an integer, RD is a number.
-+ | cvtsi2sd xmm1, dword [BASE+RA*8]
-+ | movsd xmm0, qword [BASE+RD*8]
-+ | add PC, 4
-+ | ucomisd xmm0, xmm1
-+ | jmp_comp jbe, ja, jb, jae, <9
-+ | jmp <6
-+ |.else
-+ | checknum RA, ->vmeta_comp
-+ | checknum RD, ->vmeta_comp
-+ |.endif
-+ |1:
-+ | movsd xmm0, qword [BASE+RD*8]
-+ |2:
-+ | add PC, 4
-+ | ucomisd xmm0, qword [BASE+RA*8]
-+ |3:
-+ | // Unordered: all of ZF CF PF set, ordered: PF clear.
-+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
-+ |.if DUALNUM
-+ | jmp_comp jbe, ja, jb, jae, <9
-+ | jmp <6
-+ |.else
-+ | jmp_comp jbe, ja, jb, jae, >1
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |1:
-+ | ins_next
-+ |.endif
-+ break;
-+
-+ case BC_ISEQV: case BC_ISNEV:
-+ vk = op == BC_ISEQV;
-+ | ins_AD // RA = src1, RD = src2, JMP with RD = target
-+ | mov RB, [BASE+RD*8+4]
-+ | add PC, 4
-+ |.if DUALNUM
-+ | cmp RB, LJ_TISNUM; jne >7
-+ | checkint RA, >8
-+ | mov RB, dword [BASE+RD*8]
-+ | cmp RB, dword [BASE+RA*8]
-+ if (vk) {
-+ | jne >9
-+ } else {
-+ | je >9
-+ }
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |9:
-+ | ins_next
-+ |
-+ |7: // RD is not an integer.
-+ | ja >5
-+ | // RD is a number.
-+ | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
-+ | // RD is a number, RA is an integer.
-+ | cvtsi2sd xmm0, dword [BASE+RA*8]
-+ | jmp >2
-+ |
-+ |8: // RD is an integer, RA is not an integer.
-+ | ja >5
-+ | // RD is an integer, RA is a number.
-+ | cvtsi2sd xmm0, dword [BASE+RD*8]
-+ | ucomisd xmm0, qword [BASE+RA*8]
-+ | jmp >4
-+ |
-+ |.else
-+ | cmp RB, LJ_TISNUM; jae >5
-+ | checknum RA, >5
-+ |.endif
-+ |1:
-+ | movsd xmm0, qword [BASE+RA*8]
-+ |2:
-+ | ucomisd xmm0, qword [BASE+RD*8]
-+ |4:
-+ iseqne_fp:
-+ if (vk) {
-+ | jp >2 // Unordered means not equal.
-+ | jne >2
-+ } else {
-+ | jp >2 // Unordered means not equal.
-+ | je >1
-+ }
-+ iseqne_end:
-+ if (vk) {
-+ |1: // EQ: Branch to the target.
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |2: // NE: Fallthrough to next instruction.
-+ |.if not FFI
-+ |3:
-+ |.endif
-+ } else {
-+ |.if not FFI
-+ |3:
-+ |.endif
-+ |2: // NE: Branch to the target.
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |1: // EQ: Fallthrough to next instruction.
-+ }
-+ if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
-+ op == BC_ISEQN || op == BC_ISNEN)) {
-+ | jmp <9
-+ } else {
-+ | ins_next
-+ }
-+ |
-+ if (op == BC_ISEQV || op == BC_ISNEV) {
-+ |5: // Either or both types are not numbers.
-+ |.if FFI
-+ | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
-+ | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd
-+ |.endif
-+ | checktp RA, RB // Compare types.
-+ | jne <2 // Not the same type?
-+ | cmp RB, LJ_TISPRI
-+ | jae <1 // Same type and primitive type?
-+ |
-+ | // Same types and not a primitive type. Compare GCobj or pvalue.
-+ | mov RA, [BASE+RA*8]
-+ | mov RD, [BASE+RD*8]
-+ | cmp RA, RD
-+ | je <1 // Same GCobjs or pvalues?
-+ | cmp RB, LJ_TISTABUD
-+ | ja <2 // Different objects and not table/ud?
-+ |.if X64
-+ | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata.
-+ | jb <2
-+ |.endif
-+ |
-+ | // Different tables or userdatas. Need to check __eq metamethod.
-+ | // Field metatable must be at same offset for GCtab and GCudata!
-+ | mov TAB:RB, TAB:RA->metatable
-+ | test TAB:RB, TAB:RB
-+ | jz <2 // No metatable?
-+ | test byte TAB:RB->nomm, 1<<MM_eq
-+ | jnz <2 // Or 'no __eq' flag set?
-+ if (vk) {
-+ | xor RB, RB // ne = 0
-+ } else {
-+ | mov RB, 1 // ne = 1
-+ }
-+ | jmp ->vmeta_equal // Handle __eq metamethod.
-+ } else {
-+ |.if FFI
-+ |3:
-+ | cmp RB, LJ_TCDATA
-+ if (LJ_DUALNUM && vk) {
-+ | jne <9
-+ } else {
-+ | jne <2
-+ }
-+ | jmp ->vmeta_equal_cd
-+ |.endif
-+ }
-+ break;
-+ case BC_ISEQS: case BC_ISNES:
-+ vk = op == BC_ISEQS;
-+ | ins_AND // RA = src, RD = str const, JMP with RD = target
-+ | mov RB, [BASE+RA*8+4]
-+ | add PC, 4
-+ | cmp RB, LJ_TSTR; jne >3
-+ | mov RA, [BASE+RA*8]
-+ | cmp RA, [KBASE+RD*4]
-+ iseqne_test:
-+ if (vk) {
-+ | jne >2
-+ } else {
-+ | je >1
-+ }
-+ goto iseqne_end;
-+ case BC_ISEQN: case BC_ISNEN:
-+ vk = op == BC_ISEQN;
-+ | ins_AD // RA = src, RD = num const, JMP with RD = target
-+ | mov RB, [BASE+RA*8+4]
-+ | add PC, 4
-+ |.if DUALNUM
-+ | cmp RB, LJ_TISNUM; jne >7
-+ | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8
-+ | mov RB, dword [KBASE+RD*8]
-+ | cmp RB, dword [BASE+RA*8]
-+ if (vk) {
-+ | jne >9
-+ } else {
-+ | je >9
-+ }
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |9:
-+ | ins_next
-+ |
-+ |7: // RA is not an integer.
-+ | ja >3
-+ | // RA is a number.
-+ | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
-+ | // RA is a number, RD is an integer.
-+ | cvtsi2sd xmm0, dword [KBASE+RD*8]
-+ | jmp >2
-+ |
-+ |8: // RA is an integer, RD is a number.
-+ | cvtsi2sd xmm0, dword [BASE+RA*8]
-+ | ucomisd xmm0, qword [KBASE+RD*8]
-+ | jmp >4
-+ |.else
-+ | cmp RB, LJ_TISNUM; jae >3
-+ |.endif
-+ |1:
-+ | movsd xmm0, qword [KBASE+RD*8]
-+ |2:
-+ | ucomisd xmm0, qword [BASE+RA*8]
-+ |4:
-+ goto iseqne_fp;
-+ case BC_ISEQP: case BC_ISNEP:
-+ vk = op == BC_ISEQP;
-+ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
-+ | mov RB, [BASE+RA*8+4]
-+ | add PC, 4
-+ | cmp RB, RD
-+ if (!LJ_HASFFI) goto iseqne_test;
-+ if (vk) {
-+ | jne >3
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |2:
-+ | ins_next
-+ |3:
-+ | cmp RB, LJ_TCDATA; jne <2
-+ | jmp ->vmeta_equal_cd
-+ } else {
-+ | je >2
-+ | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |2:
-+ | ins_next
-+ }
-+ break;
-+
-+ /* -- Unary test and copy ops ------------------------------------------- */
-+
-+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
-+ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
-+ | mov RB, [BASE+RD*8+4]
-+ | add PC, 4
-+ | cmp RB, LJ_TISTRUECOND
-+ if (op == BC_IST || op == BC_ISTC) {
-+ | jae >1
-+ } else {
-+ | jb >1
-+ }
-+ if (op == BC_ISTC || op == BC_ISFC) {
-+ | mov [BASE+RA*8+4], RB
-+ | mov RB, [BASE+RD*8]
-+ | mov [BASE+RA*8], RB
-+ }
-+ | movzx RD, PC_RD
-+ | branchPC RD
-+ |1: // Fallthrough to the next instruction.
-+ | ins_next
-+ break;
-+
-+ case BC_ISTYPE:
-+ | ins_AD // RA = src, RD = -type
-+ | add RD, [BASE+RA*8+4]
-+ | jne ->vmeta_istype
-+ | ins_next
-+ break;
-+ case BC_ISNUM:
-+ | ins_AD // RA = src, RD = -(TISNUM-1)
-+ | checknum RA, ->vmeta_istype
-+ | ins_next
-+ break;
-+
-+ /* -- Unary ops --------------------------------------------------------- */
-+
-+ case BC_MOV:
-+ | ins_AD // RA = dst, RD = src
-+ |.if X64
-+ | mov RBa, [BASE+RD*8]
-+ | mov [BASE+RA*8], RBa
-+ |.else
-+ | mov RB, [BASE+RD*8+4]
-+ | mov RD, [BASE+RD*8]
-+ | mov [BASE+RA*8+4], RB
-+ | mov [BASE+RA*8], RD
-+ |.endif
-+ | ins_next_
-+ break;
-+ case BC_NOT:
-+ | ins_AD // RA = dst, RD = src
-+ | xor RB, RB
-+ | checktp RD, LJ_TISTRUECOND
-+ | adc RB, LJ_TTRUE
-+ | mov [BASE+RA*8+4], RB
-+ | ins_next
-+ break;
-+ case BC_UNM:
-+ | ins_AD // RA = dst, RD = src
-+ |.if DUALNUM
-+ | checkint RD, >5
-+ | mov RB, [BASE+RD*8]
-+ | neg RB
-+ | jo >4
-+ | mov dword [BASE+RA*8+4], LJ_TISNUM
-+ | mov dword [BASE+RA*8], RB
-+ |9:
-+ | ins_next
-+ |4:
-+ | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31.
-+ | mov dword [BASE+RA*8], 0
-+ | jmp <9
-+ |5:
-+ | ja ->vmeta_unm
-+ |.else
-+ | checknum RD, ->vmeta_unm
-+ |.endif
-+ | movsd xmm0, qword [BASE+RD*8]
-+ | sseconst_sign xmm1, RDa
-+ | xorps xmm0, xmm1
-+ | movsd qword [BASE+RA*8], xmm0
-+ |.if DUALNUM
-+ | jmp <9
-+ |.else
-+ | ins_next
-+ |.endif
-+ break;
-+ case BC_LEN:
-+ | ins_AD // RA = dst, RD = src
-+ | checkstr RD, >2
-+ | mov STR:RD, [BASE+RD*8]
-+ |.if DUALNUM
-+ | mov RD, dword STR:RD->len
-+ |1:
-+ | mov dword [BASE+RA*8+4], LJ_TISNUM
-+ | mov dword [BASE+RA*8], RD
-+ |.else
-+ | xorps xmm0, xmm0
-+ | cvtsi2sd xmm0, dword STR:RD->len
-+ |1:
-+ | movsd qword [BASE+RA*8], xmm0
-+ |.endif
-+ | ins_next
-+ |2:
-+ | checktab RD, ->vmeta_len
-+ | mov TAB:FCARG1, [BASE+RD*8]
-+#if LJ_52
-+ | mov TAB:RB, TAB:FCARG1->metatable
-+ | cmp TAB:RB, 0
-+ | jnz >9
-+ |3:
-+#endif
-+ |->BC_LEN_Z:
-+ | mov RB, BASE // Save BASE.
-+ | call extern lj_tab_len@4 // (GCtab *t)
-+ | // Length of table returned in eax (RD).
-+ |.if DUALNUM
-+ | // Nothing to do.
-+ |.else
-+ | cvtsi2sd xmm0, RD
-+ |.endif
-+ | mov BASE, RB // Restore BASE.
-+ | movzx RA, PC_RA
-+ | jmp <1
-+#if LJ_52
-+ |9: // Check for __len.
-+ | test byte TAB:RB->nomm, 1<<MM_len
-+ | jnz <3
-+ | jmp ->vmeta_len // 'no __len' flag NOT set: check.
-+#endif
-+ break;
-+
-+ /* -- Binary ops -------------------------------------------------------- */
-+
-+ |.macro ins_arithpre, sseins, ssereg
-+ | ins_ABC
-+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
-+ ||switch (vk) {
-+ ||case 0:
-+ | checknum RB, ->vmeta_arith_vn
-+ | .if DUALNUM
-+ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
-+ | .endif
-+ | movsd xmm0, qword [BASE+RB*8]
-+ | sseins ssereg, qword [KBASE+RC*8]
-+ || break;
-+ ||case 1:
-+ | checknum RB, ->vmeta_arith_nv
-+ | .if DUALNUM
-+ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
-+ | .endif
-+ | movsd xmm0, qword [KBASE+RC*8]
-+ | sseins ssereg, qword [BASE+RB*8]
-+ || break;
-+ ||default:
-+ | checknum RB, ->vmeta_arith_vv
-+ | checknum RC, ->vmeta_arith_vv
-+ | movsd xmm0, qword [BASE+RB*8]
-+ | sseins ssereg, qword [BASE+RC*8]
-+ || break;
-+ ||}
-+ |.endmacro
-+ |
-+ |.macro ins_arithdn, intins
-+ | ins_ABC
-+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
-+ ||switch (vk) {
-+ ||case 0:
-+ | checkint RB, ->vmeta_arith_vn
-+ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn
-+ | mov RB, [BASE+RB*8]
-+ | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno
-+ || break;
-+ ||case 1:
-+ | checkint RB, ->vmeta_arith_nv
-+ | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv
-+ | mov RC, [KBASE+RC*8]
-+ | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo
-+ || break;
-+ ||default:
-+ | checkint RB, ->vmeta_arith_vv
-+ | checkint RC, ->vmeta_arith_vv
-+ | mov RB, [BASE+RB*8]
-+ | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo
-+ || break;
-+ ||}
-+ | mov dword [BASE+RA*8+4], LJ_TISNUM
-+ ||if (vk == 1) {
-+ | mov dword [BASE+RA*8], RC
-+ ||} else {
-+ | mov dword [BASE+RA*8], RB
-+ ||}
-+ | ins_next
-+ |.endmacro
-+ |
-+ |.macro ins_arithpost
-+ | movsd qword [BASE+RA*8], xmm0
-+ |.endmacro
-+ |
-+ |.macro ins_arith, sseins
-+ | ins_arithpre sseins, xmm0
-+ | ins_arithpost
-+ | ins_next
-+ |.endmacro
-+ |
-+ |.macro ins_arith, intins, sseins
-+ |.if DUALNUM
-+ | ins_arithdn intins
-+ |.else
-+ | ins_arith, sseins
-+ |.endif
-+ |.endmacro
-+
-+ | // RA = dst, RB = src1 or num const, RC = src2 or num const
-+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
-+ | ins_arith add, addsd
-+ break;
-+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
-+ | ins_arith sub, subsd
-+ break;
-+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
-+ | ins_arith imul, mulsd
-+ break;
-+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
-+ | ins_arith divsd
-+ break;
-+ case BC_MODVN:
-+ | ins_arithpre movsd, xmm1
-+ |->BC_MODVN_Z:
-+ | call ->vm_mod
-+ | ins_arithpost
-+ | ins_next
-+ break;
-+ case BC_MODNV: case BC_MODVV:
-+ | ins_arithpre movsd, xmm1
-+ | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
-+ break;
-+ case BC_POW:
-+ | ins_arithpre movsd, xmm1
-+ | mov RB, BASE
-+ |.if not X64
-+ | movsd FPARG1, xmm0
-+ | movsd FPARG3, xmm1
-+ |.endif
-+ | call extern pow
-+ | movzx RA, PC_RA
-+ | mov BASE, RB
-+ |.if X64
-+ | ins_arithpost
-+ |.else
-+ | fstp qword [BASE+RA*8]
-+ |.endif
-+ | ins_next
-+ break;
-+
-+ case BC_CAT:
-+ | ins_ABC // RA = dst, RB = src_start, RC = src_end
-+ |.if X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE
-+ | lea CARG2d, [BASE+RC*8]
-+ | mov CARG3d, RC
-+ | sub CARG3d, RB
-+ |->BC_CAT_Z:
-+ | mov L:RB, L:CARG1d
-+ |.else
-+ | lea RA, [BASE+RC*8]
-+ | sub RC, RB
-+ | mov ARG2, RA
-+ | mov ARG3, RC
-+ |->BC_CAT_Z:
-+ | mov L:RB, SAVE_L
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
-+ | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | test RC, RC
-+ | jnz ->vmeta_binop
-+ | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
-+ | movzx RA, PC_RA
-+ |.if X64
-+ | mov RCa, [BASE+RB*8]
-+ | mov [BASE+RA*8], RCa
-+ |.else
-+ | mov RC, [BASE+RB*8+4]
-+ | mov RB, [BASE+RB*8]
-+ | mov [BASE+RA*8+4], RC
-+ | mov [BASE+RA*8], RB
-+ |.endif
-+ | ins_next
-+ break;
-+
-+ /* -- Constant ops ------------------------------------------------------ */
-+
-+ case BC_KSTR:
-+ | ins_AND // RA = dst, RD = str const (~)
-+ | mov RD, [KBASE+RD*4]
-+ | mov dword [BASE+RA*8+4], LJ_TSTR
-+ | mov [BASE+RA*8], RD
-+ | ins_next
-+ break;
-+ case BC_KCDATA:
-+ |.if FFI
-+ | ins_AND // RA = dst, RD = cdata const (~)
-+ | mov RD, [KBASE+RD*4]
-+ | mov dword [BASE+RA*8+4], LJ_TCDATA
-+ | mov [BASE+RA*8], RD
-+ | ins_next
-+ |.endif
-+ break;
-+ case BC_KSHORT:
-+ | ins_AD // RA = dst, RD = signed int16 literal
-+ |.if DUALNUM
-+ | movsx RD, RDW
-+ | mov dword [BASE+RA*8+4], LJ_TISNUM
-+ | mov dword [BASE+RA*8], RD
-+ |.else
-+ | movsx RD, RDW // Sign-extend literal.
-+ | cvtsi2sd xmm0, RD
-+ | movsd qword [BASE+RA*8], xmm0
-+ |.endif
-+ | ins_next
-+ break;
-+ case BC_KNUM:
-+ | ins_AD // RA = dst, RD = num const
-+ | movsd xmm0, qword [KBASE+RD*8]
-+ | movsd qword [BASE+RA*8], xmm0
-+ | ins_next
-+ break;
-+ case BC_KPRI:
-+ | ins_AND // RA = dst, RD = primitive type (~)
-+ | mov [BASE+RA*8+4], RD
-+ | ins_next
-+ break;
-+ case BC_KNIL:
-+ | ins_AD // RA = dst_start, RD = dst_end
-+ | lea RA, [BASE+RA*8+12]
-+ | lea RD, [BASE+RD*8+4]
-+ | mov RB, LJ_TNIL
-+ | mov [RA-8], RB // Sets minimum 2 slots.
-+ |1:
-+ | mov [RA], RB
-+ | add RA, 8
-+ | cmp RA, RD
-+ | jbe <1
-+ | ins_next
-+ break;
-+
-+ /* -- Upvalue and function ops ------------------------------------------ */
-+
-+ case BC_UGET:
-+ | ins_AD // RA = dst, RD = upvalue #
-+ | mov LFUNC:RB, [BASE-8]
-+ | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)]
-+ | mov RB, UPVAL:RB->v
-+ |.if X64
-+ | mov RDa, [RB]
-+ | mov [BASE+RA*8], RDa
-+ |.else
-+ | mov RD, [RB+4]
-+ | mov RB, [RB]
-+ | mov [BASE+RA*8+4], RD
-+ | mov [BASE+RA*8], RB
-+ |.endif
-+ | ins_next
-+ break;
-+ case BC_USETV:
-+#define TV2MARKOFS \
-+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
-+ | ins_AD // RA = upvalue #, RD = src
-+ | mov LFUNC:RB, [BASE-8]
-+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
-+ | cmp byte UPVAL:RB->closed, 0
-+ | mov RB, UPVAL:RB->v
-+ | mov RA, [BASE+RD*8]
-+ | mov RD, [BASE+RD*8+4]
-+ | mov [RB], RA
-+ | mov [RB+4], RD
-+ | jz >1
-+ | // Check barrier for closed upvalue.
-+ | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
-+ | jnz >2
-+ |1:
-+ | ins_next
-+ |
-+ |2: // Upvalue is black. Check if new value is collectable and white.
-+ | sub RD, LJ_TISGCV
-+ | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
-+ | jbe <1
-+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
-+ | jz <1
-+ | // Crossed a write barrier. Move the barrier forward.
-+ |.if X64 and not X64WIN
-+ | mov FCARG2, RB
-+ | mov RB, BASE // Save BASE.
-+ |.else
-+ | xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
-+ |.endif
-+ | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
-+ | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
-+ | mov BASE, RB // Restore BASE.
-+ | jmp <1
-+ break;
-+#undef TV2MARKOFS
-+ case BC_USETS:
-+ | ins_AND // RA = upvalue #, RD = str const (~)
-+ | mov LFUNC:RB, [BASE-8]
-+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
-+ | mov GCOBJ:RA, [KBASE+RD*4]
-+ | mov RD, UPVAL:RB->v
-+ | mov [RD], GCOBJ:RA
-+ | mov dword [RD+4], LJ_TSTR
-+ | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
-+ | jnz >2
-+ |1:
-+ | ins_next
-+ |
-+ |2: // Check if string is white and ensure upvalue is closed.
-+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
-+ | jz <1
-+ | cmp byte UPVAL:RB->closed, 0
-+ | jz <1
-+ | // Crossed a write barrier. Move the barrier forward.
-+ | mov RB, BASE // Save BASE (FCARG2 == BASE).
-+ | mov FCARG2, RD
-+ | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
-+ | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
-+ | mov BASE, RB // Restore BASE.
-+ | jmp <1
-+ break;
-+ case BC_USETN:
-+ | ins_AD // RA = upvalue #, RD = num const
-+ | mov LFUNC:RB, [BASE-8]
-+ | movsd xmm0, qword [KBASE+RD*8]
-+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
-+ | mov RA, UPVAL:RB->v
-+ | movsd qword [RA], xmm0
-+ | ins_next
-+ break;
-+ case BC_USETP:
-+ | ins_AND // RA = upvalue #, RD = primitive type (~)
-+ | mov LFUNC:RB, [BASE-8]
-+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
-+ | mov RA, UPVAL:RB->v
-+ | mov [RA+4], RD
-+ | ins_next
-+ break;
-+ case BC_UCLO:
-+ | ins_AD // RA = level, RD = target
-+ | branchPC RD // Do this first to free RD.
-+ | mov L:RB, SAVE_L
-+ | cmp dword L:RB->openupval, 0
-+ | je >1
-+ | mov L:RB->base, BASE
-+ | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE
-+ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
-+ | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level)
-+ | mov BASE, L:RB->base
-+ |1:
-+ | ins_next
-+ break;
-+
-+ case BC_FNEW:
-+ | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
-+ |.if X64
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-+ | mov CARG3d, [BASE-8]
-+ | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *.
-+ | mov CARG1d, L:RB
-+ |.else
-+ | mov LFUNC:RA, [BASE-8]
-+ | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *.
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, LFUNC:RA
-+ | mov ARG2, PROTO:RD
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
-+ | call extern lj_func_newL_gc
-+ | // GCfuncL * returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | movzx RA, PC_RA
-+ | mov [BASE+RA*8], LFUNC:RC
-+ | mov dword [BASE+RA*8+4], LJ_TFUNC
-+ | ins_next
-+ break;
-+
-+ /* -- Table ops --------------------------------------------------------- */
-+
-+ case BC_TNEW:
-+ | ins_AD // RA = dst, RD = hbits|asize
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
-+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
-+ | mov SAVE_PC, PC
-+ | jae >5
-+ |1:
-+ |.if X64
-+ | mov CARG3d, RD
-+ | and RD, 0x7ff
-+ | shr CARG3d, 11
-+ |.else
-+ | mov RA, RD
-+ | and RD, 0x7ff
-+ | shr RA, 11
-+ | mov ARG3, RA
-+ |.endif
-+ | cmp RD, 0x7ff
-+ | je >3
-+ |2:
-+ |.if X64
-+ | mov L:CARG1d, L:RB
-+ | mov CARG2d, RD
-+ |.else
-+ | mov ARG1, L:RB
-+ | mov ARG2, RD
-+ |.endif
-+ | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
-+ | // Table * returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | movzx RA, PC_RA
-+ | mov [BASE+RA*8], TAB:RC
-+ | mov dword [BASE+RA*8+4], LJ_TTAB
-+ | ins_next
-+ |3: // Turn 0x7ff into 0x801.
-+ | mov RD, 0x801
-+ | jmp <2
-+ |5:
-+ | mov L:FCARG1, L:RB
-+ | call extern lj_gc_step_fixtop@4 // (lua_State *L)
-+ | movzx RD, PC_RD
-+ | jmp <1
-+ break;
-+ case BC_TDUP:
-+ | ins_AND // RA = dst, RD = table const (~) (holding template table)
-+ | mov L:RB, SAVE_L
-+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
-+ | mov SAVE_PC, PC
-+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
-+ | mov L:RB->base, BASE
-+ | jae >3
-+ |2:
-+ | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
-+ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
-+ | call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
-+ | // Table * returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | movzx RA, PC_RA
-+ | mov [BASE+RA*8], TAB:RC
-+ | mov dword [BASE+RA*8+4], LJ_TTAB
-+ | ins_next
-+ |3:
-+ | mov L:FCARG1, L:RB
-+ | call extern lj_gc_step_fixtop@4 // (lua_State *L)
-+ | movzx RD, PC_RD // Need to reload RD.
-+ | not RDa
-+ | jmp <2
-+ break;
-+
-+ case BC_GGET:
-+ | ins_AND // RA = dst, RD = str const (~)
-+ | mov LFUNC:RB, [BASE-8]
-+ | mov TAB:RB, LFUNC:RB->env
-+ | mov STR:RC, [KBASE+RD*4]
-+ | jmp ->BC_TGETS_Z
-+ break;
-+ case BC_GSET:
-+ | ins_AND // RA = src, RD = str const (~)
-+ | mov LFUNC:RB, [BASE-8]
-+ | mov TAB:RB, LFUNC:RB->env
-+ | mov STR:RC, [KBASE+RD*4]
-+ | jmp ->BC_TSETS_Z
-+ break;
-+
-+ case BC_TGETV:
-+ | ins_ABC // RA = dst, RB = table, RC = key
-+ | checktab RB, ->vmeta_tgetv
-+ | mov TAB:RB, [BASE+RB*8]
-+ |
-+ | // Integer key?
-+ |.if DUALNUM
-+ | checkint RC, >5
-+ | mov RC, dword [BASE+RC*8]
-+ |.else
-+ | // Convert number to int and back and compare.
-+ | checknum RC, >5
-+ | movsd xmm0, qword [BASE+RC*8]
-+ | cvttsd2si RC, xmm0
-+ | cvtsi2sd xmm1, RC
-+ | ucomisd xmm0, xmm1
-+ | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
-+ |.endif
-+ | cmp RC, TAB:RB->asize // Takes care of unordered, too.
-+ | jae ->vmeta_tgetv // Not in array part? Use fallback.
-+ | shl RC, 3
-+ | add RC, TAB:RB->array
-+ | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
-+ | je >2
-+ | // Get array slot.
-+ |.if X64
-+ | mov RBa, [RC]
-+ | mov [BASE+RA*8], RBa
-+ |.else
-+ | mov RB, [RC]
-+ | mov RC, [RC+4]
-+ | mov [BASE+RA*8], RB
-+ | mov [BASE+RA*8+4], RC
-+ |.endif
-+ |1:
-+ | ins_next
-+ |
-+ |2: // Check for __index if table value is nil.
-+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-+ | jz >3
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test byte TAB:RA->nomm, 1<<MM_index
-+ | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
-+ | movzx RA, PC_RA // Restore RA.
-+ |3:
-+ | mov dword [BASE+RA*8+4], LJ_TNIL
-+ | jmp <1
-+ |
-+ |5: // String key?
-+ | checkstr RC, ->vmeta_tgetv
-+ | mov STR:RC, [BASE+RC*8]
-+ | jmp ->BC_TGETS_Z
-+ break;
-+ case BC_TGETS:
-+ | ins_ABC // RA = dst, RB = table, RC = str const (~)
-+ | not RCa
-+ | mov STR:RC, [KBASE+RC*4]
-+ | checktab RB, ->vmeta_tgets
-+ | mov TAB:RB, [BASE+RB*8]
-+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
-+ | mov RA, TAB:RB->hmask
-+ | and RA, STR:RC->hash
-+ | imul RA, #NODE
-+ | add NODE:RA, TAB:RB->node
-+ |1:
-+ | cmp dword NODE:RA->key.it, LJ_TSTR
-+ | jne >4
-+ | cmp dword NODE:RA->key.gcr, STR:RC
-+ | jne >4
-+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
-+ | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
-+ | je >5 // Key found, but nil value?
-+ | movzx RC, PC_RA
-+ | // Get node value.
-+ |.if X64
-+ | mov RBa, [RA]
-+ | mov [BASE+RC*8], RBa
-+ |.else
-+ | mov RB, [RA]
-+ | mov RA, [RA+4]
-+ | mov [BASE+RC*8], RB
-+ | mov [BASE+RC*8+4], RA
-+ |.endif
-+ |2:
-+ | ins_next
-+ |
-+ |3:
-+ | movzx RC, PC_RA
-+ | mov dword [BASE+RC*8+4], LJ_TNIL
-+ | jmp <2
-+ |
-+ |4: // Follow hash chain.
-+ | mov NODE:RA, NODE:RA->next
-+ | test NODE:RA, NODE:RA
-+ | jnz <1
-+ | // End of hash chain: key not found, nil result.
-+ |
-+ |5: // Check for __index if table value is nil.
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test TAB:RA, TAB:RA
-+ | jz <3 // No metatable: done.
-+ | test byte TAB:RA->nomm, 1<<MM_index
-+ | jnz <3 // 'no __index' flag set: done.
-+ | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
-+ break;
-+ case BC_TGETB:
-+ | ins_ABC // RA = dst, RB = table, RC = byte literal
-+ | checktab RB, ->vmeta_tgetb
-+ | mov TAB:RB, [BASE+RB*8]
-+ | cmp RC, TAB:RB->asize
-+ | jae ->vmeta_tgetb
-+ | shl RC, 3
-+ | add RC, TAB:RB->array
-+ | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
-+ | je >2
-+ | // Get array slot.
-+ |.if X64
-+ | mov RBa, [RC]
-+ | mov [BASE+RA*8], RBa
-+ |.else
-+ | mov RB, [RC]
-+ | mov RC, [RC+4]
-+ | mov [BASE+RA*8], RB
-+ | mov [BASE+RA*8+4], RC
-+ |.endif
-+ |1:
-+ | ins_next
-+ |
-+ |2: // Check for __index if table value is nil.
-+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-+ | jz >3
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test byte TAB:RA->nomm, 1<<MM_index
-+ | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
-+ | movzx RA, PC_RA // Restore RA.
-+ |3:
-+ | mov dword [BASE+RA*8+4], LJ_TNIL
-+ | jmp <1
-+ break;
-+ case BC_TGETR:
-+ | ins_ABC // RA = dst, RB = table, RC = key
-+ | mov TAB:RB, [BASE+RB*8]
-+ |.if DUALNUM
-+ | mov RC, dword [BASE+RC*8]
-+ |.else
-+ | cvttsd2si RC, qword [BASE+RC*8]
-+ |.endif
-+ | cmp RC, TAB:RB->asize
-+ | jae ->vmeta_tgetr // Not in array part? Use fallback.
-+ | shl RC, 3
-+ | add RC, TAB:RB->array
-+ | // Get array slot.
-+ |->BC_TGETR_Z:
-+ |.if X64
-+ | mov RBa, [RC]
-+ | mov [BASE+RA*8], RBa
-+ |.else
-+ | mov RB, [RC]
-+ | mov RC, [RC+4]
-+ | mov [BASE+RA*8], RB
-+ | mov [BASE+RA*8+4], RC
-+ |.endif
-+ |->BC_TGETR2_Z:
-+ | ins_next
-+ break;
-+
-+ case BC_TSETV:
-+ | ins_ABC // RA = src, RB = table, RC = key
-+ | checktab RB, ->vmeta_tsetv
-+ | mov TAB:RB, [BASE+RB*8]
-+ |
-+ | // Integer key?
-+ |.if DUALNUM
-+ | checkint RC, >5
-+ | mov RC, dword [BASE+RC*8]
-+ |.else
-+ | // Convert number to int and back and compare.
-+ | checknum RC, >5
-+ | movsd xmm0, qword [BASE+RC*8]
-+ | cvttsd2si RC, xmm0
-+ | cvtsi2sd xmm1, RC
-+ | ucomisd xmm0, xmm1
-+ | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
-+ |.endif
-+ | cmp RC, TAB:RB->asize // Takes care of unordered, too.
-+ | jae ->vmeta_tsetv
-+ | shl RC, 3
-+ | add RC, TAB:RB->array
-+ | cmp dword [RC+4], LJ_TNIL
-+ | je >3 // Previous value is nil?
-+ |1:
-+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-+ | jnz >7
-+ |2: // Set array slot.
-+ |.if X64
-+ | mov RBa, [BASE+RA*8]
-+ | mov [RC], RBa
-+ |.else
-+ | mov RB, [BASE+RA*8+4]
-+ | mov RA, [BASE+RA*8]
-+ | mov [RC+4], RB
-+ | mov [RC], RA
-+ |.endif
-+ | ins_next
-+ |
-+ |3: // Check for __newindex if previous value is nil.
-+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-+ | jz <1
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test byte TAB:RA->nomm, 1<<MM_newindex
-+ | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
-+ | movzx RA, PC_RA // Restore RA.
-+ | jmp <1
-+ |
-+ |5: // String key?
-+ | checkstr RC, ->vmeta_tsetv
-+ | mov STR:RC, [BASE+RC*8]
-+ | jmp ->BC_TSETS_Z
-+ |
-+ |7: // Possible table write barrier for the value. Skip valiswhite check.
-+ | barrierback TAB:RB, RA
-+ | movzx RA, PC_RA // Restore RA.
-+ | jmp <2
-+ break;
-+ case BC_TSETS:
-+ | ins_ABC // RA = src, RB = table, RC = str const (~)
-+ | not RCa
-+ | mov STR:RC, [KBASE+RC*4]
-+ | checktab RB, ->vmeta_tsets
-+ | mov TAB:RB, [BASE+RB*8]
-+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
-+ | mov RA, TAB:RB->hmask
-+ | and RA, STR:RC->hash
-+ | imul RA, #NODE
-+ | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
-+ | add NODE:RA, TAB:RB->node
-+ |1:
-+ | cmp dword NODE:RA->key.it, LJ_TSTR
-+ | jne >5
-+ | cmp dword NODE:RA->key.gcr, STR:RC
-+ | jne >5
-+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
-+ | cmp dword [RA+4], LJ_TNIL
-+ | je >4 // Previous value is nil?
-+ |2:
-+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-+ | jnz >7
-+ |3: // Set node value.
-+ | movzx RC, PC_RA
-+ |.if X64
-+ | mov RBa, [BASE+RC*8]
-+ | mov [RA], RBa
-+ |.else
-+ | mov RB, [BASE+RC*8+4]
-+ | mov RC, [BASE+RC*8]
-+ | mov [RA+4], RB
-+ | mov [RA], RC
-+ |.endif
-+ | ins_next
-+ |
-+ |4: // Check for __newindex if previous value is nil.
-+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-+ | jz <2
-+ | mov TMP1, RA // Save RA.
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test byte TAB:RA->nomm, 1<<MM_newindex
-+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
-+ | mov RA, TMP1 // Restore RA.
-+ | jmp <2
-+ |
-+ |5: // Follow hash chain.
-+ | mov NODE:RA, NODE:RA->next
-+ | test NODE:RA, NODE:RA
-+ | jnz <1
-+ | // End of hash chain: key not found, add a new one.
-+ |
-+ | // But check for __newindex first.
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test TAB:RA, TAB:RA
-+ | jz >6 // No metatable: continue.
-+ | test byte TAB:RA->nomm, 1<<MM_newindex
-+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
-+ |6:
-+ | mov TMP1, STR:RC
-+ | mov TMP2, LJ_TSTR
-+ | mov TMP3, TAB:RB // Save TAB:RB for us.
-+ |.if X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE
-+ | lea CARG3, TMP1
-+ | mov CARG2d, TAB:RB
-+ | mov L:RB, L:CARG1d
-+ |.else
-+ | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
-+ | mov ARG2, TAB:RB
-+ | mov L:RB, SAVE_L
-+ | mov ARG3, RC
-+ | mov ARG1, L:RB
-+ | mov L:RB->base, BASE
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
-+ | // Handles write barrier for the new key. TValue * returned in eax (RC).
-+ | mov BASE, L:RB->base
-+ | mov TAB:RB, TMP3 // Need TAB:RB for barrier.
-+ | mov RA, eax
-+ | jmp <2 // Must check write barrier for value.
-+ |
-+ |7: // Possible table write barrier for the value. Skip valiswhite check.
-+ | barrierback TAB:RB, RC // Destroys STR:RC.
-+ | jmp <3
-+ break;
-+ case BC_TSETB:
-+ | ins_ABC // RA = src, RB = table, RC = byte literal
-+ | checktab RB, ->vmeta_tsetb
-+ | mov TAB:RB, [BASE+RB*8]
-+ | cmp RC, TAB:RB->asize
-+ | jae ->vmeta_tsetb
-+ | shl RC, 3
-+ | add RC, TAB:RB->array
-+ | cmp dword [RC+4], LJ_TNIL
-+ | je >3 // Previous value is nil?
-+ |1:
-+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-+ | jnz >7
-+ |2: // Set array slot.
-+ |.if X64
-+ | mov RAa, [BASE+RA*8]
-+ | mov [RC], RAa
-+ |.else
-+ | mov RB, [BASE+RA*8+4]
-+ | mov RA, [BASE+RA*8]
-+ | mov [RC+4], RB
-+ | mov [RC], RA
-+ |.endif
-+ | ins_next
-+ |
-+ |3: // Check for __newindex if previous value is nil.
-+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
-+ | jz <1
-+ | mov TAB:RA, TAB:RB->metatable
-+ | test byte TAB:RA->nomm, 1<<MM_newindex
-+ | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
-+ | movzx RA, PC_RA // Restore RA.
-+ | jmp <1
-+ |
-+ |7: // Possible table write barrier for the value. Skip valiswhite check.
-+ | barrierback TAB:RB, RA
-+ | movzx RA, PC_RA // Restore RA.
-+ | jmp <2
-+ break;
-+ case BC_TSETR:
-+ | ins_ABC // RA = src, RB = table, RC = key
-+ | mov TAB:RB, [BASE+RB*8]
-+ |.if DUALNUM
-+ | mov RC, dword [BASE+RC*8]
-+ |.else
-+ | cvttsd2si RC, qword [BASE+RC*8]
-+ |.endif
-+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-+ | jnz >7
-+ |2:
-+ | cmp RC, TAB:RB->asize
-+ | jae ->vmeta_tsetr
-+ | shl RC, 3
-+ | add RC, TAB:RB->array
-+ | // Set array slot.
-+ |->BC_TSETR_Z:
-+ |.if X64
-+ | mov RBa, [BASE+RA*8]
-+ | mov [RC], RBa
-+ |.else
-+ | mov RB, [BASE+RA*8+4]
-+ | mov RA, [BASE+RA*8]
-+ | mov [RC+4], RB
-+ | mov [RC], RA
-+ |.endif
-+ | ins_next
-+ |
-+ |7: // Possible table write barrier for the value. Skip valiswhite check.
-+ | barrierback TAB:RB, RA
-+ | movzx RA, PC_RA // Restore RA.
-+ | jmp <2
-+ break;
-+
-+ case BC_TSETM:
-+ | ins_AD // RA = base (table at base-1), RD = num const (start index)
-+ | mov TMP1, KBASE // Need one more free register.
-+ | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word.
-+ |1:
-+ | lea RA, [BASE+RA*8]
-+ | mov TAB:RB, [RA-8] // Guaranteed to be a table.
-+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
-+ | jnz >7
-+ |2:
-+ | mov RD, MULTRES
-+ | sub RD, 1
-+ | jz >4 // Nothing to copy?
-+ | add RD, KBASE // Compute needed size.
-+ | cmp RD, TAB:RB->asize
-+ | ja >5 // Doesn't fit into array part?
-+ | sub RD, KBASE
-+ | shl KBASE, 3
-+ | add KBASE, TAB:RB->array
-+ |3: // Copy result slots to table.
-+ |.if X64
-+ | mov RBa, [RA]
-+ | add RA, 8
-+ | mov [KBASE], RBa
-+ |.else
-+ | mov RB, [RA]
-+ | mov [KBASE], RB
-+ | mov RB, [RA+4]
-+ | add RA, 8
-+ | mov [KBASE+4], RB
-+ |.endif
-+ | add KBASE, 8
-+ | sub RD, 1
-+ | jnz <3
-+ |4:
-+ | mov KBASE, TMP1
-+ | ins_next
-+ |
-+ |5: // Need to resize array part.
-+ |.if X64
-+ | mov L:CARG1d, SAVE_L
-+ | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
-+ | mov CARG2d, TAB:RB
-+ | mov CARG3d, RD
-+ | mov L:RB, L:CARG1d
-+ |.else
-+ | mov ARG2, TAB:RB
-+ | mov L:RB, SAVE_L
-+ | mov L:RB->base, BASE
-+ | mov ARG3, RD
-+ | mov ARG1, L:RB
-+ |.endif
-+ | mov SAVE_PC, PC
-+ | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
-+ | mov BASE, L:RB->base
-+ | movzx RA, PC_RA // Restore RA.
-+ | jmp <1 // Retry.
-+ |
-+ |7: // Possible table write barrier for any value. Skip valiswhite check.
-+ | barrierback TAB:RB, RD
-+ | jmp <2
-+ break;
-+
-+ /* -- Calls and vararg handling ----------------------------------------- */
-+
-+ case BC_CALL: case BC_CALLM:
-+ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
-+ if (op == BC_CALLM) {
-+ | add NARGS:RD, MULTRES
-+ }
-+ | cmp dword [BASE+RA*8+4], LJ_TFUNC
-+ | mov LFUNC:RB, [BASE+RA*8]
-+ | jne ->vmeta_call_ra
-+ | lea BASE, [BASE+RA*8+8]
-+ | ins_call
-+ break;
-+
-+ case BC_CALLMT:
-+ | ins_AD // RA = base, RD = extra_nargs
-+ | add NARGS:RD, MULTRES
-+ | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
-+ break;
-+ case BC_CALLT:
-+ | ins_AD // RA = base, RD = nargs+1
-+ | lea RA, [BASE+RA*8+8]
-+ | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
-+ | mov LFUNC:RB, [RA-8]
-+ | cmp dword [RA-4], LJ_TFUNC
-+ | jne ->vmeta_call
-+ |->BC_CALLT_Z:
-+ | mov PC, [BASE-4]
-+ | test PC, FRAME_TYPE
-+ | jnz >7
-+ |1:
-+ | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below.
-+ | mov MULTRES, NARGS:RD
-+ | sub NARGS:RD, 1
-+ | jz >3
-+ |2: // Move args down.
-+ |.if X64
-+ | mov RBa, [RA]
-+ | add RA, 8
-+ | mov [KBASE], RBa
-+ |.else
-+ | mov RB, [RA]
-+ | mov [KBASE], RB
-+ | mov RB, [RA+4]
-+ | add RA, 8
-+ | mov [KBASE+4], RB
-+ |.endif
-+ | add KBASE, 8
-+ | sub NARGS:RD, 1
-+ | jnz <2
-+ |
-+ | mov LFUNC:RB, [BASE-8]
-+ |3:
-+ | mov NARGS:RD, MULTRES
-+ | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
-+ | ja >5
-+ |4:
-+ | ins_callt
-+ |
-+ |5: // Tailcall to a fast function.
-+ | test PC, FRAME_TYPE // Lua frame below?
-+ | jnz <4
-+ | movzx RA, PC_RA
-+ | not RAa
-+ | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE.
-+ | mov KBASE, LFUNC:KBASE->pc
-+ | mov KBASE, [KBASE+PC2PROTO(k)]
-+ | jmp <4
-+ |
-+ |7: // Tailcall from a vararg function.
-+ | sub PC, FRAME_VARG
-+ | test PC, FRAME_TYPEP
-+ | jnz >8 // Vararg frame below?
-+ | sub BASE, PC // Need to relocate BASE/KBASE down.
-+ | mov KBASE, BASE
-+ | mov PC, [BASE-4]
-+ | jmp <1
-+ |8:
-+ | add PC, FRAME_VARG
-+ | jmp <1
-+ break;
-+
-+ case BC_ITERC:
-+ | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
-+ | lea RA, [BASE+RA*8+8] // fb = base+1
-+ |.if X64
-+ | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3].
-+ | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2].
-+ | mov [RA], RBa
-+ | mov [RA+8], RCa
-+ |.else
-+ | mov RB, [RA-24] // Copy state. fb[0] = fb[-3].
-+ | mov RC, [RA-20]
-+ | mov [RA], RB
-+ | mov [RA+4], RC
-+ | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2].
-+ | mov RC, [RA-12]
-+ | mov [RA+8], RB
-+ | mov [RA+12], RC
-+ |.endif
-+ | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4]
-+ | mov RC, [RA-28]
-+ | mov [RA-8], LFUNC:RB
-+ | mov [RA-4], RC
-+ | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
-+ | mov NARGS:RD, 2+1
-+ | jne ->vmeta_call
-+ | mov BASE, RA
-+ | ins_call
-+ break;
-+
-+ case BC_ITERN:
-+ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
-+ |.if JIT
-+ | // NYI: add hotloop, record BC_ITERN.
-+ |.endif
-+ | mov TMP1, KBASE // Need two more free registers.
-+ | mov TMP2, DISPATCH
-+ | mov TAB:RB, [BASE+RA*8-16]
-+ | mov RC, [BASE+RA*8-8] // Get index from control var.
-+ | mov DISPATCH, TAB:RB->asize
-+ | add PC, 4
-+ | mov KBASE, TAB:RB->array
-+ |1: // Traverse array part.
-+ | cmp RC, DISPATCH; jae >5 // Index points after array part?
-+ | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4
-+ |.if DUALNUM
-+ | mov dword [BASE+RA*8+4], LJ_TISNUM
-+ | mov dword [BASE+RA*8], RC
-+ |.else
-+ | cvtsi2sd xmm0, RC
-+ |.endif
-+ | // Copy array slot to returned value.
-+ |.if X64
-+ | mov RBa, [KBASE+RC*8]
-+ | mov [BASE+RA*8+8], RBa
-+ |.else
-+ | mov RB, [KBASE+RC*8+4]
-+ | mov [BASE+RA*8+12], RB
-+ | mov RB, [KBASE+RC*8]
-+ | mov [BASE+RA*8+8], RB
-+ |.endif
-+ | add RC, 1
-+ | // Return array index as a numeric key.
-+ |.if DUALNUM
-+ | // See above.
-+ |.else
-+ | movsd qword [BASE+RA*8], xmm0
-+ |.endif
-+ | mov [BASE+RA*8-8], RC // Update control var.
-+ |2:
-+ | movzx RD, PC_RD // Get target from ITERL.
-+ | branchPC RD
-+ |3:
-+ | mov DISPATCH, TMP2
-+ | mov KBASE, TMP1
-+ | ins_next
-+ |
-+ |4: // Skip holes in array part.
-+ | add RC, 1
-+ | jmp <1
-+ |
-+ |5: // Traverse hash part.
-+ | sub RC, DISPATCH
-+ |6:
-+ | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
-+ | imul KBASE, RC, #NODE
-+ | add NODE:KBASE, TAB:RB->node
-+ | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7
-+ | lea DISPATCH, [RC+DISPATCH+1]
-+ | // Copy key and value from hash slot.
-+ |.if X64
-+ | mov RBa, NODE:KBASE->key
-+ | mov RCa, NODE:KBASE->val
-+ | mov [BASE+RA*8], RBa
-+ | mov [BASE+RA*8+8], RCa
-+ |.else
-+ | mov RB, NODE:KBASE->key.gcr
-+ | mov RC, NODE:KBASE->key.it
-+ | mov [BASE+RA*8], RB
-+ | mov [BASE+RA*8+4], RC
-+ | mov RB, NODE:KBASE->val.gcr
-+ | mov RC, NODE:KBASE->val.it
-+ | mov [BASE+RA*8+8], RB
-+ | mov [BASE+RA*8+12], RC
-+ |.endif
-+ | mov [BASE+RA*8-8], DISPATCH
-+ | jmp <2
-+ |
-+ |7: // Skip holes in hash part.
-+ | add RC, 1
-+ | jmp <6
-+ break;
-+
-+ case BC_ISNEXT:
-+ | ins_AD // RA = base, RD = target (points to ITERN)
-+ | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5
-+ | mov CFUNC:RB, [BASE+RA*8-24]
-+ | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5
-+ | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5
-+ | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
-+ | branchPC RD
-+ | mov dword [BASE+RA*8-8], 0 // Initialize control var.
-+ | mov dword [BASE+RA*8-4], 0xfffe7fff
-+ |1:
-+ | ins_next
-+ |5: // Despecialize bytecode if any of the checks fail.
-+ | mov PC_OP, BC_JMP
-+ | branchPC RD
-+ | mov byte [PC], BC_ITERC
-+ | jmp <1
-+ break;
-+
-+ case BC_VARG:
-+ | ins_ABC // RA = base, RB = nresults+1, RC = numparams
-+ | mov TMP1, KBASE // Need one more free register.
-+ | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
-+ | lea RA, [BASE+RA*8]
-+ | sub KBASE, [BASE-4]
-+ | // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
-+ | test RB, RB
-+ | jz >5 // Copy all varargs?
-+ | lea RB, [RA+RB*8-8]
-+ | cmp KBASE, BASE // No vararg slots?
-+ | jnb >2
-+ |1: // Copy vararg slots to destination slots.
-+ |.if X64
-+ | mov RCa, [KBASE-8]
-+ | add KBASE, 8
-+ | mov [RA], RCa
-+ |.else
-+ | mov RC, [KBASE-8]
-+ | mov [RA], RC
-+ | mov RC, [KBASE-4]
-+ | add KBASE, 8
-+ | mov [RA+4], RC
-+ |.endif
-+ | add RA, 8
-+ | cmp RA, RB // All destination slots filled?
-+ | jnb >3
-+ | cmp KBASE, BASE // No more vararg slots?
-+ | jb <1
-+ |2: // Fill up remainder with nil.
-+ | mov dword [RA+4], LJ_TNIL
-+ | add RA, 8
-+ | cmp RA, RB
-+ | jb <2
-+ |3:
-+ | mov KBASE, TMP1
-+ | ins_next
-+ |
-+ |5: // Copy all varargs.
-+ | mov MULTRES, 1 // MULTRES = 0+1
-+ | mov RC, BASE
-+ | sub RC, KBASE
-+ | jbe <3 // No vararg slots?
-+ | mov RB, RC
-+ | shr RB, 3
-+ | add RB, 1
-+ | mov MULTRES, RB // MULTRES = #varargs+1
-+ | mov L:RB, SAVE_L
-+ | add RC, RA
-+ | cmp RC, L:RB->maxstack
-+ | ja >7 // Need to grow stack?
-+ |6: // Copy all vararg slots.
-+ |.if X64
-+ | mov RCa, [KBASE-8]
-+ | add KBASE, 8
-+ | mov [RA], RCa
-+ |.else
-+ | mov RC, [KBASE-8]
-+ | mov [RA], RC
-+ | mov RC, [KBASE-4]
-+ | add KBASE, 8
-+ | mov [RA+4], RC
-+ |.endif
-+ | add RA, 8
-+ | cmp KBASE, BASE // No more vararg slots?
-+ | jb <6
-+ | jmp <3
-+ |
-+ |7: // Grow stack for varargs.
-+ | mov L:RB->base, BASE
-+ | mov L:RB->top, RA
-+ | mov SAVE_PC, PC
-+ | sub KBASE, BASE // Need delta, because BASE may change.
-+ | mov FCARG2, MULTRES
-+ | sub FCARG2, 1
-+ | mov FCARG1, L:RB
-+ | call extern lj_state_growstack@8 // (lua_State *L, int n)
-+ | mov BASE, L:RB->base
-+ | mov RA, L:RB->top
-+ | add KBASE, BASE
-+ | jmp <6
-+ break;
-+
-+ /* -- Returns ----------------------------------------------------------- */
-+
-+ case BC_RETM:
-+ | ins_AD // RA = results, RD = extra_nresults
-+ | add RD, MULTRES // MULTRES >=1, so RD >=1.
-+ | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
-+ break;
-+
-+ case BC_RET: case BC_RET0: case BC_RET1:
-+ | ins_AD // RA = results, RD = nresults+1
-+ if (op != BC_RET0) {
-+ | shl RA, 3
-+ }
-+ |1:
-+ | mov PC, [BASE-4]
-+ | mov MULTRES, RD // Save nresults+1.
-+ | test PC, FRAME_TYPE // Check frame type marker.
-+ | jnz >7 // Not returning to a fixarg Lua func?
-+ switch (op) {
-+ case BC_RET:
-+ |->BC_RET_Z:
-+ | mov KBASE, BASE // Use KBASE for result move.
-+ | sub RD, 1
-+ | jz >3
-+ |2: // Move results down.
-+ |.if X64
-+ | mov RBa, [KBASE+RA]
-+ | mov [KBASE-8], RBa
-+ |.else
-+ | mov RB, [KBASE+RA]
-+ | mov [KBASE-8], RB
-+ | mov RB, [KBASE+RA+4]
-+ | mov [KBASE-4], RB
-+ |.endif
-+ | add KBASE, 8
-+ | sub RD, 1
-+ | jnz <2
-+ |3:
-+ | mov RD, MULTRES // Note: MULTRES may be >255.
-+ | movzx RB, PC_RB // So cannot compare with RDL!
-+ |5:
-+ | cmp RB, RD // More results expected?
-+ | ja >6
-+ break;
-+ case BC_RET1:
-+ |.if X64
-+ | mov RBa, [BASE+RA]
-+ | mov [BASE-8], RBa
-+ |.else
-+ | mov RB, [BASE+RA+4]
-+ | mov [BASE-4], RB
-+ | mov RB, [BASE+RA]
-+ | mov [BASE-8], RB
-+ |.endif
-+ /* fallthrough */
-+ case BC_RET0:
-+ |5:
-+ | cmp PC_RB, RDL // More results expected?
-+ | ja >6
-+ default:
-+ break;
-+ }
-+ | movzx RA, PC_RA
-+ | not RAa // Note: ~RA = -(RA+1)
-+ | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
-+ | mov LFUNC:KBASE, [BASE-8]
-+ | mov KBASE, LFUNC:KBASE->pc
-+ | mov KBASE, [KBASE+PC2PROTO(k)]
-+ | ins_next
-+ |
-+ |6: // Fill up results with nil.
-+ if (op == BC_RET) {
-+ | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base.
-+ | add KBASE, 8
-+ } else {
-+ | mov dword [BASE+RD*8-12], LJ_TNIL
-+ }
-+ | add RD, 1
-+ | jmp <5
-+ |
-+ |7: // Non-standard return case.
-+ | lea RB, [PC-FRAME_VARG]
-+ | test RB, FRAME_TYPEP
-+ | jnz ->vm_return
-+ | // Return from vararg function: relocate BASE down and RA up.
-+ | sub BASE, RB
-+ if (op != BC_RET0) {
-+ | add RA, RB
-+ }
-+ | jmp <1
-+ break;
-+
-+ /* -- Loops and branches ------------------------------------------------ */
-+
-+ |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4]
-+ |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12]
-+ |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20]
-+ |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28]
-+
-+ case BC_FORL:
-+ |.if JIT
-+ | hotloop RB
-+ |.endif
-+ | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
-+ break;
-+
-+ case BC_JFORI:
-+ case BC_JFORL:
-+#if !LJ_HASJIT
-+ break;
-+#endif
-+ case BC_FORI:
-+ case BC_IFORL:
-+ vk = (op == BC_IFORL || op == BC_JFORL);
-+ | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
-+ | lea RA, [BASE+RA*8]
-+ if (LJ_DUALNUM) {
-+ | cmp FOR_TIDX, LJ_TISNUM; jne >9
-+ if (!vk) {
-+ | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for
-+ | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for
-+ | mov RB, dword FOR_IDX
-+ | cmp dword FOR_STEP, 0; jl >5
-+ } else {
-+#ifdef LUA_USE_ASSERT
-+ | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type
-+ | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type
-+#endif
-+ | mov RB, dword FOR_STEP
-+ | test RB, RB; js >5
-+ | add RB, dword FOR_IDX; jo >1
-+ | mov dword FOR_IDX, RB
-+ }
-+ | cmp RB, dword FOR_STOP
-+ | mov FOR_TEXT, LJ_TISNUM
-+ | mov dword FOR_EXT, RB
-+ if (op == BC_FORI) {
-+ | jle >7
-+ |1:
-+ |6:
-+ | branchPC RD
-+ } else if (op == BC_JFORI) {
-+ | branchPC RD
-+ | movzx RD, PC_RD
-+ | jle =>BC_JLOOP
-+ |1:
-+ |6:
-+ } else if (op == BC_IFORL) {
-+ | jg >7
-+ |6:
-+ | branchPC RD
-+ |1:
-+ } else {
-+ | jle =>BC_JLOOP
-+ |1:
-+ |6:
-+ }
-+ |7:
-+ | ins_next
-+ |
-+ |5: // Invert check for negative step.
-+ if (vk) {
-+ | add RB, dword FOR_IDX; jo <1
-+ | mov dword FOR_IDX, RB
-+ }
-+ | cmp RB, dword FOR_STOP
-+ | mov FOR_TEXT, LJ_TISNUM
-+ | mov dword FOR_EXT, RB
-+ if (op == BC_FORI) {
-+ | jge <7
-+ } else if (op == BC_JFORI) {
-+ | branchPC RD
-+ | movzx RD, PC_RD
-+ | jge =>BC_JLOOP
-+ } else if (op == BC_IFORL) {
-+ | jl <7
-+ } else {
-+ | jge =>BC_JLOOP
-+ }
-+ | jmp <6
-+ |9: // Fallback to FP variant.
-+ } else if (!vk) {
-+ | cmp FOR_TIDX, LJ_TISNUM
-+ }
-+ if (!vk) {
-+ | jae ->vmeta_for
-+ | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for
-+ } else {
-+#ifdef LUA_USE_ASSERT
-+ | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type
-+ | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type
-+#endif
-+ }
-+ | mov RB, FOR_TSTEP // Load type/hiword of for step.
-+ if (!vk) {
-+ | cmp RB, LJ_TISNUM; jae ->vmeta_for
-+ }
-+ | movsd xmm0, qword FOR_IDX
-+ | movsd xmm1, qword FOR_STOP
-+ if (vk) {
-+ | addsd xmm0, qword FOR_STEP
-+ | movsd qword FOR_IDX, xmm0
-+ | test RB, RB; js >3
-+ } else {
-+ | jl >3
-+ }
-+ | ucomisd xmm1, xmm0
-+ |1:
-+ | movsd qword FOR_EXT, xmm0
-+ if (op == BC_FORI) {
-+ |.if DUALNUM
-+ | jnb <7
-+ |.else
-+ | jnb >2
-+ | branchPC RD
-+ |.endif
-+ } else if (op == BC_JFORI) {
-+ | branchPC RD
-+ | movzx RD, PC_RD
-+ | jnb =>BC_JLOOP
-+ } else if (op == BC_IFORL) {
-+ |.if DUALNUM
-+ | jb <7
-+ |.else
-+ | jb >2
-+ | branchPC RD
-+ |.endif
-+ } else {
-+ | jnb =>BC_JLOOP
-+ }
-+ |.if DUALNUM
-+ | jmp <6
-+ |.else
-+ |2:
-+ | ins_next
-+ |.endif
-+ |
-+ |3: // Invert comparison if step is negative.
-+ | ucomisd xmm0, xmm1
-+ | jmp <1
-+ break;
-+
-+ case BC_ITERL:
-+ |.if JIT
-+ | hotloop RB
-+ |.endif
-+ | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
-+ break;
-+
-+ case BC_JITERL:
-+#if !LJ_HASJIT
-+ break;
-+#endif
-+ case BC_IITERL:
-+ | ins_AJ // RA = base, RD = target
-+ | lea RA, [BASE+RA*8]
-+ | mov RB, [RA+4]
-+ | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
-+ if (op == BC_JITERL) {
-+ | mov [RA-4], RB
-+ | mov RB, [RA]
-+ | mov [RA-8], RB
-+ | jmp =>BC_JLOOP
-+ } else {
-+ | branchPC RD // Otherwise save control var + branch.
-+ | mov RD, [RA]
-+ | mov [RA-4], RB
-+ | mov [RA-8], RD
-+ }
-+ |1:
-+ | ins_next
-+ break;
-+
-+ case BC_LOOP:
-+ | ins_A // RA = base, RD = target (loop extent)
-+ | // Note: RA/RD is only used by trace recorder to determine scope/extent
-+ | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
-+ |.if JIT
-+ | hotloop RB
-+ |.endif
-+ | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
-+ break;
-+
-+ case BC_ILOOP:
-+ | ins_A // RA = base, RD = target (loop extent)
-+ | ins_next
-+ break;
-+
-+ case BC_JLOOP:
-+ |.if JIT
-+ | ins_AD // RA = base (ignored), RD = traceno
-+ | mov RA, [DISPATCH+DISPATCH_J(trace)]
-+ | mov TRACE:RD, [RA+RD*4]
-+ | mov RDa, TRACE:RD->mcode
-+ | mov L:RB, SAVE_L
-+ | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
-+ | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
-+ | // Save additional callee-save registers only used in compiled code.
-+ |.if X64WIN
-+ | mov TMPQ, r12
-+ | mov TMPa, r13
-+ | mov CSAVE_4, r14
-+ | mov CSAVE_3, r15
-+ | mov RAa, rsp
-+ | sub rsp, 9*16+4*8
-+ | movdqa [RAa], xmm6
-+ | movdqa [RAa-1*16], xmm7
-+ | movdqa [RAa-2*16], xmm8
-+ | movdqa [RAa-3*16], xmm9
-+ | movdqa [RAa-4*16], xmm10
-+ | movdqa [RAa-5*16], xmm11
-+ | movdqa [RAa-6*16], xmm12
-+ | movdqa [RAa-7*16], xmm13
-+ | movdqa [RAa-8*16], xmm14
-+ | movdqa [RAa-9*16], xmm15
-+ |.elif X64
-+ | mov TMPQ, r12
-+ | mov TMPa, r13
-+ | sub rsp, 16
-+ |.endif
-+ | jmp RDa
-+ |.endif
-+ break;
-+
-+ case BC_JMP:
-+ | ins_AJ // RA = unused, RD = target
-+ | branchPC RD
-+ | ins_next
-+ break;
-+
-+ /* -- Function headers -------------------------------------------------- */
-+
-+ /*
-+ ** Reminder: A function may be called with func/args above L->maxstack,
-+ ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
-+ ** too. This means all FUNC* ops (including fast functions) must check
-+ ** for stack overflow _before_ adding more slots!
-+ */
-+
-+ case BC_FUNCF:
-+ |.if JIT
-+ | hotcall RB
-+ |.endif
-+ case BC_FUNCV: /* NYI: compiled vararg functions. */
-+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
-+ break;
-+
-+ case BC_JFUNCF:
-+#if !LJ_HASJIT
-+ break;
-+#endif
-+ case BC_IFUNCF:
-+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
-+ | mov KBASE, [PC-4+PC2PROTO(k)]
-+ | mov L:RB, SAVE_L
-+ | lea RA, [BASE+RA*8] // Top of frame.
-+ | cmp RA, L:RB->maxstack
-+ | ja ->vm_growstack_f
-+ | movzx RA, byte [PC-4+PC2PROTO(numparams)]
-+ | cmp NARGS:RD, RA // Check for missing parameters.
-+ | jbe >3
-+ |2:
-+ if (op == BC_JFUNCF) {
-+ | movzx RD, PC_RD
-+ | jmp =>BC_JLOOP
-+ } else {
-+ | ins_next
-+ }
-+ |
-+ |3: // Clear missing parameters.
-+ | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL
-+ | add NARGS:RD, 1
-+ | cmp NARGS:RD, RA
-+ | jbe <3
-+ | jmp <2
-+ break;
-+
-+ case BC_JFUNCV:
-+#if !LJ_HASJIT
-+ break;
-+#endif
-+ | int3 // NYI: compiled vararg functions
-+ break; /* NYI: compiled vararg functions. */
-+
-+ case BC_IFUNCV:
-+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
-+ | lea RB, [NARGS:RD*8+FRAME_VARG]
-+ | lea RD, [BASE+NARGS:RD*8]
-+ | mov LFUNC:KBASE, [BASE-8]
-+ | mov [RD-4], RB // Store delta + FRAME_VARG.
-+ | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC.
-+ | mov L:RB, SAVE_L
-+ | lea RA, [RD+RA*8]
-+ | cmp RA, L:RB->maxstack
-+ | ja ->vm_growstack_v // Need to grow stack.
-+ | mov RA, BASE
-+ | mov BASE, RD
-+ | movzx RB, byte [PC-4+PC2PROTO(numparams)]
-+ | test RB, RB
-+ | jz >2
-+ |1: // Copy fixarg slots up to new frame.
-+ | add RA, 8
-+ | cmp RA, BASE
-+ | jnb >3 // Less args than parameters?
-+ | mov KBASE, [RA-8]
-+ | mov [RD], KBASE
-+ | mov KBASE, [RA-4]
-+ | mov [RD+4], KBASE
-+ | add RD, 8
-+ | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
-+ | sub RB, 1
-+ | jnz <1
-+ |2:
-+ if (op == BC_JFUNCV) {
-+ | movzx RD, PC_RD
-+ | jmp =>BC_JLOOP
-+ } else {
-+ | mov KBASE, [PC-4+PC2PROTO(k)]
-+ | ins_next
-+ }
-+ |
-+ |3: // Clear missing parameters.
-+ | mov dword [RD+4], LJ_TNIL
-+ | add RD, 8
-+ | sub RB, 1
-+ | jnz <3
-+ | jmp <2
-+ break;
-+
-+ case BC_FUNCC:
-+ case BC_FUNCCW:
-+ | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
-+ | mov CFUNC:RB, [BASE-8]
-+ | mov KBASEa, CFUNC:RB->f
-+ | mov L:RB, SAVE_L
-+ | lea RD, [BASE+NARGS:RD*8-8]
-+ | mov L:RB->base, BASE
-+ | lea RA, [RD+8*LUA_MINSTACK]
-+ | cmp RA, L:RB->maxstack
-+ | mov L:RB->top, RD
-+ if (op == BC_FUNCC) {
-+ |.if X64
-+ | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
-+ |.else
-+ | mov ARG1, L:RB
-+ |.endif
-+ } else {
-+ |.if X64
-+ | mov CARG2, KBASEa
-+ | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
-+ |.else
-+ | mov ARG2, KBASEa
-+ | mov ARG1, L:RB
-+ |.endif
-+ }
-+ | ja ->vm_growstack_c // Need to grow stack.
-+ | set_vmstate C
-+ if (op == BC_FUNCC) {
-+ | call KBASEa // (lua_State *L)
-+ } else {
-+ | // (lua_State *L, lua_CFunction f)
-+ | call aword [DISPATCH+DISPATCH_GL(wrapf)]
-+ }
-+ | // nresults returned in eax (RD).
-+ | mov BASE, L:RB->base
-+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
-+ | set_vmstate INTERP
-+ | lea RA, [BASE+RD*8]
-+ | neg RA
-+ | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
-+ | mov PC, [BASE-4] // Fetch PC of caller.
-+ | jmp ->vm_returnc
-+ break;
-+
-+ /* ---------------------------------------------------------------------- */
-+
-+ default:
-+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
-+ exit(2);
-+ break;
-+ }
-+}
-+
-+static int build_backend(BuildCtx *ctx)
-+{
-+ int op;
-+ dasm_growpc(Dst, BC__MAX);
-+ build_subroutines(ctx);
-+ |.code_op
-+ for (op = 0; op < BC__MAX; op++)
-+ build_ins(ctx, (BCOp)op, op);
-+ return BC__MAX;
-+}
-+
-+/* Emit pseudo frame-info for all assembler functions. */
-+static void emit_asm_debug(BuildCtx *ctx)
-+{
-+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
-+#if LJ_64
-+#define SZPTR "8"
-+#define BSZPTR "3"
-+#define REG_SP "0x7"
-+#define REG_RA "0x10"
-+#else
-+#define SZPTR "4"
-+#define BSZPTR "2"
-+#define REG_SP "0x4"
-+#define REG_RA "0x8"
-+#endif
-+ switch (ctx->mode) {
-+ case BUILD_elfasm:
-+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
-+ fprintf(ctx->fp,
-+ ".Lframe0:\n"
-+ "\t.long .LECIE0-.LSCIE0\n"
-+ ".LSCIE0:\n"
-+ "\t.long 0xffffffff\n"
-+ "\t.byte 0x1\n"
-+ "\t.string \"\"\n"
-+ "\t.uleb128 0x1\n"
-+ "\t.sleb128 -" SZPTR "\n"
-+ "\t.byte " REG_RA "\n"
-+ "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
-+ "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
-+ "\t.align " SZPTR "\n"
-+ ".LECIE0:\n\n");
-+ fprintf(ctx->fp,
-+ ".LSFDE0:\n"
-+ "\t.long .LEFDE0-.LASFDE0\n"
-+ ".LASFDE0:\n"
-+ "\t.long .Lframe0\n"
-+#if LJ_64
-+ "\t.quad .Lbegin\n"
-+ "\t.quad %d\n"
-+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
-+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
-+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
-+ "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
-+ "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
-+#if LJ_NO_UNWIND
-+ "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
-+ "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
-+#endif
-+#else
-+ "\t.long .Lbegin\n"
-+ "\t.long %d\n"
-+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
-+ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
-+ "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
-+ "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
-+ "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
-+#endif
-+ "\t.align " SZPTR "\n"
-+ ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
-+#if LJ_HASFFI
-+ fprintf(ctx->fp,
-+ ".LSFDE1:\n"
-+ "\t.long .LEFDE1-.LASFDE1\n"
-+ ".LASFDE1:\n"
-+ "\t.long .Lframe0\n"
-+#if LJ_64
-+ "\t.quad lj_vm_ffi_call\n"
-+ "\t.quad %d\n"
-+ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
-+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
-+ "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
-+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
-+#else
-+ "\t.long lj_vm_ffi_call\n"
-+ "\t.long %d\n"
-+ "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
-+ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
-+ "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
-+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
-+#endif
-+ "\t.align " SZPTR "\n"
-+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
-+#endif
-+#if !LJ_NO_UNWIND
-+#if (defined(__sun__) && defined(__svr4__))
-+#if LJ_64
-+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
-+#else
-+ fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
-+#endif
-+#else
-+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
-+#endif
-+ fprintf(ctx->fp,
-+ ".Lframe1:\n"
-+ "\t.long .LECIE1-.LSCIE1\n"
-+ ".LSCIE1:\n"
-+ "\t.long 0\n"
-+ "\t.byte 0x1\n"
-+ "\t.string \"zPR\"\n"
-+ "\t.uleb128 0x1\n"
-+ "\t.sleb128 -" SZPTR "\n"
-+ "\t.byte " REG_RA "\n"
-+ "\t.uleb128 6\n" /* augmentation length */
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.long lj_err_unwind_dwarf-.\n"
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
-+ "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
-+ "\t.align " SZPTR "\n"
-+ ".LECIE1:\n\n");
-+ fprintf(ctx->fp,
-+ ".LSFDE2:\n"
-+ "\t.long .LEFDE2-.LASFDE2\n"
-+ ".LASFDE2:\n"
-+ "\t.long .LASFDE2-.Lframe1\n"
-+ "\t.long .Lbegin-.\n"
-+ "\t.long %d\n"
-+ "\t.uleb128 0\n" /* augmentation length */
-+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
-+#if LJ_64
-+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
-+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
-+ "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
-+ "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
-+#else
-+ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
-+ "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
-+ "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
-+ "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
-+#endif
-+ "\t.align " SZPTR "\n"
-+ ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
-+#if LJ_HASFFI
-+ fprintf(ctx->fp,
-+ ".Lframe2:\n"
-+ "\t.long .LECIE2-.LSCIE2\n"
-+ ".LSCIE2:\n"
-+ "\t.long 0\n"
-+ "\t.byte 0x1\n"
-+ "\t.string \"zR\"\n"
-+ "\t.uleb128 0x1\n"
-+ "\t.sleb128 -" SZPTR "\n"
-+ "\t.byte " REG_RA "\n"
-+ "\t.uleb128 1\n" /* augmentation length */
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
-+ "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
-+ "\t.align " SZPTR "\n"
-+ ".LECIE2:\n\n");
-+ fprintf(ctx->fp,
-+ ".LSFDE3:\n"
-+ "\t.long .LEFDE3-.LASFDE3\n"
-+ ".LASFDE3:\n"
-+ "\t.long .LASFDE3-.Lframe2\n"
-+ "\t.long lj_vm_ffi_call-.\n"
-+ "\t.long %d\n"
-+ "\t.uleb128 0\n" /* augmentation length */
-+#if LJ_64
-+ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
-+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
-+ "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
-+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
-+#else
-+ "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
-+ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
-+ "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
-+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
-+#endif
-+ "\t.align " SZPTR "\n"
-+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
-+#endif
-+#endif
-+ break;
-+#if !LJ_NO_UNWIND
-+ /* Mental note: never let Apple design an assembler.
-+ ** Or a linker. Or a plastic case. But I digress.
-+ */
-+ case BUILD_machasm: {
-+#if LJ_HASFFI
-+ int fcsize = 0;
-+#endif
-+ int i;
-+ fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
-+ fprintf(ctx->fp,
-+ "EH_frame1:\n"
-+ "\t.set L$set$x,LECIEX-LSCIEX\n"
-+ "\t.long L$set$x\n"
-+ "LSCIEX:\n"
-+ "\t.long 0\n"
-+ "\t.byte 0x1\n"
-+ "\t.ascii \"zPR\\0\"\n"
-+ "\t.byte 0x1\n"
-+ "\t.byte 128-" SZPTR "\n"
-+ "\t.byte " REG_RA "\n"
-+ "\t.byte 6\n" /* augmentation length */
-+ "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
-+#if LJ_64
-+ "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
-+#else
-+ "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n"
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. */
-+#endif
-+ "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
-+ "\t.align " BSZPTR "\n"
-+ "LECIEX:\n\n");
-+ for (i = 0; i < ctx->nsym; i++) {
-+ const char *name = ctx->sym[i].name;
-+ int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
-+ if (size == 0) continue;
-+#if LJ_HASFFI
-+ if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
-+#endif
-+ fprintf(ctx->fp,
-+ "%s.eh:\n"
-+ "LSFDE%d:\n"
-+ "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
-+ "\t.long L$set$%d\n"
-+ "LASFDE%d:\n"
-+ "\t.long LASFDE%d-EH_frame1\n"
-+ "\t.long %s-.\n"
-+ "\t.long %d\n"
-+ "\t.byte 0\n" /* augmentation length */
-+ "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
-+#if LJ_64
-+ "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
-+ "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
-+ "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
-+ "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
-+#else
-+ "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
-+ "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */
-+ "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */
-+ "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */
-+#endif
-+ "\t.align " BSZPTR "\n"
-+ "LEFDE%d:\n\n",
-+ name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
-+ }
-+#if LJ_HASFFI
-+ if (fcsize) {
-+ fprintf(ctx->fp,
-+ "EH_frame2:\n"
-+ "\t.set L$set$y,LECIEY-LSCIEY\n"
-+ "\t.long L$set$y\n"
-+ "LSCIEY:\n"
-+ "\t.long 0\n"
-+ "\t.byte 0x1\n"
-+ "\t.ascii \"zR\\0\"\n"
-+ "\t.byte 0x1\n"
-+ "\t.byte 128-" SZPTR "\n"
-+ "\t.byte " REG_RA "\n"
-+ "\t.byte 1\n" /* augmentation length */
-+#if LJ_64
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
-+#else
-+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
-+ "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH. */
-+#endif
-+ "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
-+ "\t.align " BSZPTR "\n"
-+ "LECIEY:\n\n");
-+ fprintf(ctx->fp,
-+ "_lj_vm_ffi_call.eh:\n"
-+ "LSFDEY:\n"
-+ "\t.set L$set$yy,LEFDEY-LASFDEY\n"
-+ "\t.long L$set$yy\n"
-+ "LASFDEY:\n"
-+ "\t.long LASFDEY-EH_frame2\n"
-+ "\t.long _lj_vm_ffi_call-.\n"
-+ "\t.long %d\n"
-+ "\t.byte 0\n" /* augmentation length */
-+#if LJ_64
-+ "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
-+ "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
-+ "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
-+ "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
-+#else
-+ "\t.byte 0xe\n\t.byte 8\n" /* def_cfa_offset */
-+ "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
-+ "\t.byte 0xd\n\t.byte 0x4\n" /* def_cfa_register ebp */
-+ "\t.byte 0x83\n\t.byte 0x3\n" /* offset ebx */
-+#endif
-+ "\t.align " BSZPTR "\n"
-+ "LEFDEY:\n\n", fcsize);
-+ }
-+#endif
-+#if !LJ_64
-+ fprintf(ctx->fp,
-+ "\t.non_lazy_symbol_pointer\n"
-+ "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
-+ ".indirect_symbol _lj_err_unwind_dwarf\n"
-+ ".long 0\n\n");
-+ fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
-+ {
-+ const char *const *xn;
-+ for (xn = ctx->extnames; *xn; xn++)
-+ if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
-+ fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
-+ }
-+#endif
-+ fprintf(ctx->fp, ".subsections_via_symbols\n");
-+ }
-+ break;
-+#endif
-+ default: /* Difficult for other modes. */
-+ break;
-+ }
-+}
-
-From 2315613b3835fc0b89601a1fc2e72fffa1857b35 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Tue, 15 Nov 2016 13:50:15 -0500
-Subject: [PATCH 011/260] Fix some s390x declarations.
-
-s/S390x/S390X/
----
- src/Makefile | 3 ++-
- src/lj_arch.h | 29 +++++++++++++++++++----------
- 3 files changed, 22 insertions(+), 12 deletions(-)
-
-diff --git a/src/Makefile b/src/Makefile
-index 40cd10159..1450adc03 100644
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -238,7 +238,7 @@ else
- ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= arm
- else
--ifneq (,$(findstring LJ_TARGET_S390x ,$(TARGET_TESTARCH)))
-+ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= s390x
- else
- ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
-@@ -269,6 +269,7 @@ endif
- endif
- endif
- endif
-+endif
-
- ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
- TARGET_SYS= PS3
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 2638a9412..f699e90c3 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -29,7 +29,8 @@
- #define LUAJIT_ARCH_mips32 6
- #define LUAJIT_ARCH_MIPS64 7
- #define LUAJIT_ARCH_mips64 7
--#define LUAJIT_ARCH_S390x 8
-+#define LUAJIT_ARCH_S390X 8
-+#define LUAJIT_ARCH_s390x 8
-
- /* Target OS. */
- #define LUAJIT_OS_OTHER 0
-@@ -50,8 +51,8 @@
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM
- #elif defined(__aarch64__)
- #define LUAJIT_TARGET LUAJIT_ARCH_ARM64
--#elif defined(__s390x__) || defined(__s390x) || defined(__S390x__) || defined(__S390x) || defined(S390x)
--#define LUAJIT_TARGET LUAJIT_ARCH_S390x
-+#elif defined(__s390x__) || defined(__s390x)
-+#define LUAJIT_TARGET LUAJIT_ARCH_S390X
- #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
- #define LUAJIT_TARGET LUAJIT_ARCH_PPC
- #elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
-@@ -233,13 +234,6 @@
-
- #define LJ_ARCH_VERSION 80
-
--#elif LUAJIT_TARGET == LUAJIT_ARCH_S390
--
-- #define LJ_ARCH_NAME "s390x"
-- #define LJ_ARCH_BITS 64
-- #define LJ_ARCH_ENDIAN LUAJIT_BE
-- #define LJ_TARGET_S390 1
--
- #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
-
- #ifndef LJ_ARCH_ENDIAN
-@@ -362,6 +356,21 @@
- #define LJ_ARCH_VERSION 10
- #endif
-
-+#elif LUAJIT_TARGET == LUAJIT_ARCH_S390X
-+
-+#define LJ_ARCH_NAME "s390x"
-+#define LJ_ARCH_BITS 64
-+#define LJ_ARCH_ENDIAN LUAJIT_BE
-+#define LJ_TARGET_S390X 1
-+#define LJ_TARGET_EHRETREG 0
-+#define LJ_TARGET_JUMPRANGE 32 /* +-2^32 = +-4GB (32-bit, halfword aligned) */
-+#define LJ_TARGET_MASKSHIFT 1
-+#define LJ_TARGET_MASKROT 1
-+#define LJ_TARGET_UNALIGNED 1
-+#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
-+#define LJ_TARGET_GC64 1
-+#define LJ_ARCH_NOJIT 1 /* NYI */
-+
- #else
- #error "No target architecture defined"
- #endif
-
-From 71d40ba670d0d28be3b473a1e3042e82bf1ce9c1 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Tue, 15 Nov 2016 14:39:34 -0500
-Subject: [PATCH 012/260] Add some s390x C calling convention constants.
-
-Guesses for now based on the ELF ABI supplement for zSeries.
----
- src/host/buildvm.c | 2 ++
- src/lj_ccall.h | 11 +++++++++++
- 2 files changed, 13 insertions(+)
-
-diff --git a/src/host/buildvm.c b/src/host/buildvm.c
-index 57b4dc973..ad2a8171d 100644
---- a/src/host/buildvm.c
-+++ b/src/host/buildvm.c
-@@ -65,6 +65,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
- #include "../dynasm/dasm_ppc.h"
- #elif LJ_TARGET_MIPS
- #include "../dynasm/dasm_mips.h"
-+#elif LJ_TARGET_S390X
-+#include "../dynasm/dasm_s390x.h"
- #else
- #error "No support for this architecture (yet)"
- #endif
-diff --git a/src/lj_ccall.h b/src/lj_ccall.h
-index d97227a6b..2a10a5e88 100644
---- a/src/lj_ccall.h
-+++ b/src/lj_ccall.h
-@@ -126,6 +126,17 @@ typedef union FPRArg {
- struct { LJ_ENDIAN_LOHI(float f; , float g;) };
- } FPRArg;
-
-+#elif LJ_TARGET_S390X
-+
-+#define CCALL_NARG_GPR 5 /* GPR 2,3,4,5,6 */
-+#define CCALL_NARG_FPR 4 /* FPR 0,2,4,8 */
-+#define CCALL_NRET_GPR 1 /* GPR 2 */
-+#define CCALL_NRET_FPR 1 /* FPR 0 */
-+#define CCALL_SPS_EXTRA 20 /* 160-byte callee save area (not sure if this is the right place) */
-+#define CCALL_SPS_FREE 0
-+
-+typedef intptr_t GPRArg;
-+typedef double FPRArg;
- #else
- #error "Missing calling convention definitions for this architecture"
- #endif
-
-From 820fa8a0495b69090ef84d32822adb8a24aa42f0 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Tue, 15 Nov 2016 14:53:00 -0500
-Subject: [PATCH 013/260] Delete gcc version check for now.
-
-Stick to the default until we know what we actually need.
----
- src/lj_arch.h | 4 ----
- 1 file changed, 4 deletions(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index f699e90c3..31503e83e 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -398,10 +398,6 @@
- #if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
- #error "Need at least Clang 3.5 or newer"
- #endif
--#elif LJ_TARGET_S390x
--#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
--#error "Need at least GCC 4.2 or newer"
--#endif
- #else
- #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
- #error "Need at least GCC 4.8 or newer"
-
-From 3e472eb2615737916f0fb4a2a59b36cae73f3934 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Wed, 16 Nov 2016 10:31:34 +0530
-Subject: [PATCH 014/260] Update lj_arch.h
-
-Added missing elif condition for s390x for GCC dependency
----
- src/lj_arch.h | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 31503e83e..7f24386e1 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -393,6 +393,10 @@
- #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
- #error "Need at least GCC 4.2 or newer"
- #endif
-+#elif LJ_TARGET_S390x
-+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
-+#error "Need at least GCC 4.2 or newer"
-+#endif
- #elif LJ_TARGET_ARM64
- #if __clang__
- #if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
-
-From 31fb648a50a3cb854b9c4ff771b8b6e34da85163 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Wed, 16 Nov 2016 10:32:53 +0530
-Subject: [PATCH 015/260] Update lj_arch.h
-
-Removing the gcc check for now .. missed micheal's comment earlier
----
- src/lj_arch.h | 4 ----
- 1 file changed, 4 deletions(-)
-
-diff --git a/src/lj_arch.h b/src/lj_arch.h
-index 7f24386e1..31503e83e 100644
---- a/src/lj_arch.h
-+++ b/src/lj_arch.h
-@@ -393,10 +393,6 @@
- #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
- #error "Need at least GCC 4.2 or newer"
- #endif
--#elif LJ_TARGET_S390x
--#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
--#error "Need at least GCC 4.2 or newer"
--#endif
- #elif LJ_TARGET_ARM64
- #if __clang__
- #if (__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)
-
-From 498f028e69d81bfe6718dc24f71c93ae58130a23 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Wed, 16 Nov 2016 11:50:46 +0530
-Subject: [PATCH 016/260] Update lj_target_s390x.h
-
-changed instruction opcode to 64bit
----
- src/lj_target_s390x.h | 30 +++++++++++++++---------------
- 1 file changed, 15 insertions(+), 15 deletions(-)
-
-diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
-index 27bb34963..551bb7d46 100644
---- a/src/lj_target_s390x.h
-+++ b/src/lj_target_s390x.h
-@@ -154,27 +154,27 @@ typedef struct {
- #define ARMF_RSH(sh, r) (0x10 | ((sh) << 5) | ARMF_S(r))
-
- typedef enum S390xIns {
-- S390I_SR = 0x1B000000,
-- S390I_AR = 0x1A000000,
-- S390I_NR = 0x14000000,
-- S390I_XR = 0x17000000,
-- S390I_MR = 0x1C000000,
-- S390I_LR = 0x18000000,
-- S390I_C = 0x59000000,
-- S390I_LH = 0x48000000,
-- S390I_BASR = 0x0D000000,
-- S390I_MVCL = 0x0e000000,
-- S390I_ST = 0x50000000,
-- S390I_TM = 0x91000000,
-- S390I_MP = 0xbd000090,
-- S390I_CLR = 0x15000000,
-+ S390I_SR = 0x1B00000000000000,
-+ S390I_AR = 0x1A00000000000000,
-+ S390I_NR = 0x1400000000000000,
-+ S390I_XR = 0x1700000000000000,
-+ S390I_MR = 0x1C00000000000000,
-+ S390I_LR = 0x1800000000000000,
-+ S390I_C = 0x5900000000000000,
-+ S390I_LH = 0x4800000000000000,
-+ S390I_BASR = 0x0D00000000000000,
-+ S390I_MVCL = 0x0e00000000000000,
-+ S390I_ST = 0x5000000000000000,
-+ S390I_TM = 0x9100000000000000,
-+ S390I_MP = 0xbd00009000000000,
-+ S390I_CLR = 0x1500000000000000,
- } S390xIns;
-
- typedef enum S390xShift {
- S390SH_SLL, S390SH_SRL, S390SH_SRA
- } S390xShift;
-
--/* ARM condition codes. */
-+/* S390x condition codes. */
- typedef enum S390xCC {
-
- } S390xCC;
-
-From dcb977d1db91ea6600faf173cbd79df3aaff7c2e Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb@gmail.com>
-Date: Wed, 16 Nov 2016 14:44:12 +0530
-Subject: [PATCH 017/260] Changed the encoding for add,and,branch instructions
-
----
- dynasm/dasm_s390x.lua | 116 +++++++++++++++++++++++++++---------------
- 1 file changed, 76 insertions(+), 40 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index a0a50e1e1..3542e7ee0 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -653,47 +653,83 @@ end)
- -- Template strings for ARM instructions.
- map_op = {
- -- Basic data processing instructions.
-- add_2 = "00000000005a0000RX-a|00000000001aRR|00000000b9f80000RRF-a|00000000e35a0000RXY-a|00000000e3080000RXY-a",
--
---- and has several possible ways, need to find one, currently added two type of
-- and_2 = "0000000000540000RX-a|00000000140000RR|00000000b9f4RRF-a|00000000e3540000RXY-a|00000000b9800000RRE| 00000000b9e40000RRF-a",
-- and_c = "0000000000d40000SS-a",
-- and_i = "0000000000940000SI|00000000eb540000SIY",
-+ --add
-+ ar = "0000000000001a00", --RR
-+ ay = "0000e3000000005a", --RXY-a
-+ ag = "0000e30000000008",
-+ agr = "00000000b9080000", --RRE
-+ agf = "0000e30000000018",
-+ agfr = "00000000b9180000",
-+ agbr = "00000000b34a0000",
-+ adbr = "00000000b31a0000",
-+ aebr = "00000000b30a0000",
-+ ah = "000000004a000000", --RXa
-+ ahy = "0000e3000000007a",
-+ afi = "0000c20900000000", --RIL-a --pls check if this is correct
-+ agfi = "0000c20800000000",
-+ aih = "0000cc0800000000",
-+ al = "000000005e000000",
-+ alr = "0000000000001e00",
-+ aly = "0000e3000000005e", -- RXY-a
-+ alg = "0000e3000000000a",
-+ algr = "00000000b90a0000",
-+ algf = "0000e3000000001a",
-+ algfr = "00000000b91a0000",
-+ alfi = "0000c20b00000000",
-+ algfi = "0000c20a00000000",
-+ alc = "0000e30000000098",
-+ alcr = "00000000b9980000", -- RRE
-+ alcg = "0000e30000000088",
-+ alcgr = "00000000b9880000",
-+ alsih = "0000cc0a00000000",
-+ alsihn ="0000cc0b00000000",
-+ axr = "0000000000003600", -- RR
-+ ad = "000000006a000000", -- Rx-a
-+ adr = "0000000000002a00",
-+ ae = "000000007a000000",
-+ aer = "0000000000003a00",
-+ aw = "000000006e000000",
-+ awr = "0000000000002e00",
-+ au = "000000007e000000",
-+ aur = "0000000000003e00",
-+
-+-- and
-+ n = "0000000054000000",
-+ nr = "0000000000001400",
-+ ny = "0000e30000000054", -- RXY-a
-+ ng = "0000e30000000080",
-+ ngr = "00000000b9800000",
-+ nihf = "0000c00a00000000", --RIL-a
-+ nihl = "0000c00b00000000",
-
--and_2 = "0000000000540000RX-a|0000000000140000RR|00000000b9f40000RRF-a|00000000e3540000RXY-a",
-- and_3 = "00000000e3800000RXY-a|00000000b9800000RRE|00000000b9e40000RRF-a",
-- and_c = "0000000000d40000SS-a",
-- and_i = "0000000000940000SI",
-- and_i4 = "00000000eb540000SIY"
-- and_i3 = "000000000a540000RI-a|000000000a550000RI-a|000000000c0a0000RIL-a|000000000a560000RI-a|000000000a570000RI-a|000000000c0bRIL-a"
-- --branch related instrcutions
-- bal = "0000000000450000RX-a",
-- balr = "0000000000050000RR",
-- bas = "00000000004d0000RX-a",
-- basr = "00000000000d0000RR",
-- bassm = "00000000000c0000RR",
-- bsm = "00000000000b0000RR",
-- bc = "0000000000470000Rx-b",
-- bcr = "00000000000070000RR",
-- bct = "0000000000460000RX-a",
-- bctr = "0000000000060000RR",
-- bctg = "00000000e3460000RXY-a",
-- bctgr = "00000000b9460000RRE",
-- bxh = "0000000000860000RS-a",
-- bxhg = "00000000eb440000RSY-a",
-- bxle = "0000000000870000RS-a",
-- bxleg = "00000000eb450000RSY-a",
-- bras = "000000000a750000RI-b",
-- brasl = "000000000c050000RIL-b",
-- brc = "000000000a740000RI-c",
-- brcl = "000000000c040000RIL-c",
-- brct = "000000000a760000RI-b",
-- brctg = "000000000a770000RI-b",
-- brctg = "00000000occ60000RIL-b",
-- brxh = "0000000000840000RSI",
-- brxhg = "00000000ec440000RIE-e",
-- brxle = "0000000000850000RSI",
-- brxlg = "00000000ec450000RIE-e",
-+ --branch related instrcutions
-+ bal = "0000000045000000", --RX-a
-+ balr = "0000000000005000", --RR
-+ bas = "000000004d000000",
-+ basr = "0000000000000d00", -- this has leading zero in the instrcution opcode: 0d, need to take into consideration
-+ bassm = "0000000000000c00",
-+ bsm = "0000000000000b00",
-+ bc = "0000000047000000",
-+ bcr = "0000000000000700",
-+ bct = "0000000046000000",
-+ bctr = "0000000000000600",
-+ bctg = "0000e30000000046",
-+ bctgr = "00000000b9460000",
-+ bxh = "0000000086000000", --RS-a
-+ bxhg = "0000eb0000000044",
-+ bxle = "0000000087000000",
-+ bxleg = "0000eb0000000045", -- RSY-a
-+ --bras = "000000000a750000RI-b",
-+ brasl = "0000c00500000000", --RIL-b
-+ --brc = "000000000a740000RI-c",
-+ brcl = "0000c00400000000", --RIL-c
-+ --brct = "000000000a760000RI-b",
-+ --brctg = "000000000a770000RI-b",
-+ brcth = "0000cc0600000000",
-+ --brxh = "0000000000840000RSI",
-+ --brxhg = "00000000ec440000RIE-e",
-+ --brxle = "0000000000850000RSI",
-+ --brxlg = "00000000ec450000RIE-e",
-
- ----subtraction (basic operation)
- sub = "00000000005b0000RX-a"
-
-From 777b0671d3d496be7f2f53449943c40de0f6da9c Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Wed, 16 Nov 2016 15:09:59 +0530
-Subject: [PATCH 018/260] Update lj_target_s390x.h
-
-Added s390x specific condition codes
----
- src/lj_target_s390x.h | 9 ++++++++-
- 1 file changed, 8 insertions(+), 1 deletion(-)
-
-diff --git a/src/lj_target_s390x.h b/src/lj_target_s390x.h
-index 551bb7d46..4e35891a3 100644
---- a/src/lj_target_s390x.h
-+++ b/src/lj_target_s390x.h
-@@ -176,7 +176,14 @@ typedef enum S390xShift {
-
- /* S390x condition codes. */
- typedef enum S390xCC {
--
-+ /* Z- Zero , LZ - Less thena Zero , GZ - Greater than Zero
-+ O - Overflow , NZ - Not Zero , ZC - Zero with carry
-+ NZC - No Zero with carry , ZNC - Zero with No Carry
-+ EQ - Equal , NE - Not Equal , LO - Loq , HI - High
-+ */
-+ CC_Z , CC_LZ , CC_GZ , CC_O ,
-+ CC_NZ , CC_ZC , CC_NZC ,
-+ CC_ZNC , CC_EQ , CC_NE , CC_LO , CC_HI
- } S390xCC;
-
- #endif
-
-From 547b158ba4a82907db676460acaa2a7ba89680b4 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Wed, 16 Nov 2016 15:34:32 +0530
-Subject: [PATCH 019/260] Update vm_s390x.dasc
-
-made some changes like mentioning arch from x86 to S390x
-removed some x86 specific code
----
- src/vm_s390x.dasc | 195 +---------------------------------------------
- 1 file changed, 3 insertions(+), 192 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index d7d618d3b..7f12f625f 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1,12 +1,9 @@
--|// Low-level VM code for x86 CPUs.
-+|// Low-level VM code for S390x CPUs.
- |// Bytecode interpreter, fast functions and helper functions.
- |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
- |
--|.if P64
--|.arch x64
--|.else
--|.arch x86
--|.endif
-+
-+|.arch S390x
- |.section code_op, code_sub
- |
- |.actionlist build_actionlist
-@@ -16,13 +13,6 @@
- |
- |//-----------------------------------------------------------------------
- |
--|.if P64
--|.define X64, 1
--|.if WIN
--|.define X64WIN, 1
--|.endif
--|.endif
--|
- |// Fixed register assignments for the interpreter.
- |// This is very fragile and has many dependencies. Caveat emptor.
- |.define BASE, edx // Not C callee-save, refetched anyway.
-@@ -119,10 +109,6 @@
- |
- |// Stack layout while in interpreter. Must match with lj_frame.h.
- |//-----------------------------------------------------------------------
--|.if not X64 // x86 stack layout.
--|
--|.if WIN
--|
- |.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
- |.macro saveregs_
- | push edi; push esi; push ebx
-@@ -138,51 +124,9 @@
- | pop ebx; pop esi; pop edi; pop ebp
- |.endmacro
- |
--|.else
--|
--|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
--|.macro saveregs_
--| push edi; push esi; push ebx
--| sub esp, CFRAME_SPACE
--|.endmacro
--|.macro restoreregs
--| add esp, CFRAME_SPACE
--| pop ebx; pop esi; pop edi; pop ebp
--|.endmacro
--|
--|.endif
--|
- |.macro saveregs
- | push ebp; saveregs_
- |.endmacro
--|
--|.if WIN
--|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
--|.define SAVE_NRES, aword [esp+aword*18]
--|.define SAVE_CFRAME, aword [esp+aword*17]
--|.define SAVE_L, aword [esp+aword*16]
--|//----- 16 byte aligned, ^^^ arguments from C caller
--|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
--|.define SAVE_R4, aword [esp+aword*14]
--|.define SAVE_R3, aword [esp+aword*13]
--|.define SAVE_R2, aword [esp+aword*12]
--|//----- 16 byte aligned
--|.define SAVE_R1, aword [esp+aword*11]
--|.define SEH_FUNC, aword [esp+aword*10]
--|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
--|.define UNUSED2, aword [esp+aword*8]
--|//----- 16 byte aligned
--|.define UNUSED1, aword [esp+aword*7]
--|.define SAVE_PC, aword [esp+aword*6]
--|.define TMP2, aword [esp+aword*5]
--|.define TMP1, aword [esp+aword*4]
--|//----- 16 byte aligned
--|.define ARG4, aword [esp+aword*3]
--|.define ARG3, aword [esp+aword*2]
--|.define ARG2, aword [esp+aword*1]
--|.define ARG1, aword [esp] //<-- esp while in interpreter.
--|//----- 16 byte aligned, ^^^ arguments for C callee
--|.else
- |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
- |.define SAVE_NRES, aword [esp+aword*14]
- |.define SAVE_CFRAME, aword [esp+aword*13]
-@@ -203,7 +147,6 @@
- |.define ARG2, aword [esp+aword*1]
- |.define ARG1, aword [esp] //<-- esp while in interpreter.
- |//----- 16 byte aligned, ^^^ arguments for C callee
--|.endif
- |
- |// FPARGx overlaps ARGx and ARG(x+1) on x86.
- |.define FPARG3, qword [esp+qword*1]
-@@ -215,112 +158,6 @@
- |.define TMPa, TMP1
- |.define MULTRES, TMP2
- |
--|// Arguments for vm_call and vm_pcall.
--|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME!
--|
--|// Arguments for vm_cpcall.
--|.define INARG_CP_CALL, SAVE_ERRF
--|.define INARG_CP_UD, SAVE_NRES
--|.define INARG_CP_FUNC, SAVE_CFRAME
--|
--|//-----------------------------------------------------------------------
--|.elif X64WIN // x64/Windows stack layout
--|
--|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
--|.macro saveregs_
--| push rdi; push rsi; push rbx
--| sub rsp, CFRAME_SPACE
--|.endmacro
--|.macro saveregs
--| push rbp; saveregs_
--|.endmacro
--|.macro restoreregs
--| add rsp, CFRAME_SPACE
--| pop rbx; pop rsi; pop rdi; pop rbp
--|.endmacro
--|
--|.define SAVE_CFRAME, aword [rsp+aword*13]
--|.define SAVE_PC, dword [rsp+dword*25]
--|.define SAVE_L, dword [rsp+dword*24]
--|.define SAVE_ERRF, dword [rsp+dword*23]
--|.define SAVE_NRES, dword [rsp+dword*22]
--|.define TMP2, dword [rsp+dword*21]
--|.define TMP1, dword [rsp+dword*20]
--|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
--|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
--|.define SAVE_R4, aword [rsp+aword*8]
--|.define SAVE_R3, aword [rsp+aword*7]
--|.define SAVE_R2, aword [rsp+aword*6]
--|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
--|.define ARG5, aword [rsp+aword*4]
--|.define CSAVE_4, aword [rsp+aword*3]
--|.define CSAVE_3, aword [rsp+aword*2]
--|.define CSAVE_2, aword [rsp+aword*1]
--|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
--|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
--|
--|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
--|.define TMPQ, qword [rsp+aword*10]
--|.define MULTRES, TMP2
--|.define TMPa, ARG5
--|.define ARG5d, dword [rsp+aword*4]
--|.define TMP3, ARG5d
--|
--|//-----------------------------------------------------------------------
--|.else // x64/POSIX stack layout
--|
--|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
--|.macro saveregs_
--| push rbx; push r15; push r14
--|.if NO_UNWIND
--| push r13; push r12
--|.endif
--| sub rsp, CFRAME_SPACE
--|.endmacro
--|.macro saveregs
--| push rbp; saveregs_
--|.endmacro
--|.macro restoreregs
--| add rsp, CFRAME_SPACE
--|.if NO_UNWIND
--| pop r12; pop r13
--|.endif
--| pop r14; pop r15; pop rbx; pop rbp
--|.endmacro
--|
--|//----- 16 byte aligned,
--|.if NO_UNWIND
--|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
--|.define SAVE_R4, aword [rsp+aword*10]
--|.define SAVE_R3, aword [rsp+aword*9]
--|.define SAVE_R2, aword [rsp+aword*8]
--|.define SAVE_R1, aword [rsp+aword*7]
--|.define SAVE_RU2, aword [rsp+aword*6]
--|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
--|.else
--|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
--|.define SAVE_R4, aword [rsp+aword*8]
--|.define SAVE_R3, aword [rsp+aword*7]
--|.define SAVE_R2, aword [rsp+aword*6]
--|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
--|.endif
--|.define SAVE_CFRAME, aword [rsp+aword*4]
--|.define SAVE_PC, dword [rsp+dword*7]
--|.define SAVE_L, dword [rsp+dword*6]
--|.define SAVE_ERRF, dword [rsp+dword*5]
--|.define SAVE_NRES, dword [rsp+dword*4]
--|.define TMPa, aword [rsp+aword*1]
--|.define TMP2, dword [rsp+dword*1]
--|.define TMP1, dword [rsp] //<-- rsp while in interpreter.
--|//----- 16 byte aligned
--|
--|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
--|.define TMPQ, qword [rsp]
--|.define TMP3, dword [rsp+aword*1]
--|.define MULTRES, TMP2
--|
--|.endif
--|
- |//-----------------------------------------------------------------------
- |
- |// Instruction headers.
-@@ -339,11 +176,6 @@
- | movzx OP, RCL
- | add PC, 4
- | shr RC, 16
--|.if X64
--| jmp aword [DISPATCH+OP*8]
--|.else
--| jmp aword [DISPATCH+OP*4]
--|.endif
- |.endmacro
- |
- |// Instruction footer.
-@@ -433,30 +265,9 @@
- | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
- |.endmacro
- |
--|// x87 compares.
--|.macro fcomparepp // Compare and pop st0 >< st1.
--| fucomip st1
--| fpop
--|.endmacro
- |
- |.macro fpop1; fstp st1; .endmacro
- |
--|// Synthesize SSE FP constants.
--|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
--|.if X64
--| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
--|.else
--| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1
--|.endif
--|.endmacro
--|
--|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
--|.if X64
--| mov64 tmp, U64x(val,00000000); movd reg, tmp
--|.else
--| mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51
--|.endif
--|.endmacro
- |
- |.macro sseconst_sign, reg, tmp // Synthesize sign mask.
- | sseconst_hi reg, tmp, 80000000
-
-From c36895a872079cdbb683a470edfa137b6ff50928 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Wed, 16 Nov 2016 17:19:10 +0530
-Subject: [PATCH 020/260] Update lj_frame.h
-
-Added CFrame definitions for S390X.
-Values are left unassigned as I am unsure of the correct values.
----
- src/lj_frame.h | 9 +++++++++
- 1 file changed, 9 insertions(+)
-
-diff --git a/src/lj_frame.h b/src/lj_frame.h
-index d8d8cff29..017bdaf9e 100644
---- a/src/lj_frame.h
-+++ b/src/lj_frame.h
-@@ -200,6 +200,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
- #define CFRAME_OFS_MULTRES 192
- #define CFRAME_SIZE 208
- #define CFRAME_SHIFT_MULTRES 3
-+#elif LJ_TARGET_S390X
-+#define CFRAME_OFS_ERRF
-+#define CFRAME_OFS_NRES
-+#define CFRAME_OFS_PREV
-+#define CFRAME_OFS_L
-+#define CFRAME_OFS_PC
-+#define CFRAME_OFS_MULTRES
-+#define CFRAME_SIZE
-+#define CFRAME_SHIFT_MULTRES
- #elif LJ_TARGET_PPC
- #if LJ_TARGET_XBOX360
- #define CFRAME_OFS_ERRF 424
-
-From 4aa1099d6a5484f78cc6336e0987f1d541685228 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Thu, 17 Nov 2016 14:58:17 +0530
-Subject: [PATCH 021/260] Update vm_s390x.dasc
-
-Referring to the ARM dasc file, I have created placeholder slots that still have to be replaced with s390x registers and instructions.
----
- src/vm_s390x.dasc | 219 +++++++++++++++++++++-------------------------
- 1 file changed, 101 insertions(+), 118 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 7f12f625f..ff599470b 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -15,81 +15,85 @@
- |
- |// Fixed register assignments for the interpreter.
- |// This is very fragile and has many dependencies. Caveat emptor.
--|.define BASE, edx // Not C callee-save, refetched anyway.
--|.if not X64
--|.define KBASE, edi // Must be C callee-save.
--|.define KBASEa, KBASE
--|.define PC, esi // Must be C callee-save.
--|.define PCa, PC
--|.define DISPATCH, ebx // Must be C callee-save.
--|.elif X64WIN
--|.define KBASE, edi // Must be C callee-save.
--|.define KBASEa, rdi
--|.define PC, esi // Must be C callee-save.
--|.define PCa, rsi
--|.define DISPATCH, ebx // Must be C callee-save.
--|.else
--|.define KBASE, r15d // Must be C callee-save.
--|.define KBASEa, r15
--|.define PC, ebx // Must be C callee-save.
--|.define PCa, rbx
--|.define DISPATCH, r14d // Must be C callee-save.
--|.endif
-+.define BASE, // Base of current Lua stack frame.
-+|.define KBASE, // Constants of current Lua function.
-+|.define PC, // Next PC.
-+|.define GLREG, // Global state.
-+|.define LREG, // Register holding lua_State (also in SAVE_L).
-+|.define TISNUM, // Constant LJ_TISNUM << 47.
-+|.define TISNUMhi, // Constant LJ_TISNUM << 15.
-+|.define TISNIL, // Constant -1LL.
-+|.define fp, // Yes, we have to maintain a frame pointer.
- |
--|.define RA, ecx
--|.define RAH, ch
--|.define RAL, cl
--|.define RB, ebp // Must be ebp (C callee-save).
--|.define RC, eax // Must be eax.
--|.define RCW, ax
--|.define RCH, ah
--|.define RCL, al
--|.define OP, RB
--|.define RD, RC
--|.define RDW, RCW
--|.define RDL, RCL
--|.if X64
--|.define RAa, rcx
--|.define RBa, rbp
--|.define RCa, rax
--|.define RDa, rax
--|.else
--|.define RAa, RA
--|.define RBa, RB
--|.define RCa, RC
--|.define RDa, RD
--|.endif
-+|// The following temporaries are not saved across C calls, except for RA/RC.
-+|.define RA,
-+|.define RC,
-+|.define RB,
-+|.define RAw,
-+|.define RCw,
-+|.define RBw,
-+|.define INS,
-+|.define INSw,
-+|.define ITYPE,
-+|.define TMP0,
-+|.define TMP1,
-+|.define TMP2,
-+|.define TMP3,
-+|.define TMP0w,
-+|.define TMP1w,
-+|.define TMP2w,
-+|.define TMP3w,
- |
--|.if not X64
--|.define FCARG1, ecx // x86 fastcall arguments.
--|.define FCARG2, edx
--|.elif X64WIN
--|.define CARG1, rcx // x64/WIN64 C call arguments.
--|.define CARG2, rdx
--|.define CARG3, r8
--|.define CARG4, r9
--|.define CARG1d, ecx
--|.define CARG2d, edx
--|.define CARG3d, r8d
--|.define CARG4d, r9d
--|.define FCARG1, CARG1d // Upwards compatible to x86 fastcall.
--|.define FCARG2, CARG2d
--|.else
--|.define CARG1, rdi // x64/POSIX C call arguments.
--|.define CARG2, rsi
--|.define CARG3, rdx
--|.define CARG4, rcx
--|.define CARG5, r8
--|.define CARG6, r9
--|.define CARG1d, edi
--|.define CARG2d, esi
--|.define CARG3d, edx
--|.define CARG4d, ecx
--|.define CARG5d, r8d
--|.define CARG6d, r9d
--|.define FCARG1, CARG1d // Simulate x86 fastcall.
--|.define FCARG2, CARG2d
--|.endif
-+|// Calling conventions. Also used as temporaries.
-+|.define CARG1,
-+|.define CARG2,
-+|.define CARG3,
-+|.define CARG4,
-+|.define CARG5,
-+|.define CARG1w,
-+|.define CARG2w,
-+|.define CARG3w,
-+|.define CARG4w,
-+|.define CARG5w,
-+|
-+|.define FARG1,
-+|.define FARG2,
-+|
-+|.define CRET1,
-+|.define CRET1w,
-+|// Stack layout while in interpreter. Must match with lj_frame.h.
-+|
-+|.define CFRAME_SPACE, 208
-+|//----- 16 byte aligned, <-- sp entering interpreter
-+|// Unused [sp, #204] // 32 bit values
-+|.define SAVE_NRES,
-+|.define SAVE_ERRF,
-+|.define SAVE_MULTRES,
-+|.define TMPD,
-+|.define SAVE_L,
-+|.define SAVE_PC,
-+|.define SAVE_CFRAME,
-+|.define SAVE_FPR_,
-+|.define SAVE_GPR_,
-+|.define SAVE_LR,
-+|.define SAVE_FP,
-+|//----- 16 byte aligned, <-- sp while in interpreter.
-+|
-+|.define TMPDofs,
-+|
-+|.macro save_, gpr1, gpr2, fpr1, fpr2
-+]
-+|.endmacro
-+|.macro rest_, gpr1, gpr2, fpr1, fpr2
-+]
-+|.endmacro
-+|
-+|.macro saveregs
-+
-+|.endmacro
-+|.macro restoreregs
-+
-+|.endmacro
- |
- |// Type definitions. Some of these are only used for documentation.
- |.type L, lua_State
-@@ -111,22 +115,16 @@
- |//-----------------------------------------------------------------------
- |.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
- |.macro saveregs_
--| push edi; push esi; push ebx
--| push extern lj_err_unwind_win
--| fs; push dword [0]
--| fs; mov [0], esp
--| sub esp, CFRAME_SPACE
-+
- |.endmacro
- |.macro restoreregs
--| add esp, CFRAME_SPACE
--| fs; pop dword [0]
--| pop edi // Short for esp += 4.
--| pop ebx; pop esi; pop edi; pop ebp
-+
- |.endmacro
- |
- |.macro saveregs
--| push ebp; saveregs_
-+
- |.endmacro
-+
- |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
- |.define SAVE_NRES, aword [esp+aword*14]
- |.define SAVE_CFRAME, aword [esp+aword*13]
-@@ -164,18 +162,14 @@
- |.macro ins_A; .endmacro
- |.macro ins_AD; .endmacro
- |.macro ins_AJ; .endmacro
--|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro
--|.macro ins_AB_; movzx RB, RCH; .endmacro
--|.macro ins_A_C; movzx RC, RCL; .endmacro
--|.macro ins_AND; not RDa; .endmacro
-+|.macro ins_ABC; .endmacro
-+|.macro ins_AB_; .endmacro
-+|.macro ins_A_C; .endmacro
-+|.macro ins_AND; .endmacro
- |
- |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
- |.macro ins_NEXT
--| mov RC, [PC]
--| movzx RA, RCH
--| movzx OP, RCL
--| add PC, 4
--| shr RC, 16
-+
- |.endmacro
- |
- |// Instruction footer.
-@@ -220,11 +214,11 @@
- |//-----------------------------------------------------------------------
- |
- |// Macros to test operand types.
--|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro
--|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro
--|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro
--|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro
--|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro
-+|.macro checktp, .endmacro
-+|.macro checknum, .endmacro
-+|.macro checkint, .endmacro
-+|.macro checkstr, .endmacro
-+|.macro checktab, .endmacro
- |
- |// These operands must be used with movzx.
- |.define PC_OP, byte [PC-4]
-@@ -234,7 +228,7 @@
- |.define PC_RD, word [PC-2]
- |
- |.macro branchPC, reg
--| lea PC, [PC+reg*4-BCBIAS_J*4]
-+
- |.endmacro
- |
- |// Assumes DISPATCH is relative to GL.
-@@ -245,24 +239,16 @@
- |
- |// Decrement hashed hotcount and trigger trace recorder if zero.
- |.macro hotloop, reg
--| mov reg, PC
--| shr reg, 1
--| and reg, HOTCOUNT_PCMASK
--| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
--| jb ->vm_hotloop
-+
- |.endmacro
- |
- |.macro hotcall, reg
--| mov reg, PC
--| shr reg, 1
--| and reg, HOTCOUNT_PCMASK
--| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
--| jb ->vm_hotcall
-+
- |.endmacro
- |
- |// Set current VM state.
- |.macro set_vmstate, st
--| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
-+
- |.endmacro
- |
- |
-@@ -270,27 +256,24 @@
- |
- |
- |.macro sseconst_sign, reg, tmp // Synthesize sign mask.
--| sseconst_hi reg, tmp, 80000000
-+|
- |.endmacro
- |.macro sseconst_1, reg, tmp // Synthesize 1.0.
--| sseconst_hi reg, tmp, 3ff00000
-+|
- |.endmacro
- |.macro sseconst_m1, reg, tmp // Synthesize -1.0.
--| sseconst_hi reg, tmp, bff00000
-+|
- |.endmacro
- |.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
--| sseconst_hi reg, tmp, 43300000
-+|
- |.endmacro
- |.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
--| sseconst_hi reg, tmp, 43380000
-+|
- |.endmacro
- |
- |// Move table write barrier back. Overwrites reg.
- |.macro barrierback, tab, reg
--| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
--| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]
--| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab
--| mov tab->gclist, reg
-+
- |.endmacro
- |
- |//-----------------------------------------------------------------------
-
-From 3ac644ecee044a90b806cc87626c3370471b14da Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Fri, 18 Nov 2016 17:09:20 +0530
-Subject: [PATCH 022/260] Update vm_s390x.dasc
-
-Assigned general purpose register to existing macros
----
- src/vm_s390x.dasc | 19 ++++++++++---------
- 1 file changed, 10 insertions(+), 9 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index ff599470b..656ed051f 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -15,15 +15,15 @@
- |
- |// Fixed register assignments for the interpreter.
- |// This is very fragile and has many dependencies. Caveat emptor.
--.define BASE, // Base of current Lua stack frame.
--|.define KBASE, // Constants of current Lua function.
--|.define PC, // Next PC.
--|.define GLREG, // Global state.
--|.define LREG, // Register holding lua_State (also in SAVE_L).
--|.define TISNUM, // Constant LJ_TISNUM << 47.
--|.define TISNUMhi, // Constant LJ_TISNUM << 15.
--|.define TISNIL, // Constant -1LL.
--|.define fp, // Yes, we have to maintain a frame pointer.
-+|.define BASE, gr0 // Base of current Lua stack frame.
-+|.define KBASE, gr1 // Constants of current Lua function.
-+|.define PC, gr14 // Next PC.
-+|.define GLREG, gr2 // Global state.
-+|.define LREG, gr3 // Register holding lua_State (also in SAVE_L).
-+|.define TISNUM, gr4 // Constant LJ_TISNUM << 47.
-+|.define TISNUMhi, gr5 // Constant LJ_TISNUM << 15.
-+|.define TISNIL, gr6 // Constant -1LL.
-+|.define fp, gr7 // Yes, we have to maintain a frame pointer.
- |
- |// The following temporaries are not saved across C calls, except for RA/RC.
- |.define RA,
-@@ -66,6 +66,7 @@
- |.define CFRAME_SPACE, 208
- |//----- 16 byte aligned, <-- sp entering interpreter
- |// Unused [sp, #204] // 32 bit values
-+|
- |.define SAVE_NRES,
- |.define SAVE_ERRF,
- |.define SAVE_MULTRES,
-
-From e90d985d080eeede2aa623888fc13f4c8c3edd0e Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Fri, 18 Nov 2016 13:06:31 -0500
-Subject: [PATCH 023/260] Look for s390x file rather than S390x file.
-
----
- src/vm_s390x.dasc | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 656ed051f..4b5ae2ad8 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1,9 +1,9 @@
--|// Low-level VM code for S390x CPUs.
-+|// Low-level VM code for IBM z/Architecture (s390x) CPUs.
- |// Bytecode interpreter, fast functions and helper functions.
- |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
- |
-
--|.arch S390x
-+|.arch s390x
- |.section code_op, code_sub
- |
- |.actionlist build_actionlist
-
-From 31c0e6016a144aefefcb9ab4642aebb0b0633694 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Tue, 22 Nov 2016 10:20:56 +0530
-Subject: [PATCH 024/260] Update vm_s390x.dasc
-
-Added definitions for the macros savereg and restreg.
-Used Store and Load instructions
-to store and load register contents to and from memory.
----
- src/vm_s390x.dasc | 36 +++++++++++++++++-------------------
- 1 file changed, 17 insertions(+), 19 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 4b5ae2ad8..a9a383556 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -15,15 +15,15 @@
- |
- |// Fixed register assignments for the interpreter.
- |// This is very fragile and has many dependencies. Caveat emptor.
--|.define BASE, gr0 // Base of current Lua stack frame.
--|.define KBASE, gr1 // Constants of current Lua function.
--|.define PC, gr14 // Next PC.
--|.define GLREG, gr2 // Global state.
--|.define LREG, gr3 // Register holding lua_State (also in SAVE_L).
--|.define TISNUM, gr4 // Constant LJ_TISNUM << 47.
--|.define TISNUMhi, gr5 // Constant LJ_TISNUM << 15.
--|.define TISNIL, gr6 // Constant -1LL.
--|.define fp, gr7 // Yes, we have to maintain a frame pointer.
-+|.define BASE, gr0
-+|.define KBASE, gr1
-+|.define PC, gr14
-+|.define GLREG, gr2
-+|.define LREG, gr3
-+|.define TISNUM, gr4
-+|.define TISNUMhi, gr5
-+|.define TISNIL, gr6
-+|.define fp, gr7
- |
- |// The following temporaries are not saved across C calls, except for RA/RC.
- |.define RA,
-@@ -82,18 +82,16 @@
- |
- |.define TMPDofs,
- |
--|.macro save_, gpr1, gpr2, fpr1, fpr2
--]
--|.endmacro
--|.macro rest_, gpr1, gpr2, fpr1, fpr2
--]
-+|.macro savereg arg1 arg2 arg3
-+| STG arg1; // Store 64bit content
-+| STG arg2; // Store 64bit content
-+| STG arg3; // Store 64bit content
- |.endmacro
- |
--|.macro saveregs
--
--|.endmacro
--|.macro restoreregs
--
-+|.macro restreg arg1 arg2 arg3
-+| LG arg1; // Load 64 bit content
-+| LG arg2; // Load 64 bit content
-+| LG arg3; // Load 64 bit content
- |.endmacro
- |
- |// Type definitions. Some of these are only used for documentation.
-
-From d50f8aa92b1022f4b4be40d8e4ff6badb0b1f336 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Tue, 22 Nov 2016 11:48:56 -0500
-Subject: [PATCH 025/260] Add preliminary frame offsets.
-
-These are educated guesses at this point. We might need more stack space because
-we don't have many free registers available.
----
- src/lj_asm.c | 2 +
- src/lj_frame.h | 18 +++----
- src/vm_s390x.dasc | 118 +++++++++++++++++++++++-----------------------
- 3 files changed, 70 insertions(+), 68 deletions(-)
-
-diff --git a/src/lj_asm.c b/src/lj_asm.c
-index 7ce589248..d427fa5b2 100644
---- a/src/lj_asm.c
-+++ b/src/lj_asm.c
-@@ -1567,6 +1567,8 @@ static void asm_loop(ASMState *as)
- #include "lj_asm_ppc.h"
- #elif LJ_TARGET_MIPS
- #include "lj_asm_mips.h"
-+#elif LJ_TARGET_S390X
-+#include "lj_asm_s390x.h"
- #else
- #error "Missing assembler for target CPU"
- #endif
-diff --git a/src/lj_frame.h b/src/lj_frame.h
-index 017bdaf9e..65affb5da 100644
---- a/src/lj_frame.h
-+++ b/src/lj_frame.h
-@@ -200,15 +200,6 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
- #define CFRAME_OFS_MULTRES 192
- #define CFRAME_SIZE 208
- #define CFRAME_SHIFT_MULTRES 3
--#elif LJ_TARGET_S390X
--#define CFRAME_OFS_ERRF
--#define CFRAME_OFS_NRES
--#define CFRAME_OFS_PREV
--#define CFRAME_OFS_L
--#define CFRAME_OFS_PC
--#define CFRAME_OFS_MULTRES
--#define CFRAME_SIZE
--#define CFRAME_SHIFT_MULTRES
- #elif LJ_TARGET_PPC
- #if LJ_TARGET_XBOX360
- #define CFRAME_OFS_ERRF 424
-@@ -273,6 +264,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
- #endif
- #define CFRAME_OFS_MULTRES 0
- #define CFRAME_SHIFT_MULTRES 3
-+#elif LJ_TARGET_S390X
-+#define CFRAME_OFS_ERRF 216
-+#define CFRAME_OFS_NRES 208
-+#define CFRAME_OFS_PREV 200
-+#define CFRAME_OFS_L 192
-+#define CFRAME_OFS_PC 168
-+#define CFRAME_OFS_MULTRES 160
-+#define CFRAME_SIZE 172
-+#define CFRAME_SHIFT_MULTRES 3
- #else
- #error "Missing CFRAME_* definitions for this architecture"
- #endif
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index a9a383556..dc30593e5 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -2,7 +2,22 @@
- |// Bytecode interpreter, fast functions and helper functions.
- |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
- |
--
-+|// ELF ABI registers:
-+|// r0,r1 | | volatile |
-+|// r2 | parameter and return value | volatile |
-+|// r3-r5 | parameter | volatile |
-+|// r6 | parameter | saved |
-+|// r7-r11 | | saved |
-+|// r12 | GOT pointer (needed?) | saved |
-+|// r13 | literal pool (needed?) | saved |
-+|// r14 | return address | volatile |
-+|// r15 | stack pointer | saved |
-+|// f0,f2,f4,f6 | parameter and return value | volatile |
-+|// f1,f3,f5,f7 | | volatile |
-+|// f8-f15 | | saved |
-+|// ar0,ar1 | TLS | volatile |
-+|// ar2-ar15 | | volatile |
-+|
- |.arch s390x
- |.section code_op, code_sub
- |
-@@ -13,72 +28,57 @@
- |
- |//-----------------------------------------------------------------------
- |
--|// Fixed register assignments for the interpreter.
--|// This is very fragile and has many dependencies. Caveat emptor.
--|.define BASE, gr0
--|.define KBASE, gr1
--|.define PC, gr14
--|.define GLREG, gr2
--|.define LREG, gr3
--|.define TISNUM, gr4
--|.define TISNUMhi, gr5
--|.define TISNIL, gr6
--|.define fp, gr7
-+|// Fixed register assignments for the interpreter, callee-saved.
-+|.define BASE, r7 // Base of current Lua stack frame.
-+|.define KBASE, r8 // Constants of current Lua function.
-+|.define PC, r9 // Next PC.
-+|.define GLREG, r10 // Global state.
-+|.define LREG, r11 // Register holding lua_State (also in SAVE_L).
- |
--|// The following temporaries are not saved across C calls, except for RA/RC.
--|.define RA,
--|.define RC,
--|.define RB,
--|.define RAw,
--|.define RCw,
--|.define RBw,
--|.define INS,
--|.define INSw,
--|.define ITYPE,
--|.define TMP0,
--|.define TMP1,
--|.define TMP2,
--|.define TMP3,
--|.define TMP0w,
--|.define TMP1w,
--|.define TMP2w,
--|.define TMP3w,
-+|// The following temporaries are not saved across C calls, except for RD.
-+|.define RA, r0 // Cannot be dereferenced.
-+|.define RB, r1
-+|.define RC, r5 // Overlaps CARG4.
-+|.define RD, r6 // Overlaps CARG5. Callee-saved.
- |
- |// Calling conventions. Also used as temporaries.
--|.define CARG1,
--|.define CARG2,
--|.define CARG3,
--|.define CARG4,
--|.define CARG5,
--|.define CARG1w,
--|.define CARG2w,
--|.define CARG3w,
--|.define CARG4w,
--|.define CARG5w,
-+|.define CARG1, r2
-+|.define CARG2, r3
-+|.define CARG3, r4
-+|.define CARG4, r5
-+|.define CARG5, r6
-+|
-+|.define FARG1, f0
-+|.define FARG2, f2
-+|.define FARG3, f4
-+|.define FARG4, f6
- |
--|.define FARG1,
--|.define FARG2,
-+|.define CRET1, r2
-+|
-+|.define SP, r15
- |
--|.define CRET1,
--|.define CRET1w,
- |// Stack layout while in interpreter. Must match with lj_frame.h.
-+|.define CFRAME_SPACE, 176 // Delta for SP, 8 byte aligned.
-+|
-+|// Register save area.
-+|.define SAVE_FPR6, 328(SP)
-+|.define SAVE_FPR4, 320(SP)
-+|.define SAVE_FPR2, 312(SP)
-+|.define SAVE_FPR0, 304(SP)
-+|.define SAVE_GPRS, 224(SP) // Save area for r6-r15 (10*8 bytes).
- |
--|.define CFRAME_SPACE, 208
--|//----- 16 byte aligned, <-- sp entering interpreter
--|// Unused [sp, #204] // 32 bit values
-+|// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
-+|.define SAVE_ERRF, 216(SP) // Argument 4, in r5.
-+|.define SAVE_NRES, 208(SP) // Argument 3, in r4.
-+|.define SAVE_CFRAME, 200(SP) // Argument 2, in r3.
-+|.define SAVE_L, 192(SP) // Argument 1, in r2.
-+|.define RESERVED, 184(SP) // Reserved for compiler use.
-+|.define BACKCHAIN, 176(SP) // <- SP entering interpreter.
-+|.define SAVE_PC, 168(SP)
-+|.define SAVE_MULTRES, 160(SP)
- |
--|.define SAVE_NRES,
--|.define SAVE_ERRF,
--|.define SAVE_MULTRES,
--|.define TMPD,
--|.define SAVE_L,
--|.define SAVE_PC,
--|.define SAVE_CFRAME,
--|.define SAVE_FPR_,
--|.define SAVE_GPR_,
--|.define SAVE_LR,
--|.define SAVE_FP,
--|//----- 16 byte aligned, <-- sp while in interpreter.
-+|// Callee save area (allocated by interpreter).
-+|.define CALLEESAVE 000(SP) // <- SP in interpreter.
- |
- |.define TMPDofs,
- |
-
-From d505a0e0bae07611f6361af6823e072e7f8b9b84 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Tue, 22 Nov 2016 13:47:35 -0500
-Subject: [PATCH 026/260] Cleanup.
-
----
- src/vm_s390x.dasc | 47 -----------------------------------------------
- 1 file changed, 47 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index dc30593e5..44c056d36 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -80,8 +80,6 @@
- |// Callee save area (allocated by interpreter).
- |.define CALLEESAVE 000(SP) // <- SP in interpreter.
- |
--|.define TMPDofs,
--|
- |.macro savereg arg1 arg2 arg3
- | STG arg1; // Store 64bit content
- | STG arg2; // Store 64bit content
-@@ -110,51 +108,6 @@
- |.type TRACE, GCtrace
- |.type SBUF, SBuf
- |
--|// Stack layout while in interpreter. Must match with lj_frame.h.
--|//-----------------------------------------------------------------------
--|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
--|.macro saveregs_
--
--|.endmacro
--|.macro restoreregs
--
--|.endmacro
--|
--|.macro saveregs
--
--|.endmacro
--
--|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
--|.define SAVE_NRES, aword [esp+aword*14]
--|.define SAVE_CFRAME, aword [esp+aword*13]
--|.define SAVE_L, aword [esp+aword*12]
--|//----- 16 byte aligned, ^^^ arguments from C caller
--|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
--|.define SAVE_R4, aword [esp+aword*10]
--|.define SAVE_R3, aword [esp+aword*9]
--|.define SAVE_R2, aword [esp+aword*8]
--|//----- 16 byte aligned
--|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
--|.define SAVE_PC, aword [esp+aword*6]
--|.define TMP2, aword [esp+aword*5]
--|.define TMP1, aword [esp+aword*4]
--|//----- 16 byte aligned
--|.define ARG4, aword [esp+aword*3]
--|.define ARG3, aword [esp+aword*2]
--|.define ARG2, aword [esp+aword*1]
--|.define ARG1, aword [esp] //<-- esp while in interpreter.
--|//----- 16 byte aligned, ^^^ arguments for C callee
--|
--|// FPARGx overlaps ARGx and ARG(x+1) on x86.
--|.define FPARG3, qword [esp+qword*1]
--|.define FPARG1, qword [esp]
--|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ).
--|.define TMPQ, qword [esp+aword*4]
--|.define TMP3, ARG4
--|.define ARG5, TMP1
--|.define TMPa, TMP1
--|.define MULTRES, TMP2
--|
- |//-----------------------------------------------------------------------
- |
- |// Instruction headers.
-
-From 5a69b4638a5eb960e9cd6f95611d5d5b6fdcd21d Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Tue, 22 Nov 2016 13:58:10 -0500
-Subject: [PATCH 027/260] Fixup the save/restore register macros.
-
-I believe these macros obey the C calling convention, so we need to
-allocate our stack frame and save all callee-save registers. We
-can tune it later if it turns out we don't need all the registers.
----
- src/vm_s390x.dasc | 23 +++++++++++++++--------
- 1 file changed, 15 insertions(+), 8 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 44c056d36..49ea335a3 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -80,16 +80,23 @@
- |// Callee save area (allocated by interpreter).
- |.define CALLEESAVE 000(SP) // <- SP in interpreter.
- |
--|.macro savereg arg1 arg2 arg3
--| STG arg1; // Store 64bit content
--| STG arg2; // Store 64bit content
--| STG arg3; // Store 64bit content
-+|.macro saveregs
-+| lay SP, -CFRAME_SPACE(SP) // Allocate stack frame.
-+| stmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
-+| std f0, SAVE_FPR0
-+| std f2, SAVE_FPR2
-+| std f4, SAVE_FPR4
-+| std f6, SAVE_FPR6
- |.endmacro
- |
--|.macro restreg arg1 arg2 arg3
--| LG arg1; // Load 64 bit content
--| LG arg2; // Load 64 bit content
--| LG arg3; // Load 64 bit content
-+|.macro restoreregs
-+| la SP, CFRAME_SPACE(SP) // De-allocate stack frame.
-+| lmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
-+| ld f0, SAVE_FPR0
-+| ld f2, SAVE_FPR2
-+| ld f4, SAVE_FPR4
-+| ld f6, SAVE_FPR6
-+|// br r14 to return?
- |.endmacro
- |
- |// Type definitions. Some of these are only used for documentation.
-
-From dbf789536cfea0b3ac0a1f0a16a807b807735837 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Wed, 23 Nov 2016 17:30:10 -0500
-Subject: [PATCH 028/260] Fix stack frame layout.
-
-f8-f15 are callee-saved (not f0,f2,f4 and f6). There isn't space
-for them in the caller's stack frame so we need to increase the
-size of the interpreter's stack frame.
----
- src/lj_frame.h | 10 ++++-----
- src/vm_s390x.dasc | 57 +++++++++++++++++++++++++++++------------------
- 2 files changed, 40 insertions(+), 27 deletions(-)
-
-diff --git a/src/lj_frame.h b/src/lj_frame.h
-index 65affb5da..0b90f1421 100644
---- a/src/lj_frame.h
-+++ b/src/lj_frame.h
-@@ -265,13 +265,13 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
- #define CFRAME_OFS_MULTRES 0
- #define CFRAME_SHIFT_MULTRES 3
- #elif LJ_TARGET_S390X
--#define CFRAME_OFS_ERRF 216
--#define CFRAME_OFS_NRES 208
--#define CFRAME_OFS_PREV 200
--#define CFRAME_OFS_L 192
-+#define CFRAME_OFS_ERRF 280
-+#define CFRAME_OFS_NRES 272
-+#define CFRAME_OFS_PREV 264
-+#define CFRAME_OFS_L 256
- #define CFRAME_OFS_PC 168
- #define CFRAME_OFS_MULTRES 160
--#define CFRAME_SIZE 172
-+#define CFRAME_SIZE 240
- #define CFRAME_SHIFT_MULTRES 3
- #else
- #error "Missing CFRAME_* definitions for this architecture"
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 49ea335a3..f54711177 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -58,22 +58,28 @@
- |.define SP, r15
- |
- |// Stack layout while in interpreter. Must match with lj_frame.h.
--|.define CFRAME_SPACE, 176 // Delta for SP, 8 byte aligned.
-+|.define CFRAME_SPACE, 240 // Delta for SP, 8 byte aligned.
- |
- |// Register save area.
--|.define SAVE_FPR6, 328(SP)
--|.define SAVE_FPR4, 320(SP)
--|.define SAVE_FPR2, 312(SP)
--|.define SAVE_FPR0, 304(SP)
--|.define SAVE_GPRS, 224(SP) // Save area for r6-r15 (10*8 bytes).
-+|.define SAVE_GPRS, 288(SP) // Save area for r6-r15 (10*8 bytes).
- |
- |// Argument save area, each slot is 8-bytes (32-bit types are sign/zero extended).
--|.define SAVE_ERRF, 216(SP) // Argument 4, in r5.
--|.define SAVE_NRES, 208(SP) // Argument 3, in r4.
--|.define SAVE_CFRAME, 200(SP) // Argument 2, in r3.
--|.define SAVE_L, 192(SP) // Argument 1, in r2.
--|.define RESERVED, 184(SP) // Reserved for compiler use.
--|.define BACKCHAIN, 176(SP) // <- SP entering interpreter.
-+|.define SAVE_ERRF, 280(SP) // Argument 4, in r5.
-+|.define SAVE_NRES, 272(SP) // Argument 3, in r4.
-+|.define SAVE_CFRAME, 264(SP) // Argument 2, in r3.
-+|.define SAVE_L, 256(SP) // Argument 1, in r2.
-+|.define RESERVED, 248(SP) // Reserved for compiler use.
-+|.define BACKCHAIN, 240(SP) // <- SP entering interpreter.
-+|
-+|// Interpreter stack frame.
-+|.define SAVE_FPR15, 232(SP)
-+|.define SAVE_FPR14, 224(SP)
-+|.define SAVE_FPR13, 216(SP)
-+|.define SAVE_FPR12, 208(SP)
-+|.define SAVE_FPR11, 200(SP)
-+|.define SAVE_FPR10, 192(SP)
-+|.define SAVE_FPR9, 184(SP)
-+|.define SAVE_FPR8, 176(SP)
- |.define SAVE_PC, 168(SP)
- |.define SAVE_MULTRES, 160(SP)
- |
-@@ -83,19 +89,26 @@
- |.macro saveregs
- | lay SP, -CFRAME_SPACE(SP) // Allocate stack frame.
- | stmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
--| std f0, SAVE_FPR0
--| std f2, SAVE_FPR2
--| std f4, SAVE_FPR4
--| std f6, SAVE_FPR6
-+| std f8, SAVE_FPR8 // f8-f15 are callee-saved.
-+| std f9, SAVE_FPR9
-+| std f10, SAVE_FPR10
-+| std f11, SAVE_FPR11
-+| std f12, SAVE_FPR12
-+| std f13, SAVE_FPR13
-+| std f14, SAVE_FPR14
-+| std f15, SAVE_FPR15
- |.endmacro
- |
- |.macro restoreregs
--| la SP, CFRAME_SPACE(SP) // De-allocate stack frame.
--| lmg r6, r15, SAVE_GPRS // Technically we restore r15 regardless.
--| ld f0, SAVE_FPR0
--| ld f2, SAVE_FPR2
--| ld f4, SAVE_FPR4
--| ld f6, SAVE_FPR6
-+| ld f8, SAVE_FPR8 // f8-f15 are callee-saved.
-+| ld f9, SAVE_FPR9
-+| ld f10, SAVE_FPR10
-+| ld f11, SAVE_FPR11
-+| ld f12, SAVE_FPR12
-+| ld f13, SAVE_FPR13
-+| ld f14, SAVE_FPR14
-+| ld f15, SAVE_FPR15
-+| lmg r6, r15, SAVE_GPRS // Restores the stack pointer.
- |// br r14 to return?
- |.endmacro
- |
-
-From 5887962b0e956264f91357f168db7a182aff0cba Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Wed, 23 Nov 2016 18:02:00 -0500
-Subject: [PATCH 029/260] Add assembly for decoding instructions.
-
-Still guessing at this point. This code will need to be changed.
----
- src/vm_s390x.dasc | 31 +++++++++++++++++++++++--------
- 1 file changed, 23 insertions(+), 8 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index f54711177..f6f1adb1d 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -1,4 +1,4 @@
--|// Low-level VM code for IBM z/Architecture (s390x) CPUs.
-+|// Low-level VM code for IBM z/Architecture (s390x) CPUs in LJ_GC64 mode.
- |// Bytecode interpreter, fast functions and helper functions.
- |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
- |
-@@ -32,7 +32,7 @@
- |.define BASE, r7 // Base of current Lua stack frame.
- |.define KBASE, r8 // Constants of current Lua function.
- |.define PC, r9 // Next PC.
--|.define GLREG, r10 // Global state.
-+|.define DISPATCH, r10 // Opcode dispatch table.
- |.define LREG, r11 // Register holding lua_State (also in SAVE_L).
- |
- |// The following temporaries are not saved across C calls, except for RD.
-@@ -56,6 +56,8 @@
- |.define CRET1, r2
- |
- |.define SP, r15
-+|.define OP, r2
-+|.define TMP1, r3
- |
- |// Stack layout while in interpreter. Must match with lj_frame.h.
- |.define CFRAME_SPACE, 240 // Delta for SP, 8 byte aligned.
-@@ -134,14 +136,29 @@
- |.macro ins_A; .endmacro
- |.macro ins_AD; .endmacro
- |.macro ins_AJ; .endmacro
--|.macro ins_ABC; .endmacro
--|.macro ins_AB_; .endmacro
-+|.macro ins_ABC; .endmacro
-+|.macro ins_AB_; .endmacro
- |.macro ins_A_C; .endmacro
- |.macro ins_AND; .endmacro
- |
--|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
-+|// Instruction decode+dispatch.
-+| // TODO: tune this, right now we always decode RA-D even if they aren't used.
- |.macro ins_NEXT
--
-+| l RD, (PC)
-+| // 32 63
-+| // [ B | C | A | OP ]
-+| // [ D | A | OP ]
-+| llhr RA, RD
-+| srl RA, #8
-+| llcr OP, RD
-+| srl RD, #16
-+| lr RB, RD
-+| srl RB, #8
-+| llcr RC, RD
-+| la PC, 4(PC)
-+| llgfr TMP1, OP
-+| sll TMP1, #3 // TMP1=OP*8
-+| b 0(TMP1, DISPATCH)
- |.endmacro
- |
- |// Instruction footer.
-@@ -151,8 +168,6 @@
- | .define ins_next_, ins_NEXT
- |.else
- | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
--| // Affects only certain kinds of benchmarks (and only with -j off).
--| // Around 10%-30% slower on Core2, a lot more slower on P4.
- | .macro ins_next
- | jmp ->ins_next
- | .endmacro
-
-From 372f721e60691cbc22f0ca98edb4c7510ff35110 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Thu, 24 Nov 2016 11:25:07 +0530
-Subject: [PATCH 030/260] Update vm_s390x.dasc
-
-used MOVE LONG EXTENDED in place of mov and
-MOVE LONG instead of movzx
----
- src/vm_s390x.dasc | 15 +++++----------
- 1 file changed, 5 insertions(+), 10 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index f6f1adb1d..3758ee31e 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -180,21 +180,16 @@
- |// Call decode and dispatch.
- |.macro ins_callt
- | // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
--| mov PC, LFUNC:RB->pc
--| mov RA, [PC]
--| movzx OP, RAL
--| movzx RA, RAH
-+| mvcle PC, LFUNC:RB->pc
-+| mvcle RA, [PC]
-+| movcl OP, RAL
-+| movcl RA, RAH
- | add PC, 4
--|.if X64
--| jmp aword [DISPATCH+OP*8]
--|.else
--| jmp aword [DISPATCH+OP*4]
--|.endif
- |.endmacro
- |
- |.macro ins_call
- | // BASE = new base, RB = LFUNC, RD = nargs+1
--| mov [BASE-4], PC
-+| mvcle [BASE-4], PC
- | ins_callt
- |.endmacro
- |
-
-From 4ea7607e02a74aad1a7102e4df1a464cb18d037a Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Thu, 24 Nov 2016 14:02:50 +0530
-Subject: [PATCH 031/260] Update vm_s390x.dasc
-
-Added instructions to macros, referring to the x86 macro definitions.
-For macro ins_AND, did not find an equivalent s390x replacement instruction for 'not', hence have currently marked the place as '???'.
-
-'???' has to be replaced with the s390x complement instruction.
----
- src/vm_s390x.dasc | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index 3758ee31e..b2640e809 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -136,10 +136,10 @@
- |.macro ins_A; .endmacro
- |.macro ins_AD; .endmacro
- |.macro ins_AJ; .endmacro
--|.macro ins_ABC; .endmacro
--|.macro ins_AB_; .endmacro
--|.macro ins_A_C; .endmacro
--|.macro ins_AND; .endmacro
-+|.macro ins_ABC; mvcl RB, RCH; mvcl RC, RCL; .endmacro
-+|.macro ins_AB_; mvcl RB, RCH; .endmacro
-+|.macro ins_A_C; mvcl RC, RCL; .endmacro
-+|.macro ins_AND; ??? RD; .endmacro
- |
- |// Instruction decode+dispatch.
- | // TODO: tune this, right now we always decode RA-D even if they aren't used.
-
-From 3288e547bf6961fe04d79bd2e4f8daff819a47f0 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Thu, 24 Nov 2016 14:58:52 +0530
-Subject: [PATCH 032/260] Update vm_s390x.dasc
-
-Added definitions to the macros that test operand types, referring to the x86 definitions.
-No JUMP instruction was found for s390x, so BRANCH RELATIVE ON CONDITION (brc) is used instead.
-Not sure how the condition will be checked; this needs to be discussed.
----
- src/vm_s390x.dasc | 10 +++++-----
- 1 file changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/src/vm_s390x.dasc b/src/vm_s390x.dasc
-index b2640e809..72fe5d26f 100644
---- a/src/vm_s390x.dasc
-+++ b/src/vm_s390x.dasc
-@@ -196,11 +196,11 @@
- |//-----------------------------------------------------------------------
- |
- |// Macros to test operand types.
--|.macro checktp, .endmacro
--|.macro checknum, .endmacro
--|.macro checkint, .endmacro
--|.macro checkstr, .endmacro
--|.macro checktab, .endmacro
-+|.macro checktp, reg, tp; CG dword [BASE+reg*8+4], tp; .endmacro
-+|.macro checknum, reg, target; checktp reg, LJ_TISNUM; brc target; .endmacro // condition to chk is result is above or equal
-+|.macro checkint, reg, target; checktp reg, LJ_TISNUM; brc target; .endmacro // condition to chk is result is not equal
-+|.macro checkstr, reg, target; checktp reg, LJ_TSTR; brc target; .endmacro // condition to chk is result is nto equal
-+|.macro checktab, reg, target; checktp reg, LJ_TTAB; brc target; .endmacro // condition to chk is result is nto equal
- |
- |// These operands must be used with movzx.
- |.define PC_OP, byte [PC-4]
-
-From 9b01b4dc6f1663aa9117b4903c4a22822e1bdf34 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb@gmail.com>
-Date: Fri, 25 Nov 2016 19:44:04 +0530
-Subject: [PATCH 033/260] Added s390x instructions with their encoding
-
----
- dynasm/dasm_s390x.lua | 953 +++++++++++++++++++++++++-----------------
- 1 file changed, 575 insertions(+), 378 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 3542e7ee0..e39a27f1b 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -652,311 +652,565 @@ end)
-
- -- Template strings for ARM instructions.
- map_op = {
-- -- Basic data processing instructions.
-- --add
-- ar = "0000000000001a00", --RR
-- ay = "0000e3000000005a", --RXY-a
-- ag = "0000e30000000008",
-- agr = "00000000b9080000", --RRE
-- agf = "0000e30000000018",
-- agfr = "00000000b9180000",
-- agbr = "00000000b34a0000",
-- adbr = "00000000b31a0000",
-- aebr = "00000000b30a0000",
-- ah = "000000004a000000", --RXa
-- ahy = "0000e3000000007a",
-- afi = "0000c20900000000", --RIL-a --pls check if this is correct
-- agfi = "0000c20800000000",
-- aih = "0000cc0800000000",
-- al = "000000005e000000",
-- alr = "0000000000001e00",
-- aly = "0000e3000000005e", -- RXY-a
-- alg = "0000e3000000000a",
-- algr = "00000000b90a0000",
-- algf = "0000e3000000001a",
-- algfr = "00000000b91a0000",
-- alfi = "0000c20b00000000",
-- algfi = "0000c20a00000000",
-- alc = "0000e30000000098",
-- alcr = "00000000b9980000", -- RRE
-- alcg = "0000e30000000088",
-- alcgr = "00000000b9880000",
-- alsih = "0000cc0a00000000",
-- alsihn ="0000cc0b00000000",
-- axr = "0000000000003600", -- RR
-- ad = "000000006a000000", -- Rx-a
-- adr = "0000000000002a00",
-- ae = "000000007a000000",
-- aer = "0000000000003a00",
-- aw = "000000006e000000",
-- awr = "0000000000002e00",
-- au = "000000007e000000",
-- aur = "0000000000003e00",
--
---- and
-- n = "0000000054000000",
-- nr = "0000000000001400",
-- ny = "0000e30000000054", -- RXY-a
-- ng = "0000e30000000080",
-- ngr = "00000000b9800000",
-- nihf = "0000c00a00000000", --RIL-a
-- nihl = "0000c00b00000000",
--
-- --branch related instrcutions
-- bal = "0000000045000000", --RX-a
-- balr = "0000000000005000", --RR
-- bas = "000000004d000000",
-- basr = "0000000000000d00", -- this has leading zero in the instrcution opcode: 0d, need to take into consideration
-- bassm = "0000000000000c00",
-- bsm = "0000000000000b00",
-- bc = "0000000047000000",
-- bcr = "0000000000000700",
-- bct = "0000000046000000",
-- bctr = "0000000000000600",
-- bctg = "0000e30000000046",
-- bctgr = "00000000b9460000",
-- bxh = "0000000086000000", --RS-a
-- bxhg = "0000eb0000000044",
-- bxle = "0000000087000000",
-- bxleg = "0000eb0000000045", -- RSY-a
-- --bras = "000000000a750000RI-b",
-- brasl = "0000c00500000000", --RIL-b
-- --brc = "000000000a740000RI-c",
-- brcl = "0000c00400000000", --RIL-c
-- --brct = "000000000a760000RI-b",
-- --brctg = "000000000a770000RI-b",
-- brcth = "0000cc0600000000",
-- --brxh = "0000000000840000RSI",
-- --brxhg = "00000000ec440000RIE-e",
-- --brxle = "0000000000850000RSI",
-- --brxlg = "00000000ec450000RIE-e",
--
-- ----subtraction (basic operation)
-- sub = "00000000005b0000RX-a"
-- sr = "00000000001b0000RR"
-- srk = "00000000b9f90000RRF-a"
-- sy = "00000000e35b0000RXY-a"
-- sg = "00000000e3090000RXY-a"
-- sgr = "00000000b9090000RRE"
-- sgrk = "00000000b9e90000RRF-a"
-- sgf = "00000000e3190000RXY-a"
-- sgfr = "00000000b9190000RRE"
-- sh = "00000000004b0000RX-a"
-- shy = "00000000e37b0000RXY-a"
-- shhhr = "00000000b9c90000RRF-a"
-- shhlr = "00000000b9d90000RX-a"
-- sl = "00000000005f0000RX-a"
-- slr = "00000000001f0000RR"
-- slrk = "00000000b9f80000RR"
-- sly = "00000000e35f0000RXY-a",
-- slg = "00000000e30b0000RXY-a",
-- slgr = "00000000b9080000RRE",
-- slgrk = "00000000b9eb0000RRF-a",
-- slgf = "00000000e3180000RXY-a",
-- slgfr = "00000000b91b0000RRE",
-- slhhhr = "00000000b9cb0000RRF-a",
-- slhhlr = "00000000b9db0000RRF-a",
-- slfi = "000000000c250000RIL-a",
-- slgfi = "000000000c240000RIL-a",
-- slb = "00000000e3990000RXY-a",
-- slbr = "00000000b9990000RRE" ,
-- slbg = "00000000e3890000RXY-a",
-- slbgr = "00000000b9890000RXY-a",
--
-- cmp_2 = "0000000000590000RX-a|0000000000190000RR|00000000e3590000RXY-a",
-- cmp_3 = "00000000e3200000RXY-a|00000000b9200000RRE|00000000e3300000RXY-a| 00000000b9300000RRE",
--
-- div_2 = "00000000005d0000RX-a|00000000001d0000RR|00000000e3970000RXY-a|00000000b9970000RRE",
-- div_3 ="00000000e3870000RXY-a|00000000b9870000RRE",
-- div_sing ="00000000e30d0000RXY-a|00000000b90d0000RRE|00000000e31d0000RXY-a|00000000b91d0000RRE",
--
-- eor_2 = "0000000000570000RX-a|0000000000170000RR|00000000b9f70000RRF-a|00000000e3570000RXY-a",
-- eor_3 = "00000000e3820000RXY-a|00000000b9820000RRE|00000000b9e70000RRF-a|
-- eor_c = "0000000000d70000SS-a",
-- eor_i = "0000000000970000SI| 00000000eb570000|000000000c060000a|000000000c070000RIL-a",
--
-- -- load instruction to be added and the following instructions need to be changed (are not s390x related)
--
-- neg_2 = "4b0003e0DMg",
-- neg_3 = "4b0003e0DMSg",
-- negs_2 = "6b0003e0DMg",
-- negs_3 = "6b0003e0DMSg",
-- adc_3 = "1a000000DNMg",
-- adcs_3 = "3a000000DNMg",
-- sbc_3 = "5a000000DNMg",
-- sbcs_3 = "7a000000DNMg",
-- ngc_2 = "5a0003e0DMg",
-- ngcs_2 = "7a0003e0DMg",
-- and_3 = "0a000000DNMg|12000000pDNig",
-- and_4 = "0a000000DNMSg",
-- orr_3 = "2a000000DNMg|32000000pDNig",
-- orr_4 = "2a000000DNMSg",
-- eor_3 = "4a000000DNMg|52000000pDNig",
-- eor_4 = "4a000000DNMSg",
-- ands_3 = "6a000000DNMg|72000000DNig",
-- ands_4 = "6a000000DNMSg",
-- tst_2 = "6a00001fNMg|7200001fNig",
-- tst_3 = "6a00001fNMSg",
-- bic_3 = "0a200000DNMg",
-- bic_4 = "0a200000DNMSg",
-- orn_3 = "2a200000DNMg",
-- orn_4 = "2a200000DNMSg",
-- eon_3 = "4a200000DNMg",
-- eon_4 = "4a200000DNMSg",
-- bics_3 = "6a200000DNMg",
-- bics_4 = "6a200000DNMSg",
-- movn_2 = "12800000DWg",
-- movn_3 = "12800000DWRg",
-- movz_2 = "52800000DWg",
-- movz_3 = "52800000DWRg",
-- movk_2 = "72800000DWg",
-- movk_3 = "72800000DWRg",
-- -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
-- mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
-- mov_3 = "2a0003e0DMSg",
-- mvn_2 = "2a2003e0DMg",
-- mvn_3 = "2a2003e0DMSg",
-- adr_2 = "10000000DBx",
-- adrp_2 = "90000000DBx",
-- csel_4 = "1a800000DNMCg",
-- csinc_4 = "1a800400DNMCg",
-- csinv_4 = "5a800000DNMCg",
-- csneg_4 = "5a800400DNMCg",
-- cset_2 = "1a9f07e0Dcg",
-- csetm_2 = "5a9f03e0Dcg",
-- cinc_3 = "1a800400DNmcg",
-- cinv_3 = "5a800000DNmcg",
-- cneg_3 = "5a800400DNmcg",
-- ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
-- ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
-- madd_4 = "1b000000DNMAg",
-- msub_4 = "1b008000DNMAg",
-- mul_3 = "1b007c00DNMg",
-- mneg_3 = "1b00fc00DNMg",
-- smaddl_4 = "9b200000DxNMwAx",
-- smsubl_4 = "9b208000DxNMwAx",
-- smull_3 = "9b207c00DxNMw",
-- smnegl_3 = "9b20fc00DxNMw",
-- smulh_3 = "9b407c00DNMx",
-- umaddl_4 = "9ba00000DxNMwAx",
-- umsubl_4 = "9ba08000DxNMwAx",
-- umull_3 = "9ba07c00DxNMw",
-- umnegl_3 = "9ba0fc00DxNMw",
-- umulh_3 = "9bc07c00DNMx",
-- udiv_3 = "1ac00800DNMg",
-- sdiv_3 = "1ac00c00DNMg",
-- -- Bit operations.
-- sbfm_4 = "13000000DN12w|93400000DN12x",
-- bfm_4 = "33000000DN12w|b3400000DN12x",
-- ubfm_4 = "53000000DN12w|d3400000DN12x",
-- extr_4 = "13800000DNM2w|93c00000DNM2x",
-- sxtb_2 = "13001c00DNw|93401c00DNx",
-- sxth_2 = "13003c00DNw|93403c00DNx",
-- sxtw_2 = "93407c00DxNw",
-- uxtb_2 = "53001c00DNw",
-- uxth_2 = "53003c00DNw",
-- sbfx_4 = op_alias("sbfm_4", alias_bfx),
-- bfxil_4 = op_alias("bfm_4", alias_bfx),
-- ubfx_4 = op_alias("ubfm_4", alias_bfx),
-- sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
-- bfi_4 = op_alias("bfm_4", alias_bfiz),
-- ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
-- lsl_3 = function(params, nparams)
-- if params and params[3]:byte() == 35 then
-- return alias_lslimm(params, nparams)
-- else
-- return op_template(params, "1ac02000DNMg", nparams)
-- end
-- end,
-- lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
-- asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
-- ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
-- clz_2 = "5ac01000DNg",
-- cls_2 = "5ac01400DNg",
-- rbit_2 = "5ac00000DNg",
-- rev_2 = "5ac00800DNw|dac00c00DNx",
-- rev16_2 = "5ac00400DNg",
-- rev32_2 = "dac00800DNx",
-- -- Loads and stores.
-- ["strb_*"] = "38000000DwL",
-- ["ldrb_*"] = "38400000DwL",
-- ["ldrsb_*"] = "38c00000DwL|38800000DxL",
-- ["strh_*"] = "78000000DwL",
-- ["ldrh_*"] = "78400000DwL",
-- ["ldrsh_*"] = "78c00000DwL|78800000DxL",
-- ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
-- ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
-- ["ldrsw_*"] = "98000000DxB|b8800000DxL",
-- -- NOTE: ldur etc. are handled by ldr et al.
-- ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
-- ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
-- ["ldpsw_*"] = "68400000DAxP",
-- -- Branches.
-- b_1 = "14000000B",
-- bl_1 = "94000000B",
-- blr_1 = "d63f0000Nx",
-- br_1 = "d61f0000Nx",
-- ret_0 = "d65f03c0",
-- ret_1 = "d65f0000Nx",
-- -- b.cond is added below.
-- cbz_2 = "34000000DBg",
-- cbnz_2 = "35000000DBg",
-- tbz_3 = "36000000DTBw|36000000DTBx",
-- tbnz_3 = "37000000DTBw|37000000DTBx",
-- -- Miscellaneous instructions.
-- -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
-- -- TODO: sys, sysl, ic, dc, at, tlbi
-- -- TODO: hint, yield, wfe, wfi, sev, sevl
-- -- TODO: clrex, dsb, dmb, isb
-- nop_0 = "d503201f",
-- brk_0 = "d4200000",
-- brk_1 = "d4200000W",
-- -- Floating point instructions.
-- fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
-- fabs_2 = "1e20c000DNf",
-- fneg_2 = "1e214000DNf",
-- fsqrt_2 = "1e21c000DNf",
-- fcvt_2 = "1e22c000DdNs|1e624000DsNd",
-- -- TODO: half-precision and fixed-point conversions.
-- fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
-- fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
-- fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
-- fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
-- fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
-- fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
-- fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
-- fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
-- fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
-- fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
-- scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
-- ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
-- frintn_2 = "1e244000DNf",
-- frintp_2 = "1e24c000DNf",
-- frintm_2 = "1e254000DNf",
-- frintz_2 = "1e25c000DNf",
-- frinta_2 = "1e264000DNf",
-- frintx_2 = "1e274000DNf",
-- frinti_2 = "1e27c000DNf",
-- fadd_3 = "1e202800DNMf",
-- fsub_3 = "1e203800DNMf",
-- fmul_3 = "1e200800DNMf",
-- fnmul_3 = "1e208800DNMf",
-- fdiv_3 = "1e201800DNMf",
-- fmadd_4 = "1f000000DNMAf",
-- fmsub_4 = "1f008000DNMAf",
-- fnmadd_4 = "1f200000DNMAf",
-- fnmsub_4 = "1f208000DNMAf",
-- fmax_3 = "1e204800DNMf",
-- fmaxnm_3 = "1e206800DNMf",
-- fmin_3 = "1e205800DNMf",
-- fminnm_3 = "1e207800DNMf",
-- fcmp_2 = "1e202000NMf|1e202008NZf",
-- fcmpe_2 = "1e202010NMf|1e202018NZf",
-- fccmp_4 = "1e200400NMVCf",
-- fccmpe_4 = "1e200410NMVCf",
-- fcsel_4 = "1e200c00DNMCf",
-- -- TODO: crc32*, aes*, sha*, pmull
-- -- TODO: SIMD instructions.
-+ a = "000000005a000000j",
-+ar = "0000000000001a00g",
-+ay = "0000e3000000005ak",
-+ag = "0000e30000000008k",
-+agr = "00000000b9080000h",
-+agf = "0000e30000000018k",
-+agfr = "00000000b9180000h",
-+axbr = "00000000b34a0000h",
-+adbr = "00000000b31a0000h",
-+aebr = "00000000b30a0000h",
-+ah = "000000004a000000j",
-+ahy = "0000e3000000007ak",
-+afi = "0000c20900000000l",
-+agfi = "0000c20800000000l",
-+aih = "0000cc0800000000l",
-+al = "000000005e000000j",
-+alr = "0000000000001e00g",
-+aly = "0000e3000000005ek",
-+alg = "0000e3000000000ak",
-+algr = "00000000b90a0000h",
-+algf = "0000e3000000001ak",
-+algfr = "00000000b91a0000h",
-+alfi = "0000c20b00000000l",
-+algfi = "0000c20a00000000l",
-+alc = "0000e30000000098k",
-+alcr = "00000000b9980000h",
-+alcg = "0000e30000000088k",
-+alcgr = "00000000b9880000h",
-+alsih = "0000cc0a00000000l",
-+alsihn = "0000cc0b00000000l",
-+axr = "0000000000003600g",
-+ad = "000000006a000000j",
-+adr = "0000000000002a00g",
-+ae = "000000007a000000j",
-+aer = "0000000000003a00g",
-+aw = "000000006e000000j",
-+awr = "0000000000002e00g",
-+au = "000000007e000000j",
-+aur = "0000000000003e00g",
-+n = "0000000054000000j",
-+nr = "0000000000001400g",
-+ny = "0000e30000000054k",
-+ng = "0000e30000000080k",
-+ngr = "00000000b9800000h",
-+nihf = "0000c00a00000000l",
-+nilf = "0000c00b00000000l",
-+bal = "0000000045000000j",
-+balr = "000000000000500g",
-+bas = "000000004d000000j",
-+basr = "0000000000000d00g",
-+bassm = "0000000000000c00g",
-+bsa = "00000000b25a0000h",
-+bsm = "0000000000000b00g",
-+bakr = "00000000b2400000h",
-+bsg = "00000000b2580000h",
-+bc = "0000000047000000j",
-+bcr = "000000000000700g",
-+bct = "0000000046000000j",
-+bctr = "000000000000600g",
-+bctg = "0000e30000000046k",
-+bctgr = "00000000b9460000h",
-+bxh = "0000000086000000m",
-+bxhg = "0000eb0000000044n",
-+bxle = "0000000087000000m",
-+bxleg = "0000eb0000000045n",
-+brasl = "0000c00500000000l",
-+brcl = "0000c00400000000l",
-+brcth = "0000cc0600000000l",
-+cksm = "00000000b2410000h",
-+km = "00000000b92e0000h",
-+kmf = "00000000b92a0000h",
-+kmc = "00000000b92f0000h",
-+kmo = "00000000b92b0000h",
-+c = "0000000059000000j",
-+cr = "0000000000001900g",
-+cy = "0000e30000000059k",
-+cg = "0000e30000000020k",
-+cgr = "00000000b9200000h",
-+cgf = "0000e30000000030k",
-+cgfr = "00000000b9300000h",
-+cxbr = "00000000b3490000h",
-+cxtr = "00000000b3ec0000h",
-+cxr = "00000000b3690000h",
-+cdbr = "00000000b3190000h",
-+cdtr = "00000000b3e40000h",
-+cd = "0000000069000000j",
-+cdr = "0000000000002900g",
-+cebr = "00000000b3090000h",
-+ce = "0000000079000000j",
-+cer = "0000000000003900g",
-+kxbr = "00000000b3480000h",
-+kxtr = "00000000b3e80000h",
-+kdbr = "00000000b3180000h",
-+kdtr = "00000000b3e00000h",
-+kebr = "00000000b3080000h",
-+cs = "00000000ba000000m",
-+csy = "0000eb0000000014n",
-+csg = "0000eb0000000030n",
-+csp = "00000000b2500000h",
-+cspg = "00000000b98a0000h",
-+cextr = "00000000b3fc0000h",
-+cedtr = "00000000b3f40000h",
-+cds = "00000000bb000000m",
-+cdsy = "0000eb0000000031n",
-+cdsg = "0000eb000000003en",
-+ch = "0000000049000000j",
-+chy = "0000e30000000079k",
-+cgh = "0000e30000000034k",
-+chrl = "0000c60500000000l",
-+cghrl = "0000c60400000000l",
-+chf = "0000e300000000cdk",
-+chhr = "00000000b9cd0000h",
-+chlr = "00000000b9dd0000h",
-+cfi = "0000c20d00000000l",
-+cgfi = "0000c20c00000000l",
-+cih = "0000cc0d00000000l",
-+cl = "0000000055000000j",
-+clr = "0000000000001500g",
-+cly = "0000e30000000055k",
-+clg = "0000e30000000021k",
-+clgr = "00000000b9210000h",
-+clgf = "0000e30000000031k",
-+clgfr = "00000000b9310000h",
-+clmh = "0000eb0000000020n",
-+clm = "00000000bd000000m",
-+clmy = "0000eb0000000021n",
-+clhf = "0000e300000000cfk",
-+clhhr = "00000000b9cf0000h",
-+clhlr = "00000000b9df0000h",
-+clfi = "0000c20f00000000l",
-+clgfi = "0000c20e00000000l",
-+clih = "0000cc0f00000000l",
-+clcl = "0000000000000f00g",
-+clcle = "00000000a9000000m",
-+clclu = "0000eb000000008fn",
-+clrl = "0000c60f00000000l",
-+clhrl = "0000c60700000000l",
-+clgrl = "0000c60a00000000l",
-+clghrl = "0000c60600000000l",
-+clgfrl = "0000c60e00000000l",
-+clst = "00000000b25d0000h",
-+crl = "0000c60d00000000l",
-+cgrl = "0000c60800000000l",
-+cgfrl = "0000c60c00000000l",
-+ cuse = "00000000b2570000h",
-+cmpsc = "00000000b2630000h",
-+kimd = "00000000b93e0000h",
-+klmd = "00000000b93f0000h",
-+kmac = "00000000b91e0000h",
-+thdr = "00000000b3590000h",
-+thder = "00000000b3580000h",
-+cxfbr = "00000000b3960000h",
-+cxftr = "00000000b9590000h",
-+cxfr = "00000000b3b60000h",
-+cdfbr = "00000000b3950000h",
-+cdftr = "00000000b9510000h",
-+cdfr = "00000000b3b50000h",
-+cefbr = "00000000b3940000h",
-+cefr = "00000000b3b40000h",
-+cxgbr = "00000000b3a60000h",
-+cxgtr = "00000000b3f90000h",
-+cxgr = "00000000b3c60000h",
-+cdgbr = "00000000b3a50000h",
-+cdgtr = "00000000b3f10000h",
-+cdgr = "00000000b3c50000h",
-+cegbr = "00000000b3a40000h",
-+cegr = "00000000b3c40000h",
-+cxstr = "00000000b3fb0000h",
-+cdstr = "00000000b3f30000h",
-+cxutr = "00000000b3fa0000h",
-+cdutr = "00000000b3f20000h",
-+cvb = "000000004f000000j",
-+cvby = "0000e30000000006k",
-+cvbg = "0000e3000000000ek",
-+cvd = "000000004e000000j",
-+cvdy = "0000e30000000026k",
-+cvdg = "0000e3000000002ek",
-+cuxtr = "00000000b3ea0000h",
-+cudtr = "00000000b3e20000h",
-+cu42 = "00000000b9b30000h",
-+cu41 = "00000000b9b20000h",
-+cpya = "00000000b24d0000h",
-+d = "000000005d000000j",
-+dr = "0000000000001d00g",
-+dxbr = "00000000b34d0000h",
-+dxr = "00000000b22d0000h",
-+ddbr = "00000000b31d0000h",
-+dd = "000000006d000000j",
-+ddr = "0000000000002d00g",
-+debr = "00000000b30d0000h",
-+de = "000000007d000000j",
-+der = "0000000000003d00g",
-+dl = "0000e30000000097k",
-+dlr = "00000000b9970000h",
-+dlg = "0000e30000000087k",
-+dlgr = "00000000b9870000h",
-+dsg = "0000e3000000000dk",
-+dsgr = "00000000b90d0000h",
-+dsgf = "0000e3000000001dk",
-+dsgfr = "00000000b91d0000h",
-+x = "0000000057000000j",
-+xr = "0000000000001700g",
-+xy = "0000e30000000057k",
-+xg = "0000e30000000082k",
-+xgr = "00000000b9820000h",
-+xihf = "0000c00600000000l",
-+xilf = "0000c00700000000l",
-+ex = "0000000044000000j",
-+exrl = "0000c60000000000l",
-+ear = "00000000b24f0000h",
-+esea = "00000000b99d0000h",
-+eextr = "00000000b3ed0000h",
-+eedtr = "00000000b3e50000h",
-+ecag = "0000eb000000004cn",
-+efpc = "00000000b38c0000h",
-+epar = "00000000b2260000h",
-+epair = "00000000b99a0000h",
-+epsw = "00000000b98d0000h",
-+esar = "00000000b2270000h",
-+esair = "00000000b99b0000h",
-+esxtr = "00000000b3ef0000h",
-+esdtr = "00000000b3e70000h",
-+ereg = "00000000b2490000h",
-+eregg = "00000000b90e0000h",
-+esta = "00000000b24a0000h",
-+flogr = "00000000b9830000h",
-+hdr = "0000000000002400g",
-+her = "0000000000003400g",
-+iac = "00000000b2240000h",
-+ic = "0000000043000000j",
-+icy = "0000e30000000073k",
-+icmh = "0000eb0000000080n",
-+icm = "00000000bf000000m",
-+icmy = "0000eb0000000081n",
-+iihf = "0000c00800000000l",
-+iilf = "0000c00900000000l",
-+ipm = "00000000b2220000h",
-+iske = "00000000b2290000h",
-+ivsk = "00000000b2230000h",
-+l = "0000000058000000j",
-+lr = "0000000000001800g",
-+ly = "0000e30000000058k",
-+lg = "0000e30000000004k",
-+lgr = "00000000b9040000h",
-+lgf = "0000e30000000014k",
-+lgfr = "00000000b9140000h",
-+lxr = "00000000b3650000h",
-+ld = "0000000068000000j",
-+ldr = "0000000000002800g",
-+ldy = "0000ed0000000065k",
-+le = "0000000078000000j",
-+ler = "0000000000003800g",
-+ ley = "0000ed0000000064k",
-+lam = "000000009a000000m",
-+lamy = "0000eb000000009an",
-+la = "0000000041000000j",
-+lay = "0000e30000000071k",
-+lae = "0000000051000000j",
-+laey = "0000e30000000075k",
-+larl = "0000c00000000000l",
-+laa = "0000eb00000000f8n",
-+laag = "0000eb00000000e8n",
-+laal = "0000eb00000000fan",
-+laalg = "0000eb00000000ean",
-+lan = "0000eb00000000f4n",
-+lang = "0000eb00000000e4n",
-+lax = "0000eb00000000f7n",
-+laxg = "0000eb00000000e7n",
-+lao = "0000eb00000000f6n",
-+laog = "0000eb00000000e6n",
-+lt = "0000e30000000012k",
-+ltr = "0000000000001200g",
-+ltg = "0000e30000000002k",
-+ltgr = "00000000b9020000h",
-+ltgf = "0000e30000000032k",
-+ltgfr = "00000000b9120000h",
-+ltxbr = "00000000b3420000h",
-+ltxtr = "00000000b3de0000h",
-+ltxr = "00000000b3620000h",
-+ltdbr = "00000000b3120000h",
-+ltdtr = "00000000b3d60000h",
-+ltdr = "0000000000002200g",
-+ltebr = "00000000b3020000h",
-+lter = "0000000000003200g",
-+lb = "0000e30000000076k",
-+lbr = "00000000b9260000h",
-+lgb = "0000e30000000077k",
-+lgbr = "00000000b9060000h",
-+ lbh = "0000e300000000c0k",
-+lcr = "0000000000001300g",
-+lcgr = "00000000b9030000h",
-+lcgfr = "00000000b9130000h",
-+lcxbr = "00000000b3430000h",
-+lcxr = "00000000b3630000h",
-+lcdbr = "00000000b3130000h",
-+lcdr = "0000000000002300g",
-+lcdfr = "00000000b3730000h",
-+lcebr = "00000000b3030000h",
-+lcer = "0000000000003300g",
-+lctl = "00000000b7000000m",
-+lctlg = "0000eb000000002fn",
-+fixr = "00000000b3670000h",
-+fidr = "00000000b37f0000h",
-+fier = "00000000b3770000h",
-+ldgr = "00000000b3c10000h",
-+lgdr = "00000000b3cd0000h",
-+lh = "0000000048000000j",
-+lhr = "00000000b9270000h",
-+lhy = "0000e30000000078k",
-+lgh = "0000e30000000015k",
-+lghr = "00000000b9070000h",
-+lhh = "0000e300000000c4k",
-+lhrl = "0000c40500000000l",
-+lghrl = "0000c40400000000l",
-+lfh = "0000e300000000cak",
-+lgfi = "0000c00100000000l",
-+lxdbr = "00000000b3050000h",
-+lxdr = "00000000b3250000h",
-+lxebr = "00000000b3060000h",
-+lxer = "00000000b3260000h",
-+ldebr = "00000000b3040000h",
-+lder = "00000000b3240000h",
-+llgf = "0000e30000000016k",
-+llgfr = "00000000b9160000h",
-+llc = "0000e30000000094k",
-+llcr = "00000000b9940000h",
-+llgc = "0000e30000000090k",
-+llgcr = "00000000b9840000h",
-+llch = "0000e300000000c2k",
-+llh = "0000e30000000095k",
-+llhr = "00000000b9950000h",
-+llgh = "0000e30000000091k",
-+llghr = "00000000b9850000h",
-+llhh = "0000e300000000c6k",
-+llhrl = "0000c40200000000l",
-+llghrl = "0000c40600000000l",
-+llihf = "0000c00e00000000l",
-+llilf = "0000c00f00000000l",
-+llgfrl = "0000c40e00000000l",
-+llgt = "0000e30000000017k",
-+llgtr = "00000000b9170000h",
-+lm = "0000000098000000m",
-+lmy = "0000eb0000000098n",
-+lmg = "0000eb0000000004n",
-+lmh = "0000eb0000000096n",
-+lnr = "0000000000001100g",
-+lngr = "00000000b9010000h",
-+lngfr = "00000000b9110000h",
-+lnxbr = "00000000b3410000h",
-+lnxr = "00000000b3610000h",
-+lndbr = "00000000b3110000h",
-+lndr = "0000000000002100g",
-+lndfr = "00000000b3710000h",
-+lnebr = "00000000b3010000h",
-+lner = "0000000000003100g",
-+loc = "0000eb00000000f2n",
-+locg = "0000eb00000000e2n",
-+lpq = "0000e3000000008fk",
-+lpr = "0000000000001000g",
-+lpgr = "00000000b9000000h",
-+lpgfr = "00000000b9100000h",
-+lpxbr = "00000000b3400000h",
-+lpxr = "00000000b3600000h",
-+lpdbr = "00000000b3100000h",
-+lpdr = "0000000000002000g",
-+lpdfr = "00000000b3700000h",
-+lpebr = "00000000b3000000h",
-+lper = "0000000000003000g",
-+lra = "00000000b1000000j",
-+lray = "0000e30000000013k",
-+lrag = "0000e30000000003k",
-+lrl = "0000c40d00000000l",
-+lgrl = "0000c40800000000l",
-+lgfrl = "0000c40c00000000l",
-+lrvh = "0000e3000000001fk",
-+lrv = "0000e3000000001ek",
-+lrvr = "00000000b91f0000h",
-+lrvg = "0000e3000000000fk",
-+lrvgr = "00000000b90f0000h",
-+ldxbr = "00000000b3450000h",
-+ldxr = "0000000000002500g",
-+lrdr = "0000000000002500g",
-+lexbr = "00000000b3460000h",
-+lexr = "00000000b3660000h",
-+ledbr = "00000000b3440000h",
-+ledr = "0000000000003500g",
-+lrer = "0000000000003500g",
-+lura = "00000000b24b0000h",
-+lurag = "00000000b9050000h",
-+lzxr = "00000000b3760000h",
-+lzdr = "00000000b3750000h",
-+lzer = "00000000b3740000h",
-+msta = "00000000b2470000h",
-+mvcl = "0000000000000e00g",
-+mvcle = "00000000a8000000m",
-+mvclu = "0000eb000000008en",
-+mvpg = "00000000b2540000h",
-+mvst = "00000000b2550000h",
-+m = "000000005c000000j",
-+mfy = "0000e3000000005ck",
-+mr = "0000000000001c00g",
-+mxbr = "00000000b34c0000h",
-+mxr = "0000000000002600g",
-+mdbr = "00000000b31c0000h",
-+md = "000000006c000000j",
-+mdr = "0000000000002c00g",
-+mxdbr = "00000000b3070000h",
-+mxd = "0000000067000000j",
-+mxdr = "0000000000002700g",
-+meebr = "00000000b3170000h",
-+meer = "00000000b3370000h",
-+mdebr = "00000000b30c0000h",
-+mde = "000000007c000000j",
-+mder = "0000000000003c00g",
-+me = "000000007c000000j",
-+mer = "0000000000003c00g",
-+mh = "000000004c000000j",
-+mhy = "0000e3000000007ck",
-+mlg = "0000e30000000086k",
-+mlgr = "00000000b9860000h",
-+ml = "0000e30000000096k",
-+mlr = "00000000b9960000h",
-+ms = "0000000071000000j",
-+msr = "00000000b2520000h",
-+msy = "0000e30000000051k",
-+msg = "0000e3000000000ck",
-+msgr = "00000000b90c0000h",
-+msgf = "0000e3000000001ck",
-+msgfr = "00000000b91c0000h",
-+msfi = "0000c20100000000l",
-+msgfi = "0000c20000000000l",
-+o = "0000000056000000j",
-+or = "0000000000001600g",
-+oy = "0000e30000000056k",
-+og = "0000e30000000081k",
-+ogr = "00000000b9810000h",
-+oihf = "0000c00c00000000l",
-+oilf = "0000c00d00000000l",
-+pgin = "00000000b22e0000h",
-+pgout = "00000000b22f0000h",
-+pcc = "00000000b92c0000h",
-+pckmo = "00000000b9280000h",
-+pfmf = "00000000b9af0000h",
-+ptf = "00000000b9a20000h",
-+popcnt = "00000000b9e10000h",
-+pfd = "0000e30000000036k",
-+pfdrl = "0000c60200000000l",
-+pt = "00000000b2280000h",
-+pti = "00000000b99e0000h",
-+palb = "00000000b2480000h",
-+rrbe = "00000000b22a0000h",
-+rrbm = "00000000b9ae0000h",
-+rll = "0000eb000000001dn",
-+rllg = "0000eb000000001cn",
-+srst = "00000000b25e0000h",
-+srstu = "00000000b9be0000h",
-+sar = "00000000b24e0000h",
-+sfpc = "00000000b3840000h",
-+sfasr = "00000000b3850000h",
-+spm = "000000000000400g",
-+ssar = "00000000b2250000h",
-+ssair = "00000000b99f0000h",
-+slda = "000000008f000000m",
-+sldl = "000000008d000000m",
-+sla = "000000008b000000m",
-+slak = "0000eb00000000ddn",
-+slag = "0000eb000000000bn",
-+sll = "0000000089000000m",
-+sllk = "0000eb00000000dfn",
-+sllg = "0000eb000000000dn",
-+srda = "000000008e000000m",
-+srdl = "000000008c000000m",
-+sra = "000000008a000000m",
-+srak = "0000eb00000000dcn",
-+srag = "0000eb000000000an",
-+srl = "0000000088000000m",
-+srlk = "0000eb00000000den",
-+srlg = "0000eb000000000cn",
-+sqxbr = "00000000b3160000h",
-+sqxr = "00000000b3360000h",
-+sqdbr = "00000000b3150000h",
-+sqdr = "00000000b2440000h",
-+sqebr = "00000000b3140000h",
-+sqer = "00000000b2450000h",
-+st = "0000000050000000j",
-+sty = "0000e30000000050k",
-+stg = "0000e30000000024k",
-+std = "0000000060000000j",
-+stdy = "0000ed0000000067k",
-+ste = "0000000070000000j",
-+stey = "0000ed0000000066k",
-+stam = "000000009b000000m",
-+stamy = "0000eb000000009bn",
-+stc = "0000000042000000j",
-+stcy = "0000e30000000072k",
-+stch = "0000e300000000c3k",
-+stcmh = "0000eb000000002cn",
-+stcm = "00000000be000000m",
-+stcmy = "0000eb000000002dn",
-+stctl = "00000000b6000000m",
-+stctg = "0000eb0000000025n",
-+sth = "0000000040000000j",
-+sthy = "0000e30000000070k",
-+sthh = "0000e300000000c7k",
-+sthrl = "0000c40700000000l",
-+stfh = "0000e300000000cbk",
-+stm = "0000000090000000m",
-+stmy = "0000eb0000000090n",
-+stmg = "0000eb0000000024n",
-+stmh = "0000eb0000000026n",
-+stoc = "0000eb00000000f3n",
-+stocg = "0000eb00000000e3n",
-+stpq = "0000e3000000008ek",
-+strl = "0000c40f00000000l",
-+stgrl = "0000c40b00000000l",
-+strvh = "0000e3000000003fk",
-+strv = "0000e3000000003ek",
-+strvg = "0000e3000000002fk",
-+stura = "00000000b2460000h",
-+sturg = "00000000b9250000h",
-+s = "000000005b000000j",
-+sr = "0000000000001b00g",
-+sy = "0000e3000000005bk",
-+sg = "0000e30000000009k",
-+sgr = "00000000b9090000h",
-+sgf = "0000e30000000019k",
-+sgfr = "00000000b9190000h",
-+sxbr = "00000000b34b0000h",
-+sdbr = "00000000b31b0000h",
-+sebr = "00000000b30b0000h",
-+sh = "000000004b000000j",
-+shy = "0000e3000000007bk",
-+sl = "000000005f000000j",
-+slr = "0000000000001f00g",
-+sly = "0000e3000000005fk",
-+slg = "0000e3000000000bk",
-+slgr = "00000000b90b0000h",
-+slgf = "0000e3000000001bk",
-+slgfr = "00000000b91b0000h",
-+slfi = "0000c20500000000l",
-+slgfi = "0000c20400000000l",
-+slb = "0000e30000000099k",
-+slbr = "00000000b9990000h",
-+slbg = "0000e30000000089k",
-+slbgr = "00000000b9890000h",
-+sxr = "0000000000003700g",
-+sd = "000000006b000000j",
-+sdr = "0000000000002b00g",
-+se = "000000007b000000j",
-+ser = "0000000000003b00g",
-+su = "000000007f000000j",
-+sur = "0000000000003f00g",
-+sw = "000000006f000000j",
-+swr = "0000000000002f00g",
-+tar = "00000000b24c0000h",
-+tb = "00000000b22c0000h",
-+trace = "0000000099000000m",
-+tracg = "0000eb000000000fn",
-+tre = "00000000b2a50000h",
- }
- for cond,c in pairs(map_cond) do
- map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
-@@ -964,87 +1218,30 @@ end
- ------------------------------------------------------------------------------
- -- Handle opcodes defined with template strings.
- local function parse_template(params, template, nparams, pos)
-- local op = tonumber(sub(template, 1, 12), 16) -- 13-16 ignored since those are trailing zeros added after the instruction
-+ local op = tonumber(sub(template, 1, 16), 16) --
- -- 00000000005a0000 converts to 90
- local n,rs = 1,26
-
- parse_reg_type = false
- -- Process each character. (if its RX-a==> 1st iteration gets R, 2nd==X and so on)
- for p in gmatch(sub(template, 17), ".") do
-- local q = params[n]
-- if p == "R" then
-- op = op + parse_reg(q); n = n + 1
-- elseif p == "N" then
-- op = op + shl(parse_reg(q), 5); n = n + 1
-- elseif p == "M" then
-- op = op + shl(parse_reg(q), 16); n = n + 1
-- elseif p == "A" then
-- op = op + shl(parse_reg(q), 10); n = n + 1
-+ local pr1,pr2,pr3
-+ if p == "g" then
-+ pr1,pr2=param[n],param[n+1]
-+ op = op + parse_reg(pr1)+parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
-+ elseif p == "h" then
-+
-+ elseif p == "j" then
-+
-+ elseif p == "k" then
-+
-+ elseif p == "l" then
-+
- elseif p == "m" then
-- op = op + shl(parse_reg(params[n-1]), 16)
-- elseif p == "p" then
-- if q == "sp" then params[n] = "@x31" end
-- elseif p == "g" then
-- if parse_reg_type == "x" then
-- op = op + 0x80000000
-- elseif parse_reg_type ~= "w" then
-- werror("bad register type")
-- end
-- parse_reg_type = false
-- elseif p == "f" then
-- if parse_reg_type == "d" then
-- op = op + 0x00400000
-- elseif parse_reg_type ~= "s" then
-- werror("bad register type")
-- end
-- parse_reg_type = false
-- elseif p == "x" or p == "w" or p == "d" or p == "s" then
-- if parse_reg_type ~= p then
-- werror("register size mismatch")
-+
-+ elseif p == "n" then
-+
- end
-- parse_reg_type = false
-- elseif p == "L" then
-- op = parse_load(params, nparams, n, op)
-- elseif p == "P" then
-- op = parse_load_pair(params, nparams, n, op)
-- elseif p == "B" then
-- local mode, v, s = parse_label(q, false); n = n + 1
-- local m = branch_type(op)
-- waction("REL_"..mode, v+m, s, 1)
-- elseif p == "I" then
-- op = op + parse_imm12(q); n = n + 1
-- elseif p == "i" then
-- op = op + parse_imm13(q); n = n + 1
-- elseif p == "W" then
-- op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
-- elseif p == "T" then
-- op = op + parse_imm6(q); n = n + 1
-- elseif p == "1" then
-- op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
-- elseif p == "2" then
-- op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
-- elseif p == "5" then
-- op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
-- elseif p == "V" then
-- op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
-- elseif p == "F" then
-- op = op + parse_fpimm(q); n = n + 1
-- elseif p == "Z" then
-- if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end
-- n = n + 1
-- elseif p == "S" then
-- op = op + parse_shift(q); n = n + 1
-- elseif p == "X" then
-- op = op + parse_extend(q); n = n + 1
-- elseif p == "R" then
-- op = op + parse_lslx16(q); n = n + 1
-- elseif p == "C" then
-- op = op + parse_cond(q, 0); n = n + 1
-- elseif p == "c" then
-- op = op + parse_cond(q, 1); n = n + 1
-- else
-- assert(false)
-- end
- end
- wputpos(pos, op)
- end
-
-From cac5d4f2fbebccf57dc34e443fbd7d55db77b242 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Fri, 25 Nov 2016 16:38:32 -0500
-Subject: [PATCH 034/260] Add extended mnemonics for branches.
-
----
- dynasm/dasm_s390x.lua | 22 ++++++++++++++++------
- 1 file changed, 16 insertions(+), 6 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index e39a27f1b..76fe281e0 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -239,9 +239,10 @@ local map_extend = {
- }
-
- local map_cond = {
-- eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
-- hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
-- hs = 2, lo = 3,
-+ o = 1, h = 2, hle = 3, l = 4,
-+ nhe = 5, lh = 6, ne = 7, e = 8,
-+ nlh = 9, he = 10, nl = 11, le = 12,
-+ nh = 13, no = 14, [""] = 15,
- }
-
- ------------------------------------------------------------------------------
-@@ -650,7 +651,7 @@ local alias_lslimm = op_alias("ubfm_4", function(p)
- end
- end)
-
---- Template strings for ARM instructions.
-+-- Template strings for s390x instructions.
- map_op = {
- a = "000000005a000000j",
- ar = "0000000000001a00g",
-@@ -1084,7 +1085,7 @@ msgfr = "00000000b91c0000h",
- msfi = "0000c20100000000l",
- msgfi = "0000c20000000000l",
- o = "0000000056000000j",
--or = "0000000000001600g",
-+["or"] = "0000000000001600g",
- oy = "0000e30000000056k",
- og = "0000e30000000081k",
- ogr = "00000000b9810000h",
-@@ -1213,7 +1214,16 @@ tracg = "0000eb000000000fn",
- tre = "00000000b2a50000h",
- }
- for cond,c in pairs(map_cond) do
-- map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B"
-+ -- Extended mnemonics for branches.
-+ -- TODO: replace 'B' with correct encoding.
-+ -- brc
-+ map_op["j"..cond.."_1"] = "00000000"..tohex(0xa7040000+shl(c, 20)).."B"
-+ -- brcl
-+ map_op["jg"..cond.."_1"] = tohex(0xc004+shl(c, 4)).."00000000".."B"
-+ -- bc
-+ map_op["b"..cond.."_1"] = "00000000"..tohex(0x47000000+shl(c, 20)).."B"
-+ -- bcr
-+ map_op["b"..cond.."r_1"] = "00000000"..tohex(0x0700+shl(c, 4)).."B"
- end
- ------------------------------------------------------------------------------
- -- Handle opcodes defined with template strings.
-
-From 203006579f28d0aa0a0108845fd589f44a00e164 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb@gmail.com>
-Date: Mon, 28 Nov 2016 13:32:30 +0530
-Subject: [PATCH 035/260] Removed the extra check in parse_reg
-
-The extra register check is ignored for now, while we try to see what value the encode function returns. It is still to be worked out how this value is used later, after decoding.
----
- dynasm/dasm_s390x.lua | 10 +---------
- 1 file changed, 1 insertion(+), 9 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 76fe281e0..340ad24c9 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -251,15 +251,7 @@ local parse_reg_type
-
-
- local function parse_gpr(expr)
-- local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$")
-- local tp = map_type[tname or expr]
-- if tp then
-- local reg = ovreg or tp.reg
-- if not reg then
-- werror("type `"..(tname or expr).."' needs a register override")
-- end
-- expr = reg
-- end
-+ -- assuming we get r0-r31 for now
- local r = match(expr, "^r([1-3]?[0-9])$")
- if r then
- r = tonumber(r)
-
-From 538a4afee2995fee75ec97faddbb5c7cb16b4432 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb@gmail.com>
-Date: Mon, 28 Nov 2016 15:29:58 +0530
-Subject: [PATCH 036/260] Updated size of the instruction word
-
-We can discuss whether we need to keep it 6 bytes or 8 bytes long; it is not clear enough to me either.
----
- dynasm/dasm_s390x.lua | 10 +++++-----
- 1 file changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 340ad24c9..2965034a4 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -88,7 +88,7 @@ end
-
- -- Add word to action list.
- local function wputxw(n)
-- assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
-+ assert(n >= 0 and n <= 0xffffffffffff and n % 1 == 0, "word out of range") -- s390x inst can be 6 bytes
- actlist[#actlist+1] = n
- end
-
-@@ -109,7 +109,7 @@ local function wflush(term)
- secpos = 1 -- The actionlist offset occupies a buffer position, too.
- end
-
---- Put escaped word.
-+-- Put escaped word. --Need to check this as well, not sure how it will work on s390x
- local function wputw(n)
- if n <= 0x000fffff then waction("ESC") end
- wputxw(n)
-@@ -122,9 +122,9 @@ local function wpos()
- return pos
- end
-
---- Store word to reserved position.
-+-- Store word to reserved position. -- added 2 bytes more since s390x has 6 bytes inst as well
- local function wputpos(pos, n)
-- assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
-+ assert(n >= 0 and n <= 0xffffffffffff and n % 1 == 0, "word out of range")
- if n <= 0x000fffff then
- insert(actlist, pos+1, n)
- n = map_action.ESC * 0x10000
-@@ -278,7 +278,7 @@ local function parse_reg_base(expr)
- local base, tp = parse_reg(expr)
- if parse_reg_type ~= "x" then werror("bad register type") end
- parse_reg_type = false
-- return shl(base, 5), tp
-+ return shl(base, 5), tp -- why is it shifted not able to make out
- end
-
- local parse_ctx = {}
-
-From 7a49be07be90657e5fe2f3e960d85736d927d2f8 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb@gmail.com>
-Date: Tue, 29 Nov 2016 19:00:28 +0530
-Subject: [PATCH 037/260] Added the required character for encoding
-
-I have added the number depending on the number of operands; please check the ones which access memory.
-Also, for the base register and displacement, should I assume that they will be passed in the same order as expected? Since I don't have any means to see the output, I am a bit confused about those addressing modes.
-Since we decided to test RR first, that is in progress, but I would like to add the others as well.
----
- dynasm/dasm_s390x.lua | 1130 +++++++++++++++++++++--------------------
- 1 file changed, 567 insertions(+), 563 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 2965034a4..f1d492c12 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -645,565 +645,565 @@ end)
-
- -- Template strings for s390x instructions.
- map_op = {
-- a = "000000005a000000j",
--ar = "0000000000001a00g",
--ay = "0000e3000000005ak",
--ag = "0000e30000000008k",
--agr = "00000000b9080000h",
--agf = "0000e30000000018k",
--agfr = "00000000b9180000h",
--axbr = "00000000b34a0000h",
--adbr = "00000000b31a0000h",
--aebr = "00000000b30a0000h",
--ah = "000000004a000000j",
--ahy = "0000e3000000007ak",
--afi = "0000c20900000000l",
--agfi = "0000c20800000000l",
--aih = "0000cc0800000000l",
--al = "000000005e000000j",
--alr = "0000000000001e00g",
--aly = "0000e3000000005ek",
--alg = "0000e3000000000ak",
--algr = "00000000b90a0000h",
--algf = "0000e3000000001ak",
--algfr = "00000000b91a0000h",
--alfi = "0000c20b00000000l",
--algfi = "0000c20a00000000l",
--alc = "0000e30000000098k",
--alcr = "00000000b9980000h",
--alcg = "0000e30000000088k",
--alcgr = "00000000b9880000h",
--alsih = "0000cc0a00000000l",
--alsihn = "0000cc0b00000000l",
--axr = "0000000000003600g",
--ad = "000000006a000000j",
--adr = "0000000000002a00g",
--ae = "000000007a000000j",
--aer = "0000000000003a00g",
--aw = "000000006e000000j",
--awr = "0000000000002e00g",
--au = "000000007e000000j",
--aur = "0000000000003e00g",
--n = "0000000054000000j",
--nr = "0000000000001400g",
--ny = "0000e30000000054k",
--ng = "0000e30000000080k",
--ngr = "00000000b9800000h",
--nihf = "0000c00a00000000l",
--nilf = "0000c00b00000000l",
--bal = "0000000045000000j",
--balr = "000000000000500g",
--bas = "000000004d000000j",
--basr = "0000000000000d00g",
--bassm = "0000000000000c00g",
--bsa = "00000000b25a0000h",
--bsm = "0000000000000b00g",
--bakr = "00000000b2400000h",
--bsg = "00000000b2580000h",
--bc = "0000000047000000j",
--bcr = "000000000000700g",
--bct = "0000000046000000j",
--bctr = "000000000000600g",
--bctg = "0000e30000000046k",
--bctgr = "00000000b9460000h",
--bxh = "0000000086000000m",
--bxhg = "0000eb0000000044n",
--bxle = "0000000087000000m",
--bxleg = "0000eb0000000045n",
--brasl = "0000c00500000000l",
--brcl = "0000c00400000000l",
--brcth = "0000cc0600000000l",
--cksm = "00000000b2410000h",
--km = "00000000b92e0000h",
--kmf = "00000000b92a0000h",
--kmc = "00000000b92f0000h",
--kmo = "00000000b92b0000h",
--c = "0000000059000000j",
--cr = "0000000000001900g",
--cy = "0000e30000000059k",
--cg = "0000e30000000020k",
--cgr = "00000000b9200000h",
--cgf = "0000e30000000030k",
--cgfr = "00000000b9300000h",
--cxbr = "00000000b3490000h",
--cxtr = "00000000b3ec0000h",
--cxr = "00000000b3690000h",
--cdbr = "00000000b3190000h",
--cdtr = "00000000b3e40000h",
--cd = "0000000069000000j",
--cdr = "0000000000002900g",
--cebr = "00000000b3090000h",
--ce = "0000000079000000j",
--cer = "0000000000003900g",
--kxbr = "00000000b3480000h",
--kxtr = "00000000b3e80000h",
--kdbr = "00000000b3180000h",
--kdtr = "00000000b3e00000h",
--kebr = "00000000b3080000h",
--cs = "00000000ba000000m",
--csy = "0000eb0000000014n",
--csg = "0000eb0000000030n",
--csp = "00000000b2500000h",
--cspg = "00000000b98a0000h",
--cextr = "00000000b3fc0000h",
--cedtr = "00000000b3f40000h",
--cds = "00000000bb000000m",
--cdsy = "0000eb0000000031n",
--cdsg = "0000eb000000003en",
--ch = "0000000049000000j",
--chy = "0000e30000000079k",
--cgh = "0000e30000000034k",
--chrl = "0000c60500000000l",
--cghrl = "0000c60400000000l",
--chf = "0000e300000000cdk",
--chhr = "00000000b9cd0000h",
--chlr = "00000000b9dd0000h",
--cfi = "0000c20d00000000l",
--cgfi = "0000c20c00000000l",
--cih = "0000cc0d00000000l",
--cl = "0000000055000000j",
--clr = "0000000000001500g",
--cly = "0000e30000000055k",
--clg = "0000e30000000021k",
--clgr = "00000000b9210000h",
--clgf = "0000e30000000031k",
--clgfr = "00000000b9310000h",
--clmh = "0000eb0000000020n",
--clm = "00000000bd000000m",
--clmy = "0000eb0000000021n",
--clhf = "0000e300000000cfk",
--clhhr = "00000000b9cf0000h",
--clhlr = "00000000b9df0000h",
--clfi = "0000c20f00000000l",
--clgfi = "0000c20e00000000l",
--clih = "0000cc0f00000000l",
--clcl = "0000000000000f00g",
--clcle = "00000000a9000000m",
--clclu = "0000eb000000008fn",
--clrl = "0000c60f00000000l",
--clhrl = "0000c60700000000l",
--clgrl = "0000c60a00000000l",
--clghrl = "0000c60600000000l",
--clgfrl = "0000c60e00000000l",
--clst = "00000000b25d0000h",
--crl = "0000c60d00000000l",
--cgrl = "0000c60800000000l",
--cgfrl = "0000c60c00000000l",
-- cuse = "00000000b2570000h",
--cmpsc = "00000000b2630000h",
--kimd = "00000000b93e0000h",
--klmd = "00000000b93f0000h",
--kmac = "00000000b91e0000h",
--thdr = "00000000b3590000h",
--thder = "00000000b3580000h",
--cxfbr = "00000000b3960000h",
--cxftr = "00000000b9590000h",
--cxfr = "00000000b3b60000h",
--cdfbr = "00000000b3950000h",
--cdftr = "00000000b9510000h",
--cdfr = "00000000b3b50000h",
--cefbr = "00000000b3940000h",
--cefr = "00000000b3b40000h",
--cxgbr = "00000000b3a60000h",
--cxgtr = "00000000b3f90000h",
--cxgr = "00000000b3c60000h",
--cdgbr = "00000000b3a50000h",
--cdgtr = "00000000b3f10000h",
--cdgr = "00000000b3c50000h",
--cegbr = "00000000b3a40000h",
--cegr = "00000000b3c40000h",
--cxstr = "00000000b3fb0000h",
--cdstr = "00000000b3f30000h",
--cxutr = "00000000b3fa0000h",
--cdutr = "00000000b3f20000h",
--cvb = "000000004f000000j",
--cvby = "0000e30000000006k",
--cvbg = "0000e3000000000ek",
--cvd = "000000004e000000j",
--cvdy = "0000e30000000026k",
--cvdg = "0000e3000000002ek",
--cuxtr = "00000000b3ea0000h",
--cudtr = "00000000b3e20000h",
--cu42 = "00000000b9b30000h",
--cu41 = "00000000b9b20000h",
--cpya = "00000000b24d0000h",
--d = "000000005d000000j",
--dr = "0000000000001d00g",
--dxbr = "00000000b34d0000h",
--dxr = "00000000b22d0000h",
--ddbr = "00000000b31d0000h",
--dd = "000000006d000000j",
--ddr = "0000000000002d00g",
--debr = "00000000b30d0000h",
--de = "000000007d000000j",
--der = "0000000000003d00g",
--dl = "0000e30000000097k",
--dlr = "00000000b9970000h",
--dlg = "0000e30000000087k",
--dlgr = "00000000b9870000h",
--dsg = "0000e3000000000dk",
--dsgr = "00000000b90d0000h",
--dsgf = "0000e3000000001dk",
--dsgfr = "00000000b91d0000h",
--x = "0000000057000000j",
--xr = "0000000000001700g",
--xy = "0000e30000000057k",
--xg = "0000e30000000082k",
--xgr = "00000000b9820000h",
--xihf = "0000c00600000000l",
--xilf = "0000c00700000000l",
--ex = "0000000044000000j",
--exrl = "0000c60000000000l",
--ear = "00000000b24f0000h",
--esea = "00000000b99d0000h",
--eextr = "00000000b3ed0000h",
--eedtr = "00000000b3e50000h",
--ecag = "0000eb000000004cn",
--efpc = "00000000b38c0000h",
--epar = "00000000b2260000h",
--epair = "00000000b99a0000h",
--epsw = "00000000b98d0000h",
--esar = "00000000b2270000h",
--esair = "00000000b99b0000h",
--esxtr = "00000000b3ef0000h",
--esdtr = "00000000b3e70000h",
--ereg = "00000000b2490000h",
--eregg = "00000000b90e0000h",
--esta = "00000000b24a0000h",
--flogr = "00000000b9830000h",
--hdr = "0000000000002400g",
--her = "0000000000003400g",
--iac = "00000000b2240000h",
--ic = "0000000043000000j",
--icy = "0000e30000000073k",
--icmh = "0000eb0000000080n",
--icm = "00000000bf000000m",
--icmy = "0000eb0000000081n",
--iihf = "0000c00800000000l",
--iilf = "0000c00900000000l",
--ipm = "00000000b2220000h",
--iske = "00000000b2290000h",
--ivsk = "00000000b2230000h",
--l = "0000000058000000j",
--lr = "0000000000001800g",
--ly = "0000e30000000058k",
--lg = "0000e30000000004k",
--lgr = "00000000b9040000h",
--lgf = "0000e30000000014k",
--lgfr = "00000000b9140000h",
--lxr = "00000000b3650000h",
--ld = "0000000068000000j",
--ldr = "0000000000002800g",
--ldy = "0000ed0000000065k",
--le = "0000000078000000j",
--ler = "0000000000003800g",
-- ley = "0000ed0000000064k",
--lam = "000000009a000000m",
--lamy = "0000eb000000009an",
--la = "0000000041000000j",
--lay = "0000e30000000071k",
--lae = "0000000051000000j",
--laey = "0000e30000000075k",
--larl = "0000c00000000000l",
--laa = "0000eb00000000f8n",
--laag = "0000eb00000000e8n",
--laal = "0000eb00000000fan",
--laalg = "0000eb00000000ean",
--lan = "0000eb00000000f4n",
--lang = "0000eb00000000e4n",
--lax = "0000eb00000000f7n",
--laxg = "0000eb00000000e7n",
--lao = "0000eb00000000f6n",
--laog = "0000eb00000000e6n",
--lt = "0000e30000000012k",
--ltr = "0000000000001200g",
--ltg = "0000e30000000002k",
--ltgr = "00000000b9020000h",
--ltgf = "0000e30000000032k",
--ltgfr = "00000000b9120000h",
--ltxbr = "00000000b3420000h",
--ltxtr = "00000000b3de0000h",
--ltxr = "00000000b3620000h",
--ltdbr = "00000000b3120000h",
--ltdtr = "00000000b3d60000h",
--ltdr = "0000000000002200g",
--ltebr = "00000000b3020000h",
--lter = "0000000000003200g",
--lb = "0000e30000000076k",
--lbr = "00000000b9260000h",
--lgb = "0000e30000000077k",
--lgbr = "00000000b9060000h",
-- lbh = "0000e300000000c0k",
--lcr = "0000000000001300g",
--lcgr = "00000000b9030000h",
--lcgfr = "00000000b9130000h",
--lcxbr = "00000000b3430000h",
--lcxr = "00000000b3630000h",
--lcdbr = "00000000b3130000h",
--lcdr = "0000000000002300g",
--lcdfr = "00000000b3730000h",
--lcebr = "00000000b3030000h",
--lcer = "0000000000003300g",
--lctl = "00000000b7000000m",
--lctlg = "0000eb000000002fn",
--fixr = "00000000b3670000h",
--fidr = "00000000b37f0000h",
--fier = "00000000b3770000h",
--ldgr = "00000000b3c10000h",
--lgdr = "00000000b3cd0000h",
--lh = "0000000048000000j",
--lhr = "00000000b9270000h",
--lhy = "0000e30000000078k",
--lgh = "0000e30000000015k",
--lghr = "00000000b9070000h",
--lhh = "0000e300000000c4k",
--lhrl = "0000c40500000000l",
--lghrl = "0000c40400000000l",
--lfh = "0000e300000000cak",
--lgfi = "0000c00100000000l",
--lxdbr = "00000000b3050000h",
--lxdr = "00000000b3250000h",
--lxebr = "00000000b3060000h",
--lxer = "00000000b3260000h",
--ldebr = "00000000b3040000h",
--lder = "00000000b3240000h",
--llgf = "0000e30000000016k",
--llgfr = "00000000b9160000h",
--llc = "0000e30000000094k",
--llcr = "00000000b9940000h",
--llgc = "0000e30000000090k",
--llgcr = "00000000b9840000h",
--llch = "0000e300000000c2k",
--llh = "0000e30000000095k",
--llhr = "00000000b9950000h",
--llgh = "0000e30000000091k",
--llghr = "00000000b9850000h",
--llhh = "0000e300000000c6k",
--llhrl = "0000c40200000000l",
--llghrl = "0000c40600000000l",
--llihf = "0000c00e00000000l",
--llilf = "0000c00f00000000l",
--llgfrl = "0000c40e00000000l",
--llgt = "0000e30000000017k",
--llgtr = "00000000b9170000h",
--lm = "0000000098000000m",
--lmy = "0000eb0000000098n",
--lmg = "0000eb0000000004n",
--lmh = "0000eb0000000096n",
--lnr = "0000000000001100g",
--lngr = "00000000b9010000h",
--lngfr = "00000000b9110000h",
--lnxbr = "00000000b3410000h",
--lnxr = "00000000b3610000h",
--lndbr = "00000000b3110000h",
--lndr = "0000000000002100g",
--lndfr = "00000000b3710000h",
--lnebr = "00000000b3010000h",
--lner = "0000000000003100g",
--loc = "0000eb00000000f2n",
--locg = "0000eb00000000e2n",
--lpq = "0000e3000000008fk",
--lpr = "0000000000001000g",
--lpgr = "00000000b9000000h",
--lpgfr = "00000000b9100000h",
--lpxbr = "00000000b3400000h",
--lpxr = "00000000b3600000h",
--lpdbr = "00000000b3100000h",
--lpdr = "0000000000002000g",
--lpdfr = "00000000b3700000h",
--lpebr = "00000000b3000000h",
--lper = "0000000000003000g",
--lra = "00000000b1000000j",
--lray = "0000e30000000013k",
--lrag = "0000e30000000003k",
--lrl = "0000c40d00000000l",
--lgrl = "0000c40800000000l",
--lgfrl = "0000c40c00000000l",
--lrvh = "0000e3000000001fk",
--lrv = "0000e3000000001ek",
--lrvr = "00000000b91f0000h",
--lrvg = "0000e3000000000fk",
--lrvgr = "00000000b90f0000h",
--ldxbr = "00000000b3450000h",
--ldxr = "0000000000002500g",
--lrdr = "0000000000002500g",
--lexbr = "00000000b3460000h",
--lexr = "00000000b3660000h",
--ledbr = "00000000b3440000h",
--ledr = "0000000000003500g",
--lrer = "0000000000003500g",
--lura = "00000000b24b0000h",
--lurag = "00000000b9050000h",
--lzxr = "00000000b3760000h",
--lzdr = "00000000b3750000h",
--lzer = "00000000b3740000h",
--msta = "00000000b2470000h",
--mvcl = "0000000000000e00g",
--mvcle = "00000000a8000000m",
--mvclu = "0000eb000000008en",
--mvpg = "00000000b2540000h",
--mvst = "00000000b2550000h",
--m = "000000005c000000j",
--mfy = "0000e3000000005ck",
--mr = "0000000000001c00g",
--mxbr = "00000000b34c0000h",
--mxr = "0000000000002600g",
--mdbr = "00000000b31c0000h",
--md = "000000006c000000j",
--mdr = "0000000000002c00g",
--mxdbr = "00000000b3070000h",
--mxd = "0000000067000000j",
--mxdr = "0000000000002700g",
--meebr = "00000000b3170000h",
--meer = "00000000b3370000h",
--mdebr = "00000000b30c0000h",
--mde = "000000007c000000j",
--mder = "0000000000003c00g",
--me = "000000007c000000j",
--mer = "0000000000003c00g",
--mh = "000000004c000000j",
--mhy = "0000e3000000007ck",
--mlg = "0000e30000000086k",
--mlgr = "00000000b9860000h",
--ml = "0000e30000000096k",
--mlr = "00000000b9960000h",
--ms = "0000000071000000j",
--msr = "00000000b2520000h",
--msy = "0000e30000000051k",
--msg = "0000e3000000000ck",
--msgr = "00000000b90c0000h",
--msgf = "0000e3000000001ck",
--msgfr = "00000000b91c0000h",
--msfi = "0000c20100000000l",
--msgfi = "0000c20000000000l",
--o = "0000000056000000j",
--["or"] = "0000000000001600g",
--oy = "0000e30000000056k",
--og = "0000e30000000081k",
--ogr = "00000000b9810000h",
--oihf = "0000c00c00000000l",
--oilf = "0000c00d00000000l",
--pgin = "00000000b22e0000h",
--pgout = "00000000b22f0000h",
--pcc = "00000000b92c0000h",
--pckmo = "00000000b9280000h",
--pfmf = "00000000b9af0000h",
--ptf = "00000000b9a20000h",
--popcnt = "00000000b9e10000h",
--pfd = "0000e30000000036k",
--pfdrl = "0000c60200000000l",
--pt = "00000000b2280000h",
--pti = "00000000b99e0000h",
--palb = "00000000b2480000h",
--rrbe = "00000000b22a0000h",
--rrbm = "00000000b9ae0000h",
--rll = "0000eb000000001dn",
--rllg = "0000eb000000001cn",
--srst = "00000000b25e0000h",
--srstu = "00000000b9be0000h",
--sar = "00000000b24e0000h",
--sfpc = "00000000b3840000h",
--sfasr = "00000000b3850000h",
--spm = "000000000000400g",
--ssar = "00000000b2250000h",
--ssair = "00000000b99f0000h",
--slda = "000000008f000000m",
--sldl = "000000008d000000m",
--sla = "000000008b000000m",
--slak = "0000eb00000000ddn",
--slag = "0000eb000000000bn",
--sll = "0000000089000000m",
--sllk = "0000eb00000000dfn",
--sllg = "0000eb000000000dn",
--srda = "000000008e000000m",
--srdl = "000000008c000000m",
--sra = "000000008a000000m",
--srak = "0000eb00000000dcn",
--srag = "0000eb000000000an",
--srl = "0000000088000000m",
--srlk = "0000eb00000000den",
--srlg = "0000eb000000000cn",
--sqxbr = "00000000b3160000h",
--sqxr = "00000000b3360000h",
--sqdbr = "00000000b3150000h",
--sqdr = "00000000b2440000h",
--sqebr = "00000000b3140000h",
--sqer = "00000000b2450000h",
--st = "0000000050000000j",
--sty = "0000e30000000050k",
--stg = "0000e30000000024k",
--std = "0000000060000000j",
--stdy = "0000ed0000000067k",
--ste = "0000000070000000j",
--stey = "0000ed0000000066k",
--stam = "000000009b000000m",
--stamy = "0000eb000000009bn",
--stc = "0000000042000000j",
--stcy = "0000e30000000072k",
--stch = "0000e300000000c3k",
--stcmh = "0000eb000000002cn",
--stcm = "00000000be000000m",
--stcmy = "0000eb000000002dn",
--stctl = "00000000b6000000m",
--stctg = "0000eb0000000025n",
--sth = "0000000040000000j",
--sthy = "0000e30000000070k",
--sthh = "0000e300000000c7k",
--sthrl = "0000c40700000000l",
--stfh = "0000e300000000cbk",
--stm = "0000000090000000m",
--stmy = "0000eb0000000090n",
--stmg = "0000eb0000000024n",
--stmh = "0000eb0000000026n",
--stoc = "0000eb00000000f3n",
--stocg = "0000eb00000000e3n",
--stpq = "0000e3000000008ek",
--strl = "0000c40f00000000l",
--stgrl = "0000c40b00000000l",
--strvh = "0000e3000000003fk",
--strv = "0000e3000000003ek",
--strvg = "0000e3000000002fk",
--stura = "00000000b2460000h",
--sturg = "00000000b9250000h",
--s = "000000005b000000j",
--sr = "0000000000001b00g",
--sy = "0000e3000000005bk",
--sg = "0000e30000000009k",
--sgr = "00000000b9090000h",
--sgf = "0000e30000000019k",
--sgfr = "00000000b9190000h",
--sxbr = "00000000b34b0000h",
--sdbr = "00000000b31b0000h",
--sebr = "00000000b30b0000h",
--sh = "000000004b000000j",
--shy = "0000e3000000007bk",
--sl = "000000005f000000j",
--slr = "0000000000001f00g",
--sly = "0000e3000000005fk",
--slg = "0000e3000000000bk",
--slgr = "00000000b90b0000h",
--slgf = "0000e3000000001bk",
--slgfr = "00000000b91b0000h",
--slfi = "0000c20500000000l",
--slgfi = "0000c20400000000l",
--slb = "0000e30000000099k",
--slbr = "00000000b9990000h",
--slbg = "0000e30000000089k",
--slbgr = "00000000b9890000h",
--sxr = "0000000000003700g",
--sd = "000000006b000000j",
--sdr = "0000000000002b00g",
--se = "000000007b000000j",
--ser = "0000000000003b00g",
--su = "000000007f000000j",
--sur = "0000000000003f00g",
--sw = "000000006f000000j",
--swr = "0000000000002f00g",
--tar = "00000000b24c0000h",
--tb = "00000000b22c0000h",
--trace = "0000000099000000m",
--tracg = "0000eb000000000fn",
--tre = "00000000b2a50000h",
-+a_4 = "000000005a000000j",
-+ar_2 = "0000000000001a00g",
-+ay_5 = "0000e3000000005al",
-+ag_5 = "0000e30000000008l",
-+agr_2 = "00000000b9080000h",
-+agf_5 = "0000e30000000018l",
-+agfr_2 = "00000000b9180000h",
-+axbr_2 = "00000000b34a0000h",
-+adbr_2 = "00000000b31a0000h",
-+aebr_2 = "00000000b30a0000h",
-+ah_4 = "000000004a000000j",
-+ahy_5 = "0000e3000000007al",
-+afi_3 = "0000c20900000000n",
-+agfi_3 = "0000c20800000000n",
-+aih_3 = "0000cc0800000000n",
-+al_4 = "000000005e000000j",
-+alr_2 = "0000000000001e00g",
-+aly_5 = "0000e3000000005el",
-+alg_5 = "0000e3000000000al",
-+algr_2 = "00000000b90a0000h",
-+algf_5 = "0000e3000000001al",
-+algfr_2 = "00000000b91a0000h",
-+alfi_3 = "0000c20b00000000n",
-+algfi_3 = "0000c20a00000000n",
-+alc_5 = "0000e30000000098l",
-+alcr_2 = "00000000b9980000h",
-+alcg_5 = "0000e30000000088l",
-+alcgr_2 = "00000000b9880000h",
-+alsih_3 = "0000cc0a00000000n",
-+alsihn_3 = "0000cc0b00000000n",
-+axr_2 = "0000000000003600g",
-+ad_4 = "000000006a000000j",
-+adr_2 = "0000000000002a00g",
-+ae_4 = "000000007a000000j",
-+aer_2 = "0000000000003a00g",
-+aw_4 = "000000006e000000j",
-+awr_2 = "0000000000002e00g",
-+au_4 = "000000007e000000j",
-+aur_2 = "0000000000003e00g",
-+n_4 = "0000000054000000j",
-+nr_2 = "0000000000001400g",
-+ny_5 = "0000e30000000054l",
-+ng_5 = "0000e30000000080l",
-+ngr_2 = "00000000b9800000h",
-+nihf_3 = "0000c00a00000000n",
-+nilf_3 = "0000c00b00000000n",
-+bal_4 = "0000000045000000j",
-+balr_2 = "000000000000500g",
-+bas_4 = "000000004d000000j",
-+basr_2 = "0000000000000d00g",
-+bassm_2 = "0000000000000c00g",
-+bsa_2 = "00000000b25a0000h",
-+bsm_2 = "0000000000000b00g",
-+bakr_2 = "00000000b2400000h",
-+bsg_2 = "00000000b2580000h",
-+bc_4 = "0000000047000000k",
-+bcr_2 = "000000000000700g",
-+bct_4 = "0000000046000000j",
-+bctr_2 = "000000000000600g",
-+bctg_5 = "0000e30000000046l",
-+bctgr_2 = "00000000b9460000h",
-+bxh_4 = "0000000086000000q",
-+bxhg_5 = "0000eb0000000044s",
-+bxle_4 = "0000000087000000q",
-+bxleg_5 = "0000eb0000000045s",
-+brasl_3 = "0000c00500000000o",
-+brcl_3 = "0000c00400000000p",
-+brcth_3 = "0000cc0600000000o",
-+cksm_2 = "00000000b2410000h",
-+km_2 = "00000000b92e0000h",
-+kmf_2 = "00000000b92a0000h",
-+kmc_2 = "00000000b92f0000h",
-+kmo_2 = "00000000b92b0000h",
-+c_4 = "0000000059000000j",
-+cr_2 = "0000000000001900g",
-+cy_5 = "0000e30000000059l",
-+cg_5 = "0000e30000000020l",
-+cgr_2 = "00000000b9200000h",
-+cgf_5 = "0000e30000000030l",
-+cgfr_2 = "00000000b9300000h",
-+cxbr_2 = "00000000b3490000h",
-+cxtr_2 = "00000000b3ec0000h",
-+cxr_2 = "00000000b3690000h",
-+cdbr_2 = "00000000b3190000h",
-+cdtr_2 = "00000000b3e40000h",
-+cd_4 = "0000000069000000j",
-+cdr_2 = "0000000000002900g",
-+cebr_2 = "00000000b3090000h",
-+ce_4 = "0000000079000000j",
-+cer_2 = "0000000000003900g",
-+kxbr_2 = "00000000b3480000h",
-+kxtr_2 = "00000000b3e80000h",
-+kdbr_2 = "00000000b3180000h",
-+kdtr_2 = "00000000b3e00000h",
-+kebr_2 = "00000000b3080000h",
-+cs_4 = "00000000ba000000q",
-+csy_5 = "0000eb0000000014s",
-+csg_5 = "0000eb0000000030s",
-+csp_2 = "00000000b2500000h",
-+cspg_2 = "00000000b98a0000h",
-+cextr_2 = "00000000b3fc0000h",
-+cedtr_2 = "00000000b3f40000h",
-+cds_4 = "00000000bb000000q",
-+cdsy_5 = "0000eb0000000031s",
-+cdsg_5 = "0000eb000000003es",
-+ch_4 = "0000000049000000j",
-+chy_5 = "0000e30000000079l",
-+cgh_5 = "0000e30000000034l",
-+chrl_3 = "0000c60500000000o",
-+cghrl_3 = "0000c60400000000o",
-+chf_5 = "0000e300000000cdl",
-+chhr_2 = "00000000b9cd0000h",
-+chlr_2 = "00000000b9dd0000h",
-+cfi_3 = "0000c20d00000000n",
-+cgfi_3 = "0000c20c00000000n",
-+cih_3 = "0000cc0d00000000n",
-+cl_4 = "0000000055000000j",
-+clr_2 = "0000000000001500g",
-+cly_5 = "0000e30000000055l",
-+clg_5 = "0000e30000000021l",
-+clgr_2 = "00000000b9210000h",
-+clgf_5 = "0000e30000000031l",
-+clgfr_2 = "00000000b9310000h",
-+clmh_5 = "0000eb0000000020t",
-+clm_4 = "00000000bd000000r",
-+clmy_5 = "0000eb0000000021t",
-+clhf_5 = "0000e300000000cfl",
-+clhhr_2 = "00000000b9cf0000h",
-+clhlr_2 = "00000000b9df0000h",
-+clfi_3 = "0000c20f00000000n",
-+clgfi_3 = "0000c20e00000000n",
-+clih_3 = "0000cc0f00000000n",
-+clcl_2 = "0000000000000f00g",
-+clcle_4 = "00000000a9000000q",
-+clclu_5 = "0000eb000000008fs",
-+clrl_3 = "0000c60f00000000o",
-+clhrl_3 = "0000c60700000000o",
-+clgrl_3 = "0000c60a00000000o",
-+clghrl_3 = "0000c60600000000o",
-+clgfrl_3 = "0000c60e00000000o",
-+clst_2 = "00000000b25d0000h",
-+crl_3 = "0000c60d00000000o",
-+cgrl_3 = "0000c60800000000o",
-+cgfrl_3 = "0000c60c00000000o",
-+cuse_2 = "00000000b2570000h",
-+cmpsc_2 = "00000000b2630000h",
-+kimd_2 = "00000000b93e0000h",
-+klmd_2 = "00000000b93f0000h",
-+kmac_2 = "00000000b91e0000h",
-+thdr_2 = "00000000b3590000h",
-+thder_2 = "00000000b3580000h",
-+cxfbr_2 = "00000000b3960000h",
-+cxftr_2 = "00000000b9590000h",
-+cxfr_2 = "00000000b3b60000h",
-+cdfbr_2 = "00000000b3950000h",
-+cdftr_2 = "00000000b9510000h",
-+cdfr_2 = "00000000b3b50000h",
-+cefbr_2 = "00000000b3940000h",
-+cefr_2 = "00000000b3b40000h",
-+cxgbr_2 = "00000000b3a60000h",
-+cxgtr_2 = "00000000b3f90000h",
-+cxgr_2 = "00000000b3c60000h",
-+cdgbr_2 = "00000000b3a50000h",
-+cdgtr_2 = "00000000b3f10000h",
-+cdgr_2 = "00000000b3c50000h",
-+cegbr_2 = "00000000b3a40000h",
-+cegr_2 = "00000000b3c40000h",
-+cxstr_2 = "00000000b3fb0000h",
-+cdstr_2 = "00000000b3f30000h",
-+cxutr_2 = "00000000b3fa0000h",
-+cdutr_2 = "00000000b3f20000h",
-+cvb_4 = "000000004f000000j",
-+cvby_5 = "0000e30000000006l",
-+cvbg_5 = "0000e3000000000el",
-+cvd_4 = "000000004e000000j",
-+cvdy_5 = "0000e30000000026l",
-+cvdg_5 = "0000e3000000002el",
-+cuxtr_2 = "00000000b3ea0000h",
-+cudtr_2 = "00000000b3e20000h",
-+cu42_2 = "00000000b9b30000h",
-+cu41_2 = "00000000b9b20000h",
-+cpya_2 = "00000000b24d0000h",
-+d_4 = "000000005d000000j",
-+dr_2 = "0000000000001d00g",
-+dxbr_2 = "00000000b34d0000h",
-+dxr_2 = "00000000b22d0000h",
-+ddbr_2 = "00000000b31d0000h",
-+dd_4 = "000000006d000000j",
-+ddr_2 = "0000000000002d00g",
-+debr_2 = "00000000b30d0000h",
-+de_4 = "000000007d000000j",
-+der_2 = "0000000000003d00g",
-+dl_5 = "0000e30000000097l",
-+dlr_2 = "00000000b9970000h",
-+dlg_5 = "0000e30000000087l",
-+dlgr_2 = "00000000b9870000h",
-+dsg_5 = "0000e3000000000dl",
-+dsgr_2 = "00000000b90d0000h",
-+dsgf_5 = "0000e3000000001dl",
-+dsgfr_2 = "00000000b91d0000h",
-+x_4 = "0000000057000000j",
-+xr_2 = "0000000000001700g",
-+xy_5 = "0000e30000000057l",
-+xg_5 = "0000e30000000082l",
-+xgr_2 = "00000000b9820000h",
-+xihf_3 = "0000c00600000000n",
-+xilf_3 = "0000c00700000000n",
-+ex_4 = "0000000044000000j",
-+exrl_3 = "0000c60000000000o",
-+ear_2 = "00000000b24f0000h",
-+esea_2 = "00000000b99d0000h",
-+eextr_2 = "00000000b3ed0000h",
-+eedtr_2 = "00000000b3e50000h",
-+ecag_5 = "0000eb000000004cs",
-+efpc_2 = "00000000b38c0000h",
-+epar_2 = "00000000b2260000h",
-+epair_2 = "00000000b99a0000h",
-+epsw_2 = "00000000b98d0000h",
-+esar_2 = "00000000b2270000h",
-+esair_2 = "00000000b99b0000h",
-+esxtr_2 = "00000000b3ef0000h",
-+esdtr_2 = "00000000b3e70000h",
-+ereg_2 = "00000000b2490000h",
-+eregg_2 = "00000000b90e0000h",
-+esta_2 = "00000000b24a0000h",
-+flogr_2 = "00000000b9830000h",
-+hdr_2 = "0000000000002400g",
-+her_2 = "0000000000003400g",
-+iac_2 = "00000000b2240000h",
-+ic_4 = "0000000043000000j",
-+icy_5 = "0000e30000000073l",
-+icmh_5 = "0000eb0000000080t",
-+icm_4 = "00000000bf000000r",
-+icmy_5 = "0000eb0000000081t",
-+iihf_3 = "0000c00800000000n",
-+iilf_3 = "0000c00900000000n",
-+ipm_2 = "00000000b2220000h",
-+iske_2 = "00000000b2290000h",
-+ivsk_2 = "00000000b2230000h",
-+l_4 = "0000000058000000j",
-+lr_2 = "0000000000001800g",
-+ly_5 = "0000e30000000058l",
-+lg_5 = "0000e30000000004l",
-+lgr_2 = "00000000b9040000h",
-+lgf_5 = "0000e30000000014l",
-+lgfr_2 = "00000000b9140000h",
-+lxr_2 = "00000000b3650000h",
-+ld_4 = "0000000068000000j",
-+ldr_2 = "0000000000002800g",
-+ldy_5 = "0000ed0000000065l",
-+le_4 = "0000000078000000j",
-+ler_2 = "0000000000003800g",
-+ley_5 = "0000ed0000000064l",
-+lam_4 = "000000009a000000q",
-+lamy_5 = "0000eb000000009as",
-+la_4 = "0000000041000000j",
-+lay_5 = "0000e30000000071l",
-+lae_4 = "0000000051000000j",
-+laey_5 = "0000e30000000075l",
-+larl_3 = "0000c00000000000o",
-+laa_5 = "0000eb00000000f8s",
-+laag_5 = "0000eb00000000e8s",
-+laal_5 = "0000eb00000000fas",
-+laalg_5 = "0000eb00000000eas",
-+lan_5 = "0000eb00000000f4s",
-+lang_5 = "0000eb00000000e4s",
-+lax_5 = "0000eb00000000f7s",
-+laxg_5 = "0000eb00000000e7s",
-+lao_5 = "0000eb00000000f6s",
-+laog_5 = "0000eb00000000e6s",
-+lt_5 = "0000e30000000012l",
-+ltr_2 = "0000000000001200g",
-+ltg_5 = "0000e30000000002l",
-+ltgr_2 = "00000000b9020000h",
-+ltgf_5 = "0000e30000000032l",
-+ltgfr_2 = "00000000b9120000h",
-+ltxbr_2 = "00000000b3420000h",
-+ltxtr_2 = "00000000b3de0000h",
-+ltxr_2 = "00000000b3620000h",
-+ltdbr_2 = "00000000b3120000h",
-+ltdtr_2 = "00000000b3d60000h",
-+ltdr_2 = "0000000000002200g",
-+ltebr_2 = "00000000b3020000h",
-+lter_2 = "0000000000003200g",
-+lb_5 = "0000e30000000076l",
-+lbr_2 = "00000000b9260000h",
-+lgb_5 = "0000e30000000077l",
-+lgbr_2 = "00000000b9060000h",
-+lbh_5 = "0000e300000000c0l",
-+lcr_2 = "0000000000001300g",
-+lcgr_2 = "00000000b9030000h",
-+lcgfr_2 = "00000000b9130000h",
-+lcxbr_2 = "00000000b3430000h",
-+lcxr_2 = "00000000b3630000h",
-+lcdbr_2 = "00000000b3130000h",
-+lcdr_2 = "0000000000002300g",
-+lcdfr_2 = "00000000b3730000h",
-+lcebr_2 = "00000000b3030000h",
-+lcer_2 = "0000000000003300g",
-+lctl_4 = "00000000b7000000q",
-+lctlg_5 = "0000eb000000002fs",
-+fixr_2 = "00000000b3670000h",
-+fidr_2 = "00000000b37f0000h",
-+fier_2 = "00000000b3770000h",
-+ldgr_2 = "00000000b3c10000h",
-+lgdr_2 = "00000000b3cd0000h",
-+lh_4 = "0000000048000000j",
-+lhr_2 = "00000000b9270000h",
-+lhy_5 = "0000e30000000078l",
-+lgh_5 = "0000e30000000015l",
-+lghr_2 = "00000000b9070000h",
-+lhh_5 = "0000e300000000c4l",
-+lhrl_3 = "0000c40500000000o",
-+lghrl_3 = "0000c40400000000o",
-+lfh_5 = "0000e300000000cal",
-+lgfi_3 = "0000c00100000000n",
-+lxdbr_2 = "00000000b3050000h",
-+lxdr_2 = "00000000b3250000h",
-+lxebr_2 = "00000000b3060000h",
-+lxer_2 = "00000000b3260000h",
-+ldebr_2 = "00000000b3040000h",
-+lder_2 = "00000000b3240000h",
-+llgf_5 = "0000e30000000016l",
-+llgfr_2 = "00000000b9160000h",
-+llc_5 = "0000e30000000094l",
-+llcr_2 = "00000000b9940000h",
-+llgc_5 = "0000e30000000090l",
-+llgcr_2 = "00000000b9840000h",
-+llch_5 = "0000e300000000c2l",
-+llh_5 = "0000e30000000095l",
-+llhr_2 = "00000000b9950000h",
-+llgh_5 = "0000e30000000091l",
-+llghr_2 = "00000000b9850000h",
-+llhh_5 = "0000e300000000c6l",
-+llhrl_3 = "0000c40200000000o",
-+llghrl_3 = "0000c40600000000o",
-+llihf_3 = "0000c00e00000000n",
-+llilf_3 = "0000c00f00000000n",
-+llgfrl_3 = "0000c40e00000000o",
-+llgt_5 = "0000e30000000017l",
-+llgtr_2 = "00000000b9170000h",
-+lm_4 = "0000000098000000q",
-+lmy_5 = "0000eb0000000098s",
-+lmg_5 = "0000eb0000000004s",
-+lmh_5 = "0000eb0000000096s",
-+lnr_2 = "0000000000001100g",
-+lngr_2 = "00000000b9010000h",
-+lngfr_2 = "00000000b9110000h",
-+lnxbr_2 = "00000000b3410000h",
-+lnxr_2 = "00000000b3610000h",
-+lndbr_2 = "00000000b3110000h",
-+lndr_2 = "0000000000002100g",
-+lndfr_2 = "00000000b3710000h",
-+lnebr_2 = "00000000b3010000h",
-+lner_2 = "0000000000003100g",
-+loc_5 = "0000eb00000000f2t",
-+locg_5 = "0000eb00000000e2t",
-+lpq_5 = "0000e3000000008fl",
-+lpr_2 = "0000000000001000g",
-+lpgr_2 = "00000000b9000000h",
-+lpgfr_2 = "00000000b9100000h",
-+lpxbr_2 = "00000000b3400000h",
-+lpxr_2 = "00000000b3600000h",
-+lpdbr_2 = "00000000b3100000h",
-+lpdr_2 = "0000000000002000g",
-+lpdfr_2 = "00000000b3700000h",
-+lpebr_2 = "00000000b3000000h",
-+lper_2 = "0000000000003000g",
-+lra_4 = "00000000b1000000j",
-+lray_5 = "0000e30000000013l",
-+lrag_5 = "0000e30000000003l",
-+lrl_3 = "0000c40d00000000o",
-+lgrl_3 = "0000c40800000000o",
-+lgfrl_3 = "0000c40c00000000o",
-+lrvh_5 = "0000e3000000001fl",
-+lrv_5 = "0000e3000000001el",
-+lrvr_2 = "00000000b91f0000h",
-+lrvg_5 = "0000e3000000000fl",
-+lrvgr_2 = "00000000b90f0000h",
-+ldxbr_2 = "00000000b3450000h",
-+ldxr_2 = "0000000000002500g",
-+lrdr_2 = "0000000000002500g",
-+lexbr_2 = "00000000b3460000h",
-+lexr_2 = "00000000b3660000h",
-+ledbr_2 = "00000000b3440000h",
-+ledr_2 = "0000000000003500g",
-+lrer_2 = "0000000000003500g",
-+lura_2 = "00000000b24b0000h",
-+lurag_2 = "00000000b9050000h",
-+lzxr_2 = "00000000b3760000h",
-+lzdr_2 = "00000000b3750000h",
-+lzer_2 = "00000000b3740000h",
-+msta_2 = "00000000b2470000h",
-+mvcl_2 = "0000000000000e00g",
-+mvcle_4 = "00000000a8000000q",
-+mvclu_5 = "0000eb000000008es",
-+mvpg_2 = "00000000b2540000h",
-+mvst_2 = "00000000b2550000h",
-+m_4 = "000000005c000000j",
-+mfy_5 = "0000e3000000005cl",
-+mr_2 = "0000000000001c00g",
-+mxbr_2 = "00000000b34c0000h",
-+mxr_2 = "0000000000002600g",
-+mdbr_2 = "00000000b31c0000h",
-+md_4 = "000000006c000000j",
-+mdr_2 = "0000000000002c00g",
-+mxdbr_2 = "00000000b3070000h",
-+mxd_4 = "0000000067000000j",
-+mxdr_2 = "0000000000002700g",
-+meebr_2 = "00000000b3170000h",
-+meer_2 = "00000000b3370000h",
-+mdebr_2 = "00000000b30c0000h",
-+mde_4 = "000000007c000000j",
-+mder_2 = "0000000000003c00g",
-+me_4 = "000000007c000000j",
-+mer_2 = "0000000000003c00g",
-+mh_4 = "000000004c000000j",
-+mhy_5 = "0000e3000000007cl",
-+mlg_5 = "0000e30000000086l",
-+mlgr_2 = "00000000b9860000h",
-+ml_5 = "0000e30000000096l",
-+mlr_2 = "00000000b9960000h",
-+ms_4 = "0000000071000000j",
-+msr_2 = "00000000b2520000h",
-+msy_5 = "0000e30000000051l",
-+msg_5 = "0000e3000000000cl",
-+msgr_2 = "00000000b90c0000h",
-+msgf_5 = "0000e3000000001cl",
-+msgfr_2 = "00000000b91c0000h",
-+msfi_3 = "0000c20100000000n",
-+msgfi_3 = "0000c20000000000n",
-+o_4 = "0000000056000000j",
-+["or_2"] = "0000000000001600g",
-+oy_5 = "0000e30000000056l",
-+og_5 = "0000e30000000081l",
-+ogr_2 = "00000000b9810000h",
-+oihf_3 = "0000c00c00000000n",
-+oilf_3 = "0000c00d00000000n",
-+pgin_2 = "00000000b22e0000h",
-+pgout_2 = "00000000b22f0000h",
-+pcc_2 = "00000000b92c0000h",
-+pckmo_2 = "00000000b9280000h",
-+pfmf_2 = "00000000b9af0000h",
-+ptf_2 = "00000000b9a20000h",
-+popcnt_2 = "00000000b9e10000h",
-+pfd_5 = "0000e30000000036m",
-+pfdrl_3 = "0000c60200000000p",
-+pt_2 = "00000000b2280000h",
-+pti_2 = "00000000b99e0000h",
-+palb_2 = "00000000b2480000h",
-+rrbe_2 = "00000000b22a0000h",
-+rrbm_2 = "00000000b9ae0000h",
-+rll_5 = "0000eb000000001ds",
-+rllg_5 = "0000eb000000001cs",
-+srst_2 = "00000000b25e0000h",
-+srstu_2 = "00000000b9be0000h",
-+sar_2 = "00000000b24e0000h",
-+sfpc_2 = "00000000b3840000h",
-+sfasr_2 = "00000000b3850000h",
-+spm_2 = "000000000000400g",
-+ssar_2 = "00000000b2250000h",
-+ssair_2 = "00000000b99f0000h",
-+slda_4 = "000000008f000000q",
-+sldl_4 = "000000008d000000q",
-+sla_4 = "000000008b000000q",
-+slak_5 = "0000eb00000000dds",
-+slag_5 = "0000eb000000000bs",
-+sll_4 = "0000000089000000q",
-+sllk_5 = "0000eb00000000dfs",
-+sllg_5 = "0000eb000000000ds",
-+srda_4 = "000000008e000000q",
-+srdl_4 = "000000008c000000q",
-+sra_4 = "000000008a000000q",
-+srak_5 = "0000eb00000000dcs",
-+srag_5 = "0000eb000000000as",
-+srl_4 = "0000000088000000q",
-+srlk_5 = "0000eb00000000des",
-+srlg_5 = "0000eb000000000cs",
-+sqxbr_2 = "00000000b3160000h",
-+sqxr_2 = "00000000b3360000h",
-+sqdbr_2 = "00000000b3150000h",
-+sqdr_2 = "00000000b2440000h",
-+sqebr_2 = "00000000b3140000h",
-+sqer_2 = "00000000b2450000h",
-+st_4 = "0000000050000000j",
-+sty_5 = "0000e30000000050l",
-+stg_5 = "0000e30000000024l",
-+std_4 = "0000000060000000j",
-+stdy_5 = "0000ed0000000067l",
-+ste_4 = "0000000070000000j",
-+stey_5 = "0000ed0000000066l",
-+stam_4 = "000000009b000000q",
-+stamy_5 = "0000eb000000009bs",
-+stc_4 = "0000000042000000j",
-+stcy_5 = "0000e30000000072l",
-+stch_5 = "0000e300000000c3l",
-+stcmh_5 = "0000eb000000002ct",
-+stcm_4 = "00000000be000000r",
-+stcmy_5 = "0000eb000000002dt",
-+stctl_4 = "00000000b6000000q",
-+stctg_5 = "0000eb0000000025s",
-+sth_4 = "0000000040000000j",
-+sthy_5 = "0000e30000000070l",
-+sthh_5 = "0000e300000000c7l",
-+sthrl_3 = "0000c40700000000o",
-+stfh_5 = "0000e300000000cbl",
-+stm_4 = "0000000090000000q",
-+stmy_5 = "0000eb0000000090s",
-+stmg_5 = "0000eb0000000024s",
-+stmh_5 = "0000eb0000000026s",
-+stoc_5 = "0000eb00000000f3t",
-+stocg_5 = "0000eb00000000e3t",
-+stpq_5 = "0000e3000000008el",
-+strl_3 = "0000c40f00000000o",
-+stgrl_3 = "0000c40b00000000o",
-+strvh_5 = "0000e3000000003fl",
-+strv_5 = "0000e3000000003el",
-+strvg_5 = "0000e3000000002fl",
-+stura_2 = "00000000b2460000h",
-+sturg_2 = "00000000b9250000h",
-+s_4 = "000000005b000000j",
-+sr_2 = "0000000000001b00g",
-+sy_5 = "0000e3000000005bl",
-+sg_5 = "0000e30000000009l",
-+sgr_2 = "00000000b9090000h",
-+sgf_5 = "0000e30000000019l",
-+sgfr_2 = "00000000b9190000h",
-+sxbr_2 = "00000000b34b0000h",
-+sdbr_2 = "00000000b31b0000h",
-+sebr_2 = "00000000b30b0000h",
-+sh_4 = "000000004b000000j",
-+shy_5 = "0000e3000000007bl",
-+sl_4 = "000000005f000000j",
-+slr_2 = "0000000000001f00g",
-+sly_5 = "0000e3000000005fl",
-+slg_5 = "0000e3000000000bl",
-+slgr_2 = "00000000b90b0000h",
-+slgf_5 = "0000e3000000001bl",
-+slgfr_2 = "00000000b91b0000h",
-+slfi_3 = "0000c20500000000n",
-+slgfi_3 = "0000c20400000000n",
-+slb_5 = "0000e30000000099l",
-+slbr_2 = "00000000b9990000h",
-+slbg_5 = "0000e30000000089l",
-+slbgr_2 = "00000000b9890000h",
-+sxr_2 = "0000000000003700g",
-+sd_4 = "000000006b000000j",
-+sdr_2 = "0000000000002b00g",
-+se_4 = "000000007b000000j",
-+ser_2 = "0000000000003b00g",
-+su_4 = "000000007f000000j",
-+sur_2 = "0000000000003f00g",
-+sw_4 = "000000006f000000j",
-+swr_2 = "0000000000002f00g",
-+tar_2 = "00000000b24c0000h",
-+tb_2 = "00000000b22c0000h",
-+trace_4 = "0000000099000000q",
-+tracg_5 = "0000eb000000000fs",
-+tre_2 = "00000000b2a50000h",
- }
- for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
-@@ -1230,13 +1230,17 @@ local function parse_template(params, template, nparams, pos)
- local pr1,pr2,pr3
- if p == "g" then
- pr1,pr2=param[n],param[n+1]
-- op = op + parse_reg(pr1)+parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
-+ op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
- elseif p == "h" then
--
-+ pr1,pr2=param[n],param[n+1]
-+ op = op + shl(parse_reg(pr1),4) + parse_reg(pr2)
- elseif p == "j" then
--
-+ op = op + shl(parse_reg(param[1],24) + shl(parse_reg(param[2],20) + shl(parse_reg(param[3]),16) + parse_number(param[4])
-+ -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is satisfied
-+
- elseif p == "k" then
--
-+ op = op + shl(parse_reg(param[1],40) + shl(parse_reg(param[2],36) + shl(parse_reg(param[3]),32) + parse_number(param[4]) parse_number(param[5])
-+ -- assuming params are passed as (R1,X2,B2,DL2,DH2)
- elseif p == "l" then
-
- elseif p == "m" then
-
-From 0e3241180f4e5e54a45e147bbedfa022d4bccb58 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Tue, 29 Nov 2016 13:45:59 -0500
-Subject: [PATCH 038/260] Various cleanup of dasm_s390x.lua
-
- - Fix syntax errors
- - Fix whitespace (use two-space indentation to match surrounding code)
----
- dynasm/dasm_s390x.lua | 23 +++++++++++------------
- 1 file changed, 11 insertions(+), 12 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index f1d492c12..2ae9e5944 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1225,29 +1225,28 @@ local function parse_template(params, template, nparams, pos)
- local n,rs = 1,26
-
- parse_reg_type = false
-- -- Process each character. (if its RX-a==> 1st iteration gets R, 2nd==X and so on)
-+ -- Process each character.
- for p in gmatch(sub(template, 17), ".") do
- local pr1,pr2,pr3
- if p == "g" then
-- pr1,pr2=param[n],param[n+1]
-- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
-+ pr1,pr2=param[n],param[n+1]
-+ op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
- elseif p == "h" then
-- pr1,pr2=param[n],param[n+1]
-- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2)
-+ pr1,pr2=param[n],param[n+1]
-+ op = op + shl(parse_reg(pr1),4) + parse_reg(pr2)
- elseif p == "j" then
-- op = op + shl(parse_reg(param[1],24) + shl(parse_reg(param[2],20) + shl(parse_reg(param[3]),16) + parse_number(param[4])
-- -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is satisfied
--
-+ op = op + shl(parse_reg(param[1]),24) + shl(parse_reg(param[2]),20) + shl(parse_reg(param[3]),16) + parse_number(param[4])
-+ -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is satisfied
- elseif p == "k" then
-- op = op + shl(parse_reg(param[1],40) + shl(parse_reg(param[2],36) + shl(parse_reg(param[3]),32) + parse_number(param[4]) parse_number(param[5])
-- -- assuming params are passed as (R1,X2,B2,DL2,DH2)
-+ op = op + shl(parse_reg(param[1]),40) + shl(parse_reg(param[2]),36) + shl(parse_reg(param[3]),32) + parse_number(param[4]) + parse_number(param[5])
-+ -- assuming params are passed as (R1,X2,B2,DL2,DH2)
- elseif p == "l" then
-
- elseif p == "m" then
-
- elseif p == "n" then
--
-- end
-+
-+ end
- end
- wputpos(pos, op)
- end
-
-From 89ca41cca5537f4b4fe7d2802997f1a8626ddc22 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Tue, 29 Nov 2016 13:59:37 -0500
-Subject: [PATCH 039/260] Add sp -> r15 mapping and don't special case or_2
-
-It's convenient for sp to be a pseudonym for r15 (the stack pointer).
-'or_2' doesn't need to be special cased ('or' did because it is a
-keyword).
----
- dynasm/dasm_s390x.lua | 11 ++++-------
- 1 file changed, 4 insertions(+), 7 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 2ae9e5944..0ec789334 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -214,15 +214,12 @@ end
- ------------------------------------------------------------------------------
-
- -- Arch-specific maps.
---- TODO: add s390x related register names
- -- Ext. register name -> int. name.
----local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
--local map_archdef = {}
-+local map_archdef = { sp = "r15" }
-
- -- Int. register name -> ext. name.
---- local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
--local map_reg_rev = {}
--
-+local map_reg_rev = { r15 = "sp" }
-+
- local map_type = {} -- Type name -> { ctype, reg }
- local ctypenum = 0 -- Type number (for Dt... macros).
-
-@@ -1077,7 +1074,7 @@ msgfr_2 = "00000000b91c0000h",
- msfi_3 = "0000c20100000000n",
- msgfi_3 = "0000c20000000000n",
- o_4 = "0000000056000000j",
--["or_2"] = "0000000000001600g",
-+or_2 = "0000000000001600g",
- oy_5 = "0000e30000000056l",
- og_5 = "0000e30000000081l",
- ogr_2 = "00000000b9810000h",
-
-From 36479af87a0cd75781b5626152da70ab9f7b2f0a Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Tue, 29 Nov 2016 15:24:11 -0500
-Subject: [PATCH 040/260] Add stubs for parsing memory operands and delete
- unwanted code.
-
-Each memory operand will be a single parameter so we also need
-to update the instruction encoding nargs field.
----
- dynasm/dasm_s390x.h | 2 +-
- dynasm/dasm_s390x.lua | 333 ++++--------------------------------------
- 2 files changed, 30 insertions(+), 305 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 577920ac9..b327e7a60 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -21,7 +21,7 @@ enum {
- /* The following actions need a buffer position. */
- DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
- /* The following actions also have an argument. */
-- DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
-+ DASM_REL_PC, DASM_LABEL_PC, DASM_DISP12, DASM_DISP20, DASM_IMM16, DASM_IMM32,
- DASM__MAX
- };
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 0ec789334..556f7fe4d 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -39,7 +39,7 @@ local wline, werror, wfatal, wwarn
- local action_names = {
- "STOP", "SECTION", "ESC", "REL_EXT",
- "ALIGN", "REL_LG", "LABEL_LG",
-- "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
-+ "REL_PC", "LABEL_PC", "DISP12", "DISP20", "IMM16", "IMM32",
- }
-
- -- Maximum number of section buffer positions for dasm_put().
-@@ -227,13 +227,6 @@ local ctypenum = 0 -- Type number (for Dt... macros).
- function _M.revdef(s)
- return map_reg_rev[s] or s
- end
---- not sure of these
--local map_shift = { lsl = 0, lsr = 1, asr = 2, }
--
--local map_extend = {
-- uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
-- sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
--}
-
- local map_cond = {
- o = 1, h = 2, hle = 3, l = 4,
-@@ -246,13 +239,11 @@ local map_cond = {
-
- local parse_reg_type
-
--
- local function parse_gpr(expr)
-- -- assuming we get r0-r31 for now
- local r = match(expr, "^r([1-3]?[0-9])$")
- if r then
- r = tonumber(r)
-- if r <= 31 then return r, tp end
-+ if r <= 15 then return r, tp end
- end
- werror("bad register name `"..expr.."'")
- end
-@@ -261,23 +252,11 @@ local function parse_fpr(expr)
- local r = match(expr, "^f([1-3]?[0-9])$")
- if r then
- r = tonumber(r)
-- if r <= 31 then return r end
-+ if r <= 15 then return r end
- end
- werror("bad register name `"..expr.."'")
- end
-
--
--
--
--
--local function parse_reg_base(expr)
-- if expr == "sp" then return 0x3e0 end
-- local base, tp = parse_reg(expr)
-- if parse_reg_type ~= "x" then werror("bad register type") end
-- parse_reg_type = false
-- return shl(base, 5), tp -- why is it shifted not able to make out
--end
--
- local parse_ctx = {}
-
- local loadenv = setfenv and function(s)
-@@ -300,262 +279,35 @@ local function parse_number(n)
- return nil
- end
-
--local function parse_imm(imm, bits, shift, scale, signed)
-- imm = match(imm, "^#(.*)$")
-- if not imm then werror("expected immediate operand") end
-- local n = parse_number(imm)
-- if n then
-- local m = sar(n, scale)
-- if shl(m, scale) == n then
-- if signed then
-- local s = sar(m, bits-1)
-- if s == 0 then return shl(m, shift)
-- elseif s == -1 then return shl(m + shl(1, bits), shift) end
-- else
-- if sar(m, bits) == 0 then return shl(m, shift) end
-- end
-- end
-- werror("out of range immediate `"..imm.."'")
-- else
-- waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
-- return 0
-- end
--end
--
--local function parse_imm12(imm)
-- imm = match(imm, "^#(.*)$")
-- if not imm then werror("expected immediate operand") end
-- local n = parse_number(imm)
-- if n then
-- if shr(n, 12) == 0 then
-- return shl(n, 10)
-- elseif band(n, 0xff000fff) == 0 then
-- return shr(n, 2) + 0x00400000
-- end
-- werror("out of range immediate `"..imm.."'")
-- else
-- waction("IMM12", 0, imm)
-- return 0
-- end
--end
--
--local function parse_imm13(imm)
-- imm = match(imm, "^#(.*)$")
-- if not imm then werror("expected immediate operand") end
-- local n = parse_number(imm)
-- local r64 = parse_reg_type == "x"
-- if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
-- local inv = false
-- if band(n, 1) == 1 then n = bit.bnot(n); inv = true end
-- local t = {}
-- for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end
-- local b = table.concat(t)
-- b = b..(r64 and (inv and "1" or "0"):rep(32) or b)
-- local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
-- if p0 then
-- local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
-- if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
-- local s = band(-2*w, 0x3f) - 1
-- if w == 64 then s = s + 0x1000 end
-- if inv then
-- return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
-- else
-- return shl(w-#p0, 16) + shl(s+#p1, 10)
-- end
-- end
-- end
-- werror("out of range immediate `"..imm.."'")
-- elseif r64 then
-- waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
-- actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
-- return 0
-- else
-- waction("IMM13W", 0, imm)
-- return 0
-- end
--end
--
--local function parse_imm6(imm)
-- imm = match(imm, "^#(.*)$")
-- if not imm then werror("expected immediate operand") end
-- local n = parse_number(imm)
-- if n then
-- if n >= 0 and n <= 63 then
-- return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0)
-- end
-- werror("out of range immediate `"..imm.."'")
-- else
-- waction("IMM6", 0, imm)
-- return 0
-- end
--end
--
--local function parse_imm_load(imm, scale)
-- local n = parse_number(imm)
-- if n then
-- local m = sar(n, scale)
-- if shl(m, scale) == n and m >= 0 and m < 0x1000 then
-- return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
-- elseif n >= -256 and n < 256 then
-- return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
-- end
-- werror("out of range immediate `"..imm.."'")
-- else
-- waction("IMML", 0, imm)
-- return 0
-- end
--end
--
--local function parse_fpimm(imm)
-- imm = match(imm, "^#(.*)$")
-- if not imm then werror("expected immediate operand") end
-- local n = parse_number(imm)
-- if n then
-- local m, e = math.frexp(n)
-- local s, e2 = 0, band(e-2, 7)
-- if m < 0 then m = -m; s = 0x00100000 end
-- m = m*32-16
-- if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then
-- return s + shl(e2, 17) + shl(m, 13)
-- end
-- werror("out of range immediate `"..imm.."'")
-- else
-- werror("NYI fpimm action")
-- end
--end
--
--local function parse_shift(expr)
-- local s, s2 = match(expr, "^(%S+)%s*(.*)$")
-- s = map_shift[s]
-- if not s then werror("expected shift operand") end
-- return parse_imm(s2, 6, 10, 0, false) + shl(s, 22)
--end
--
--local function parse_lslx16(expr)
-- local n = match(expr, "^lsl%s*#(%d+)$")
-- n = tonumber(n)
-- if not n then werror("expected shift operand") end
-- if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then
-- werror("bad shift amount")
-- end
-- return shl(n, 17)
--end
--
--local function parse_extend(expr)
-- local s, s2 = match(expr, "^(%S+)%s*(.*)$")
-- if s == "lsl" then
-- s = parse_reg_type == "x" and 3 or 2
-- else
-- s = map_extend[s]
-- end
-- if not s then werror("expected extend operand") end
-- return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13)
-+-- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
-+-- Encoded as: bddd
-+local function parse_mem_b(arg)
-+ werror("parse_mem_b: not implemented")
-+ return nil
- end
-
--local function parse_cond(expr, inv)
-- local c = map_cond[expr]
-- if not c then werror("expected condition operand") end
-- return shl(bit.bxor(c, inv), 12)
-+-- Parse memory operand of the form d(x, b) where 0 <= d < 4096 and b and x
-+-- are GPRs.
-+-- Encoded as: xbddd
-+local function parse_mem_bx(arg)
-+ werror("parse_mem_bx: not implemented")
-+ return nil
- end
-
--local function parse_load(params, nparams, n, op)
-- if params[n+2] then werror("too many operands") end
-- local pn, p2 = params[n], params[n+1]
-- local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
-- if not p1 then
-- if not p2 then
-- local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
-- if reg and tailr ~= "" then
-- local base, tp = parse_reg_base(reg)
-- if tp then
-- waction("IMML", 0, format(tp.ctypefmt, tailr))
-- return op + base
-- end
-- end
-- end
-- werror("expected address operand")
-- end
-- local scale = shr(op, 30)
-- if p2 then
-- if wb == "!" then werror("bad use of '!'") end
-- op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
-- elseif wb == "!" then
-- local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
-- if not p1a then werror("bad use of '!'") end
-- op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
-- else
-- local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
-- op = op + parse_reg_base(p1a)
-- if p2a ~= "" then
-- local imm = match(p2a, "^,%s*#(.*)$")
-- if imm then
-- op = op + parse_imm_load(imm, scale)
-- else
-- local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
-- op = op + shl(parse_reg(p2b), 16) + 0x00200800
-- if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
-- werror("bad index register type")
-- end
-- if p3b == "" then
-- if parse_reg_type ~= "x" then werror("bad index register type") end
-- op = op + 0x6000
-- else
-- if p3s == "" or p3s == "#0" then
-- elseif p3s == "#"..scale then
-- op = op + 0x1000
-- else
-- werror("bad scale")
-- end
-- if parse_reg_type == "x" then
-- if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
-- elseif p3b == "sxtx" then op = op + 0xe000
-- else
-- werror("bad extend/shift specifier")
-- end
-- else
-- if p3b == "uxtw" then op = op + 0x4000
-- elseif p3b == "sxtw" then op = op + 0xc000
-- else
-- werror("bad extend/shift specifier")
-- end
-- end
-- end
-- end
-- else
-- if wb == "!" then werror("bad use of '!'") end
-- op = op + 0x01000000
-- end
-- end
-- return op
-+-- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
-+-- b is a GPR.
-+-- Encoded as: blllhh (ls are the low-bits of d, and hs are the high bits).
-+local function parse_mem_by(arg)
-+ werror("parse_mem_by: not implemented")
-+ return nil
- end
-
--local function parse_load_pair(params, nparams, n, op)
-- if params[n+2] then werror("too many operands") end
-- local pn, p2 = params[n], params[n+1]
-- local scale = shr(op, 30) == 0 and 2 or 3
-- local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
-- if not p1 then
-- if not p2 then
-- local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
-- if reg and tailr ~= "" then
-- local base, tp = parse_reg_base(reg)
-- if tp then
-- waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
-- return op + base + 0x01000000
-- end
-- end
-- end
-- werror("expected address operand")
-- end
-- if p2 then
-- if wb == "!" then werror("bad use of '!'") end
-- op = op + 0x00800000
-- else
-- local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
-- if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
-- op = op + (wb == "!" and 0x01800000 or 0x01000000)
-- end
-- return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
-+-- Parse memory operand of the form d(x, b) where -(2^20)/2 <= d < (2^20)/2
-+-- and b and x are GPRs.
-+-- Encoded as: xblllhh (ls are the low-bits of d, and hs are the high bits).
-+local function parse_mem_bxy(arg)
-+ werror("parse_mem_bxy: not implemented")
-+ return nil
- end
-
- local function parse_label(label, def)
-@@ -613,33 +365,6 @@ local function op_alias(opname, f)
- end
- end
-
--local function alias_bfx(p)
-- p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1"
--end
--
--local function alias_bfiz(p)
-- parse_reg(p[1])
-- if parse_reg_type == "w" then
-- p[3] = "#-("..p[3]:sub(2)..")%32"
-- p[4] = "#("..p[4]:sub(2)..")-1"
-- else
-- p[3] = "#-("..p[3]:sub(2)..")%64"
-- p[4] = "#("..p[4]:sub(2)..")-1"
-- end
--end
--
--local alias_lslimm = op_alias("ubfm_4", function(p)
-- parse_reg(p[1])
-- local sh = p[3]:sub(2)
-- if parse_reg_type == "w" then
-- p[3] = "#-("..sh..")%32"
-- p[4] = "#31-("..sh..")"
-- else
-- p[3] = "#-("..sh..")%64"
-- p[4] = "#63-("..sh..")"
-- end
--end)
--
- -- Template strings for s390x instructions.
- map_op = {
- a_4 = "000000005a000000j",
-@@ -1226,11 +951,11 @@ local function parse_template(params, template, nparams, pos)
- for p in gmatch(sub(template, 17), ".") do
- local pr1,pr2,pr3
- if p == "g" then
-- pr1,pr2=param[n],param[n+1]
-+ pr1,pr2=params[n],params[n+1]
- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
- elseif p == "h" then
-- pr1,pr2=param[n],param[n+1]
-- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2)
-+ pr1,pr2=params[n],params[n+1]
-+ op = op + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
- elseif p == "j" then
- op = op + shl(parse_reg(param[1]),24) + shl(parse_reg(param[2]),20) + shl(parse_reg(param[3]),16) + parse_number(param[4])
- -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is satisfied
-
-From d97dea2e3fc4ed351a45e056137ad7fae7a59547 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Tue, 29 Nov 2016 16:29:42 -0500
-Subject: [PATCH 041/260] Add a description of how immediate actions should be
- encoded.
-
-Also sets the action list type to unsigned short (uint16_t) which
-I think is the most appropriate type for s390x (x86 uses uint8_t
-and other platforms use uint32_t).
----
- dynasm/dasm_s390x.h | 4 ++--
- dynasm/dasm_s390x.lua | 13 +++++++++++++
- 2 files changed, 15 insertions(+), 2 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index b327e7a60..254db8b87 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -9,7 +9,7 @@
- #include <string.h>
- #include <stdlib.h>
-
--#define DASM_ARCH "s390"
-+#define DASM_ARCH "s390x"
-
- #ifndef DASM_EXTERN
- #define DASM_EXTERN(a,b,c,d) 0
-@@ -49,7 +49,7 @@ enum {
- #define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
-
- /* Action list type. */
--typedef const unsigned int *dasm_ActList;
-+typedef const unsigned short *dasm_ActList;
-
- /* Per-section structure. */
- typedef struct dasm_Section {
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 556f7fe4d..c73e317e3 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -970,6 +970,19 @@ local function parse_template(params, template, nparams, pos)
-
- end
- end
-+
-+ -- TODO
-+ -- 12-bit displacements (DISP12) and 16-bit immediates (IMM16) can be put at
-+ -- one of two locations relative to the end of the instruction.
-+ -- To make decoding easier we should insert the actions for these immediately
-+ -- after the halfword they modify.
-+ -- For example, take the instruction ahik, which is laid out as follows (each
-+ -- char is 4 bits):
-+ -- o = op code, r = register, i = immediate
-+ -- oorr iiii 00oo
-+ -- This should be emitted as oorr, followed by the immediate action, followed by
-+ -- 00oo.
-+
- wputpos(pos, op)
- end
- function op_template(params, template, nparams)
-
-From 000b1a84f099ff001fa8dd6c5e2ff32b115975ed Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Tue, 29 Nov 2016 18:06:59 -0500
-Subject: [PATCH 042/260] Breakup instructions and action list into halfword
- chunks.
-
-This should allow us to encode the instructions relatively naturally
-and efficiently. For now I've escaped halfwords with a value <=
-the maximum action. This means that 0 is escaped which probably
-isn't ideal, so we may want to revisit that decision at some point.
----
- dynasm/dasm_s390x.lua | 100 ++++++++++++++++++++++--------------------
- 1 file changed, 53 insertions(+), 47 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index c73e317e3..ef7f35e51 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -48,8 +48,10 @@ local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
-
- -- Action name -> action number.
- local map_action = {}
-+local max_action = 0
- for n,name in ipairs(action_names) do
- map_action[name] = n-1
-+ max_action = n
- end
-
- -- Action list buffer.
-@@ -77,25 +79,35 @@ end
- local function writeactions(out, name)
- local nn = #actlist
- if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
-- out:write("static const unsigned int ", name, "[", nn, "] = {\n")
-- for i = 1,nn-1 do
-- assert(out:write("0x", tohex(actlist[i]), ",\n"))
-+ out:write("static const unsigned short ", name, "[", nn, "] = {")
-+ local esc = false -- also need to escape for action arguments
-+ for i = 1,nn do
-+ assert(out:write("\n 0x", sub(tohex(actlist[i]), 5, 8)))
-+ if i ~= nn then assert(out:write(",")) end
-+ local name = action_names[actlist[i]+1]
-+ if not esc and name then
-+ assert(out:write(" /* ", name, " */"))
-+ esc = name == "ESC" or name == "SECTION"
-+ else
-+ esc = false
-+ end
- end
-- assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
-+ assert(out:write("\n};\n\n"))
- end
-
- ------------------------------------------------------------------------------
-
---- Add word to action list.
--local function wputxw(n)
-- assert(n >= 0 and n <= 0xffffffffffff and n % 1 == 0, "word out of range") -- s390x inst can be 6 bytes
-+-- Add halfword to action list.
-+local function wputxhw(n)
-+ assert(n >= 0 and n <= 0xffff, "halfword out of range")
- actlist[#actlist+1] = n
- end
-
- -- Add action to list with optional arg. Advance buffer pos, too.
- local function waction(action, val, a, num)
- local w = assert(map_action[action], "bad action name `"..action.."'")
-- wputxw(w * 0x10000 + (val or 0))
-+ wputxhw(w)
-+ if val then wputxhw(val) end -- Not sure about this, do we always have one arg?
- if a then actargs[#actargs+1] = a end
- if a or num then secpos = secpos + (num or 1) end
- end
-@@ -109,29 +121,19 @@ local function wflush(term)
- secpos = 1 -- The actionlist offset occupies a buffer position, too.
- end
-
---- Put escaped word. --Need to check this as well, not sure how it will work on s390x
--local function wputw(n)
-- if n <= 0x000fffff then waction("ESC") end
-- wputxw(n)
-+-- Put escaped halfword.
-+local function wputhw(n)
-+ if n <= max_action then waction("ESC") end
-+ wputxhw(n)
- end
-
---- Reserve position for word.
-+-- Reserve position for halfword.
- local function wpos()
- local pos = #actlist+1
- actlist[pos] = ""
- return pos
- end
-
---- Store word to reserved position. -- added 2 bytes more since s390x has 6 bytes inst as well
--local function wputpos(pos, n)
-- assert(n >= 0 and n <= 0xffffffffffff and n % 1 == 0, "word out of range")
-- if n <= 0x000fffff then
-- insert(actlist, pos+1, n)
-- n = map_action.ESC * 0x10000
-- end
-- actlist[pos] = n
--end
--
- ------------------------------------------------------------------------------
-
- -- Global label name -> global label number. With auto assignment on 1st use.
-@@ -942,26 +944,44 @@ end
- ------------------------------------------------------------------------------
- -- Handle opcodes defined with template strings.
- local function parse_template(params, template, nparams, pos)
-- local op = tonumber(sub(template, 1, 16), 16) --
-- -- 00000000005a0000 converts to 90
-+ -- Read the template in 16-bit chunks.
-+ -- Leading halfword zeroes should not be written out.
-+ local op0 = tonumber(sub(template, 5, 8), 16)
-+ local op1 = tonumber(sub(template, 9, 12), 16)
-+ local op2 = tonumber(sub(template, 13, 16), 16)
-+
- local n,rs = 1,26
-
- parse_reg_type = false
- -- Process each character.
-+ -- TODO
-+ -- 12-bit displacements (DISP12) and 16-bit immediates (IMM16) can be put at
-+ -- one of two locations relative to the end of the instruction.
-+ -- To make decoding easier we should insert the actions for these immediately
-+ -- after the halfword they modify.
-+ -- For example, take the instruction ahik, which is laid out as follows (each
-+ -- char is 4 bits):
-+ -- o = op code, r = register, i = immediate
-+ -- oorr iiii 00oo
-+ -- This should be emitted as oorr, followed by the immediate action, followed by
-+ -- 00oo.
- for p in gmatch(sub(template, 17), ".") do
-- local pr1,pr2,pr3
-+ local pr1,pr2,pr3
- if p == "g" then
- pr1,pr2=params[n],params[n+1]
-- op = op + shl(parse_reg(pr1),4) + parse_reg(pr2); n = n + 1 -- not sure if we will require n later, so keeping it as it is now
-+ op2 = op2 + shl(parse_reg(pr1),4) + parse_reg(pr2)
-+ wputhw(op2)
- elseif p == "h" then
- pr1,pr2=params[n],params[n+1]
-- op = op + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
-+ op2 = op2 + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
-+ wputhw(op1); wputhw(op2)
- elseif p == "j" then
-- op = op + shl(parse_reg(param[1]),24) + shl(parse_reg(param[2]),20) + shl(parse_reg(param[3]),16) + parse_number(param[4])
-- -- assuming that the parameters are passes in order (R1,X2,B2,D) --only RX-a is satisfied
-+ op1 = op1 + shl(parse_reg(param[1], 8))
-+ wputhw(op1); wputhw(op2)
-+ -- TODO: parse param[2] using parse_mem_bx, need to put x into op1, b and d
-+ -- into op2, emitting an action for the DISP12 afterwards if necessary.
- elseif p == "k" then
-- op = op + shl(parse_reg(param[1]),40) + shl(parse_reg(param[2]),36) + shl(parse_reg(param[3]),32) + parse_number(param[4]) + parse_number(param[5])
-- -- assuming params are passed as (R1,X2,B2,DL2,DH2)
-+
- elseif p == "l" then
-
- elseif p == "m" then
-@@ -971,30 +991,16 @@ local function parse_template(params, template, nparams, pos)
- end
- end
-
-- -- TODO
-- -- 12-bit displacements (DISP12) and 16-bit immediates (IMM16) can be put at
-- -- one of two locations relative to the end of the instruction.
-- -- To make decoding easier we should insert the actions for these immediately
-- -- after the halfword they modify.
-- -- For example, take the instruction ahik, which is laid out as follows (each
-- -- char is 4 bits):
-- -- o = op code, r = register, i = immediate
-- -- oorr iiii 00oo
-- -- This should be emitted as oorr, followed by the immediate action, followed by
-- -- 00oo.
--
-- wputpos(pos, op)
- end
- function op_template(params, template, nparams)
- if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
- -- Limit number of section buffer positions used by a single dasm_put().
- -- A single opcode needs a maximum of 3 positions.
- if secpos+3 > maxsecpos then wflush() end
-- local pos = wpos()
- local lpos, apos, spos = #actlist, #actargs, secpos
- local ok, err
- for t in gmatch(template, "[^|]+") do
-- ok, err = pcall(parse_template, params, t, nparams, pos)
-+ ok, err = pcall(parse_template, params, t, nparams)
- if ok then return end
- secpos = spos
- actlist[lpos+1] = nil
-
-From cf225d27cc56c31487638579857594b8dae4783b Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Wed, 30 Nov 2016 14:11:01 -0500
-Subject: [PATCH 043/260] Fix C code in header file and handle br template.
-
-This means that code like this can now be generated on s390x:
-
-| ar r2, r3
-| br r14
-
-Still need to add support for immediates, memory, labels, other
-instructions and so on.
----
- dynasm/dasm_s390x.h | 56 +++++++++++++++++++------------------------
- dynasm/dasm_s390x.lua | 13 ++++++----
- 2 files changed, 33 insertions(+), 36 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 254db8b87..837a2ed0d 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -186,10 +186,10 @@ void dasm_put(Dst_DECL, int start, ...)
-
- va_start(ap, start);
- while (1) {
-- unsigned int ins = *p++;
-- unsigned int action = (ins >> 16);
-+ unsigned short ins = *p++;
-+ unsigned short action = ins;
- if (action >= DASM__MAX) {
-- ofs += 4;
-+ ofs += 2;
- } else {
- int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
- switch (action) {
-@@ -231,22 +231,11 @@ void dasm_put(Dst_DECL, int start, ...)
- *pl = -pos; /* Label exists now. */
- b[pos++] = ofs; /* Store pass1 offset estimate. */
- break;
-- case DASM_IMM:
--#ifdef DASM_CHECKS
-- CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
--#endif
-- n >>= ((ins>>10)&31);
--#ifdef DASM_CHECKS
-- if (ins & 0x8000)
-- CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
-- else
-- CK((n>>((ins>>5)&31)) == 0, RANGE_I);
--#endif
-- b[pos++] = n;
-- break;
-- case DASM_IMMSH:
-- CK((n >> 6) == 0, RANGE_I);
-- b[pos++] = n;
-+ case DASM_IMM16:
-+ case DASM_IMM32:
-+ case DASM_DISP20:
-+ case DASM_DISP12:
-+ fprintf(stderr, "not implemented\n");
- break;
- }
- }
-@@ -294,8 +283,8 @@ int dasm_link(Dst_DECL, size_t *szp)
- while (pos != lastpos) {
- dasm_ActList p = D->actionlist + b[pos++];
- while (1) {
-- unsigned int ins = *p++;
-- unsigned int action = (ins >> 16);
-+ unsigned short ins = *p++;
-+ unsigned short action = ins;
- switch (action) {
- case DASM_STOP: case DASM_SECTION: goto stop;
- case DASM_ESC: p++; break;
-@@ -303,7 +292,12 @@ int dasm_link(Dst_DECL, size_t *szp)
- case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
- case DASM_REL_LG: case DASM_REL_PC: pos++; break;
- case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
-- case DASM_IMM: case DASM_IMMSH: pos++; break;
-+ case DASM_IMM16:
-+ case DASM_IMM32:
-+ case DASM_DISP20:
-+ case DASM_DISP12:
-+ fprintf(stderr, "not implemented\n");
-+ break;
- }
- }
- stop: (void)0;
-@@ -328,7 +322,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- {
- dasm_State *D = Dst_REF;
- char *base = (char *)buffer;
-- unsigned int *cp = (unsigned int *)buffer;
-+ unsigned short *cp = (unsigned short *)buffer;
- int secnum;
-
- /* Encode all code sections. No support for data sections (yet). */
-@@ -340,8 +334,8 @@ int dasm_encode(Dst_DECL, void *buffer)
- while (b != endb) {
- dasm_ActList p = D->actionlist + *b++;
- while (1) {
-- unsigned int ins = *p++;
-- unsigned int action = (ins >> 16);
-+ unsigned short ins = *p++;
-+ unsigned short action = ins;
- int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
- switch (action) {
- case DASM_STOP: case DASM_SECTION: goto stop;
-@@ -350,7 +344,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1) - 4;
- goto patchrel;
- case DASM_ALIGN:
-- ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
-+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x0707;
- break;
- case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
-@@ -367,11 +361,11 @@ int dasm_encode(Dst_DECL, void *buffer)
- ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
- break;
- case DASM_LABEL_PC: break;
-- case DASM_IMM:
-- cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
-- break;
-- case DASM_IMMSH:
-- cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
-+ case DASM_IMM16:
-+ case DASM_IMM32:
-+ case DASM_DISP20:
-+ case DASM_DISP12:
-+ fprintf(stderr, "not implemented\n");
- break;
- default: *cp++ = ins; break;
- }
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index ef7f35e51..52acbdbd6 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -933,13 +933,13 @@ for cond,c in pairs(map_cond) do
- -- Extended mnemonics for branches.
- -- TODO: replace 'B' with correct encoding.
- -- brc
-- map_op["j"..cond.."_1"] = "00000000"..tohex(0xa7040000+shl(c, 20)).."B"
-+ map_op["j"..cond.."_1"] = "00000000"..tohex(0xa7040000+shl(c, 20)).."w"
- -- brcl
-- map_op["jg"..cond.."_1"] = tohex(0xc004+shl(c, 4)).."00000000".."B"
-+ map_op["jg"..cond.."_1"] = tohex(0xc004+shl(c, 4)).."00000000".."x"
- -- bc
-- map_op["b"..cond.."_1"] = "00000000"..tohex(0x47000000+shl(c, 20)).."B"
-+ map_op["b"..cond.."_1"] = "00000000"..tohex(0x47000000+shl(c, 20)).."y"
- -- bcr
-- map_op["b"..cond.."r_1"] = "00000000"..tohex(0x0700+shl(c, 4)).."B"
-+ map_op["b"..cond.."r_1"] = "00000000"..tohex(0x0700+shl(c, 4)).."z"
- end
- ------------------------------------------------------------------------------
- -- Handle opcodes defined with template strings.
-@@ -969,7 +969,7 @@ local function parse_template(params, template, nparams, pos)
- local pr1,pr2,pr3
- if p == "g" then
- pr1,pr2=params[n],params[n+1]
-- op2 = op2 + shl(parse_reg(pr1),4) + parse_reg(pr2)
-+ op2 = op2 + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
- wputhw(op2)
- elseif p == "h" then
- pr1,pr2=params[n],params[n+1]
-@@ -988,6 +988,9 @@ local function parse_template(params, template, nparams, pos)
-
- elseif p == "n" then
-
-+ elseif p == "z" then
-+ op2 = op2 + parse_gpr(params[1])
-+ wputhw(op2)
- end
- end
-
-
-From a34bcf9ef464b9e599efca9bb762b181f8c18c3d Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Wed, 30 Nov 2016 16:05:36 -0500
-Subject: [PATCH 044/260] Add initial support for D(B,X) memory operands
- (12-bit only).
-
-Most RX instructions don't specify the correct number of operands
-so this won't work on many yet. It also won't yet emit an action
-if D is a variable rather than a constant.
----
- dynasm/dasm_s390x.lua | 60 ++++++++++++++++++++++++++++++++++++++-----
- 1 file changed, 54 insertions(+), 6 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 52acbdbd6..eac9d6032 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -281,6 +281,32 @@ local function parse_number(n)
- return nil
- end
-
-+local function is_uint12(num)
-+ return 0 <= num and num < 4096
-+end
-+
-+local function is_int20(num)
-+ return -shl(1, 19) <= num and num < shl(1, 19)
-+end
-+
-+-- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
-+-- If x is not specified then it is 0.
-+local function split_memop(arg)
-+ local reg = "r[0-1]?[0-9]"
-+ local d, x, b = match(arg, "^(.*)%(("..reg.."), ("..reg..")%)$")
-+ if d then
-+ return d, parse_gpr(x), parse_gpr(b)
-+ end
-+ local d, b = match(arg, "^(.*)%(("..reg..")%)$")
-+ if d then
-+ return d, 0, parse_gpr(b)
-+ end
-+ -- TODO: handle values without registers?
-+ -- TODO: handle registers without a displacement?
-+ werror("bad memory operand: "..arg)
-+ return nil
-+end
-+
- -- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
- -- Encoded as: bddd
- local function parse_mem_b(arg)
-@@ -292,6 +318,17 @@ end
- -- are GPRs.
- -- Encoded as: xbddd
- local function parse_mem_bx(arg)
-+ local d, x, b = split_memop(arg)
-+ local dval = tonumber(d)
-+ if dval then
-+ if not is_uint12(dval) then
-+ werror("displacement out of range: ", dval)
-+ end
-+ return dval, x, b, nil
-+ end
-+ -- TODO: handle d being a symbol.
-+ -- Action is currently the final return value (the caller needs to add it
-+ -- to the action list at a later point).
- werror("parse_mem_bx: not implemented")
- return nil
- end
-@@ -369,7 +406,7 @@ end
-
- -- Template strings for s390x instructions.
- map_op = {
--a_4 = "000000005a000000j",
-+a_2 = "000000005a000000j",
- ar_2 = "0000000000001a00g",
- ay_5 = "0000e3000000005al",
- ag_5 = "0000e30000000008l",
-@@ -853,7 +890,7 @@ sqdbr_2 = "00000000b3150000h",
- sqdr_2 = "00000000b2440000h",
- sqebr_2 = "00000000b3140000h",
- sqer_2 = "00000000b2450000h",
--st_4 = "0000000050000000j",
-+st_2 = "0000000050000000j",
- sty_5 = "0000e30000000050l",
- stg_5 = "0000e30000000024l",
- std_4 = "0000000060000000j",
-@@ -976,10 +1013,13 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + shl(parse_gpr(pr1),4) + parse_gpr(pr2)
- wputhw(op1); wputhw(op2)
- elseif p == "j" then
-- op1 = op1 + shl(parse_reg(param[1], 8))
-- wputhw(op1); wputhw(op2)
-- -- TODO: parse param[2] using parse_mem_bx, need to put x into op1, b and d
-- -- into op2, emitting an action for the DISP12 afterwards if necessary.
-+ local d, x, b, a = parse_mem_bx(params[2])
-+ op1 = op1 + shl(parse_gpr(params[1]), 4) + x
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2);
-+ if a then
-+ werror("disp12 actions not yet implemented")
-+ end
- elseif p == "k" then
-
- elseif p == "l" then
-@@ -988,6 +1028,14 @@ local function parse_template(params, template, nparams, pos)
-
- elseif p == "n" then
-
-+ elseif p == "y" then
-+ local d, x, b, a = parse_mem_bx(params[1])
-+ op1 = op1 + x
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2);
-+ if a then
-+ werror("disp12 actions not yet implemented")
-+ end
- elseif p == "z" then
- op2 = op2 + parse_gpr(params[1])
- wputhw(op2)
-
-From 575c9075448b26316195c26dbcc600656ad44849 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Wed, 30 Nov 2016 17:07:17 -0500
-Subject: [PATCH 045/260] Minor cleanup of regular expressions.
-
----
- dynasm/dasm_s390x.lua | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index eac9d6032..c15719b73 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -242,7 +242,7 @@ local map_cond = {
- local parse_reg_type
-
- local function parse_gpr(expr)
-- local r = match(expr, "^r([1-3]?[0-9])$")
-+ local r = match(expr, "^r(1?[0-9])$")
- if r then
- r = tonumber(r)
- if r <= 15 then return r, tp end
-@@ -251,7 +251,7 @@ local function parse_gpr(expr)
- end
-
- local function parse_fpr(expr)
-- local r = match(expr, "^f([1-3]?[0-9])$")
-+ local r = match(expr, "^f(1?[0-9])$")
- if r then
- r = tonumber(r)
- if r <= 15 then return r end
-@@ -292,7 +292,7 @@ end
- -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
- -- If x is not specified then it is 0.
- local function split_memop(arg)
-- local reg = "r[0-1]?[0-9]"
-+ local reg = "r1?[0-9]"
- local d, x, b = match(arg, "^(.*)%(("..reg.."), ("..reg..")%)$")
- if d then
- return d, parse_gpr(x), parse_gpr(b)
-
-From dd6448ff1e7242c3ad4a1f21823143cfb104349d Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb@gmail.com>
-Date: Thu, 1 Dec 2016 15:15:06 +0530
-Subject: [PATCH 046/260] Changed the templates based on no of arguments
-
-Have changed the templates based on number of parameters passed, mainly the memory and immediate ones are modified.
----
- dynasm/dasm_s390x.lua | 552 +++++++++++++++++++++---------------------
- 1 file changed, 276 insertions(+), 276 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index c15719b73..467e21828 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -408,148 +408,148 @@ end
- map_op = {
- a_2 = "000000005a000000j",
- ar_2 = "0000000000001a00g",
--ay_5 = "0000e3000000005al",
--ag_5 = "0000e30000000008l",
-+ay_2 = "0000e3000000005al",
-+ag_2 = "0000e30000000008l",
- agr_2 = "00000000b9080000h",
--agf_5 = "0000e30000000018l",
-+agf_2 = "0000e30000000018l",
- agfr_2 = "00000000b9180000h",
- axbr_2 = "00000000b34a0000h",
- adbr_2 = "00000000b31a0000h",
- aebr_2 = "00000000b30a0000h",
--ah_4 = "000000004a000000j",
--ahy_5 = "0000e3000000007al",
--afi_3 = "0000c20900000000n",
--agfi_3 = "0000c20800000000n",
--aih_3 = "0000cc0800000000n",
--al_4 = "000000005e000000j",
-+ah_2 = "000000004a000000j",
-+ahy_2 = "0000e3000000007al",
-+afi_2 = "0000c20900000000n",
-+agfi_2 = "0000c20800000000n",
-+aih_2 = "0000cc0800000000n",
-+al_2 = "000000005e000000j",
- alr_2 = "0000000000001e00g",
--aly_5 = "0000e3000000005el",
--alg_5 = "0000e3000000000al",
-+aly_2 = "0000e3000000005el",
-+alg_2 = "0000e3000000000al",
- algr_2 = "00000000b90a0000h",
--algf_5 = "0000e3000000001al",
-+algf_2 = "0000e3000000001al",
- algfr_2 = "00000000b91a0000h",
--alfi_3 = "0000c20b00000000n",
--algfi_3 = "0000c20a00000000n",
--alc_5 = "0000e30000000098l",
-+alfi_2 = "0000c20b00000000n",
-+algfi_2 = "0000c20a00000000n",
-+alc_2 = "0000e30000000098l",
- alcr_2 = "00000000b9980000h",
--alcg_5 = "0000e30000000088l",
-+alcg_2 = "0000e30000000088l",
- alcgr_2 = "00000000b9880000h",
--alsih_3 = "0000cc0a00000000n",
--alsihn_3 = "0000cc0b00000000n",
-+alsih_2 = "0000cc0a00000000n",
-+alsihn_2 = "0000cc0b00000000n",
- axr_2 = "0000000000003600g",
--ad_4 = "000000006a000000j",
-+ad_2 = "000000006a000000j",
- adr_2 = "0000000000002a00g",
--ae_4 = "000000007a000000j",
-+ae_2 = "000000007a000000j",
- aer_2 = "0000000000003a00g",
--aw_4 = "000000006e000000j",
-+aw_2 = "000000006e000000j",
- awr_2 = "0000000000002e00g",
--au_4 = "000000007e000000j",
-+au_2 = "000000007e000000j",
- aur_2 = "0000000000003e00g",
--n_4 = "0000000054000000j",
-+n_2 = "0000000054000000j",
- nr_2 = "0000000000001400g",
--ny_5 = "0000e30000000054l",
--ng_5 = "0000e30000000080l",
-+ny_2 = "0000e30000000054l",
-+ng_2 = "0000e30000000080l",
- ngr_2 = "00000000b9800000h",
--nihf_3 = "0000c00a00000000n",
--nilf_3 = "0000c00b00000000n",
--bal_4 = "0000000045000000j",
-+nihf_2 = "0000c00a00000000n",
-+nilf_2 = "0000c00b00000000n",
-+bal_2 = "0000000045000000j",
- balr_2 = "000000000000500g",
--bas_4 = "000000004d000000j",
-+bas_2 = "000000004d000000j",
- basr_2 = "0000000000000d00g",
- bassm_2 = "0000000000000c00g",
- bsa_2 = "00000000b25a0000h",
- bsm_2 = "0000000000000b00g",
- bakr_2 = "00000000b2400000h",
- bsg_2 = "00000000b2580000h",
--bc_4 = "0000000047000000k",
-+bc_2 = "0000000047000000k",
- bcr_2 = "000000000000700g",
--bct_4 = "0000000046000000j",
-+bct_2 = "0000000046000000j",
- bctr_2 = "000000000000600g",
--bctg_5 = "0000e30000000046l",
-+bctg_2 = "0000e30000000046l",
- bctgr_2 = "00000000b9460000h",
--bxh_4 = "0000000086000000q",
--bxhg_5 = "0000eb0000000044s",
--bxle_4 = "0000000087000000q",
--bxleg_5 = "0000eb0000000045s",
--brasl_3 = "0000c00500000000o",
--brcl_3 = "0000c00400000000p",
--brcth_3 = "0000cc0600000000o",
-+bxh_3 = "0000000086000000q",
-+bxhg_3 = "0000eb0000000044s",
-+bxle_3 = "0000000087000000q",
-+bxleg_3 = "0000eb0000000045s",
-+brasl_2 = "0000c00500000000o",
-+brcl_2 = "0000c00400000000p",
-+brcth_2 = "0000cc0600000000o",
- cksm_2 = "00000000b2410000h",
- km_2 = "00000000b92e0000h",
- kmf_2 = "00000000b92a0000h",
- kmc_2 = "00000000b92f0000h",
- kmo_2 = "00000000b92b0000h",
--c_4 = "0000000059000000j",
-+c_2 = "0000000059000000j",
- cr_2 = "0000000000001900g",
--cy_5 = "0000e30000000059l",
--cg_5 = "0000e30000000020l",
-+cy_2 = "0000e30000000059l",
-+cg_2 = "0000e30000000020l",
- cgr_2 = "00000000b9200000h",
--cgf_5 = "0000e30000000030l",
-+cgf_2 = "0000e30000000030l",
- cgfr_2 = "00000000b9300000h",
- cxbr_2 = "00000000b3490000h",
- cxtr_2 = "00000000b3ec0000h",
- cxr_2 = "00000000b3690000h",
- cdbr_2 = "00000000b3190000h",
- cdtr_2 = "00000000b3e40000h",
--cd_4 = "0000000069000000j",
-+cd_2 = "0000000069000000j",
- cdr_2 = "0000000000002900g",
- cebr_2 = "00000000b3090000h",
--ce_4 = "0000000079000000j",
-+ce_2 = "0000000079000000j",
- cer_2 = "0000000000003900g",
- kxbr_2 = "00000000b3480000h",
- kxtr_2 = "00000000b3e80000h",
- kdbr_2 = "00000000b3180000h",
- kdtr_2 = "00000000b3e00000h",
- kebr_2 = "00000000b3080000h",
--cs_4 = "00000000ba000000q",
--csy_5 = "0000eb0000000014s",
--csg_5 = "0000eb0000000030s",
-+cs_3 = "00000000ba000000q",
-+csy_3 = "0000eb0000000014s",
-+csg_3 = "0000eb0000000030s",
- csp_2 = "00000000b2500000h",
- cspg_2 = "00000000b98a0000h",
- cextr_2 = "00000000b3fc0000h",
- cedtr_2 = "00000000b3f40000h",
--cds_4 = "00000000bb000000q",
--cdsy_5 = "0000eb0000000031s",
--cdsg_5 = "0000eb000000003es",
--ch_4 = "0000000049000000j",
--chy_5 = "0000e30000000079l",
--cgh_5 = "0000e30000000034l",
--chrl_3 = "0000c60500000000o",
--cghrl_3 = "0000c60400000000o",
--chf_5 = "0000e300000000cdl",
-+cds_3 = "00000000bb000000q",
-+cdsy_3 = "0000eb0000000031s",
-+cdsg_3 = "0000eb000000003es",
-+ch_2 = "0000000049000000j",
-+chy_2 = "0000e30000000079l",
-+cgh_2 = "0000e30000000034l",
-+chrl_2 = "0000c60500000000o",
-+cghrl_2 = "0000c60400000000o",
-+chf_2 = "0000e300000000cdl",
- chhr_2 = "00000000b9cd0000h",
- chlr_2 = "00000000b9dd0000h",
--cfi_3 = "0000c20d00000000n",
--cgfi_3 = "0000c20c00000000n",
--cih_3 = "0000cc0d00000000n",
--cl_4 = "0000000055000000j",
-+cfi_2 = "0000c20d00000000n",
-+cgfi_2 = "0000c20c00000000n",
-+cih_2 = "0000cc0d00000000n",
-+cl_2 = "0000000055000000j",
- clr_2 = "0000000000001500g",
--cly_5 = "0000e30000000055l",
--clg_5 = "0000e30000000021l",
-+cly_2 = "0000e30000000055l",
-+clg_2 = "0000e30000000021l",
- clgr_2 = "00000000b9210000h",
--clgf_5 = "0000e30000000031l",
-+clgf_2 = "0000e30000000031l",
- clgfr_2 = "00000000b9310000h",
--clmh_5 = "0000eb0000000020t",
--clm_4 = "00000000bd000000r",
--clmy_5 = "0000eb0000000021t",
--clhf_5 = "0000e300000000cfl",
-+clmh_3 = "0000eb0000000020t",
-+clm_3 = "00000000bd000000r",
-+clmy_3 = "0000eb0000000021t",
-+clhf_2 = "0000e300000000cfl",
- clhhr_2 = "00000000b9cf0000h",
- clhlr_2 = "00000000b9df0000h",
--clfi_3 = "0000c20f00000000n",
--clgfi_3 = "0000c20e00000000n",
--clih_3 = "0000cc0f00000000n",
-+clfi_2 = "0000c20f00000000n",
-+clgfi_2 = "0000c20e00000000n",
-+clih_2 = "0000cc0f00000000n",
- clcl_2 = "0000000000000f00g",
--clcle_4 = "00000000a9000000q",
--clclu_5 = "0000eb000000008fs",
--clrl_3 = "0000c60f00000000o",
--clhrl_3 = "0000c60700000000o",
--clgrl_3 = "0000c60a00000000o",
--clghrl_3 = "0000c60600000000o",
--clgfrl_3 = "0000c60e00000000o",
-+clcle_3 = "00000000a9000000q",
-+clclu_3 = "0000eb000000008fs",
-+clrl_2 = "0000c60f00000000o",
-+clhrl_2 = "0000c60700000000o",
-+clgrl_2 = "0000c60a00000000o",
-+clghrl_2 = "0000c60600000000o",
-+clgfrl_2 = "0000c60e00000000o",
- clst_2 = "00000000b25d0000h",
--crl_3 = "0000c60d00000000o",
--cgrl_3 = "0000c60800000000o",
--cgfrl_3 = "0000c60c00000000o",
-+crl_2 = "0000c60d00000000o",
-+cgrl_2 = "0000c60800000000o",
-+cgfrl_2 = "0000c60c00000000o",
- cuse_2 = "00000000b2570000h",
- cmpsc_2 = "00000000b2630000h",
- kimd_2 = "00000000b93e0000h",
-@@ -577,49 +577,49 @@ cxstr_2 = "00000000b3fb0000h",
- cdstr_2 = "00000000b3f30000h",
- cxutr_2 = "00000000b3fa0000h",
- cdutr_2 = "00000000b3f20000h",
--cvb_4 = "000000004f000000j",
--cvby_5 = "0000e30000000006l",
--cvbg_5 = "0000e3000000000el",
--cvd_4 = "000000004e000000j",
--cvdy_5 = "0000e30000000026l",
--cvdg_5 = "0000e3000000002el",
-+cvb_2 = "000000004f000000j",
-+cvby_2 = "0000e30000000006l",
-+cvbg_2 = "0000e3000000000el",
-+cvd_2 = "000000004e000000j",
-+cvdy_2 = "0000e30000000026l",
-+cvdg_2 = "0000e3000000002el",
- cuxtr_2 = "00000000b3ea0000h",
- cudtr_2 = "00000000b3e20000h",
- cu42_2 = "00000000b9b30000h",
- cu41_2 = "00000000b9b20000h",
- cpya_2 = "00000000b24d0000h",
--d_4 = "000000005d000000j",
-+d_2 = "000000005d000000j",
- dr_2 = "0000000000001d00g",
- dxbr_2 = "00000000b34d0000h",
- dxr_2 = "00000000b22d0000h",
- ddbr_2 = "00000000b31d0000h",
--dd_4 = "000000006d000000j",
-+dd_2 = "000000006d000000j",
- ddr_2 = "0000000000002d00g",
- debr_2 = "00000000b30d0000h",
--de_4 = "000000007d000000j",
-+de_2 = "000000007d000000j",
- der_2 = "0000000000003d00g",
--dl_5 = "0000e30000000097l",
-+dl_2 = "0000e30000000097l",
- dlr_2 = "00000000b9970000h",
--dlg_5 = "0000e30000000087l",
-+dlg_2 = "0000e30000000087l",
- dlgr_2 = "00000000b9870000h",
--dsg_5 = "0000e3000000000dl",
-+dsg_2 = "0000e3000000000dl",
- dsgr_2 = "00000000b90d0000h",
--dsgf_5 = "0000e3000000001dl",
-+dsgf_2 = "0000e3000000001dl",
- dsgfr_2 = "00000000b91d0000h",
--x_4 = "0000000057000000j",
-+x_2 = "0000000057000000j",
- xr_2 = "0000000000001700g",
--xy_5 = "0000e30000000057l",
--xg_5 = "0000e30000000082l",
-+xy_2 = "0000e30000000057l",
-+xg_2 = "0000e30000000082l",
- xgr_2 = "00000000b9820000h",
--xihf_3 = "0000c00600000000n",
--xilf_3 = "0000c00700000000n",
--ex_4 = "0000000044000000j",
--exrl_3 = "0000c60000000000o",
-+xihf_2 = "0000c00600000000n",
-+xilf_2 = "0000c00700000000n",
-+ex_2 = "0000000044000000j",
-+exrl_2 = "0000c60000000000o",
- ear_2 = "00000000b24f0000h",
- esea_2 = "00000000b99d0000h",
- eextr_2 = "00000000b3ed0000h",
- eedtr_2 = "00000000b3e50000h",
--ecag_5 = "0000eb000000004cs",
-+ecag_3 = "0000eb000000004cs",
- efpc_2 = "00000000b38c0000h",
- epar_2 = "00000000b2260000h",
- epair_2 = "00000000b99a0000h",
-@@ -635,52 +635,52 @@ flogr_2 = "00000000b9830000h",
- hdr_2 = "0000000000002400g",
- her_2 = "0000000000003400g",
- iac_2 = "00000000b2240000h",
--ic_4 = "0000000043000000j",
--icy_5 = "0000e30000000073l",
--icmh_5 = "0000eb0000000080t",
--icm_4 = "00000000bf000000r",
--icmy_5 = "0000eb0000000081t",
--iihf_3 = "0000c00800000000n",
--iilf_3 = "0000c00900000000n",
-+ic_2 = "0000000043000000j",
-+icy_2 = "0000e30000000073l",
-+icmh_3 = "0000eb0000000080t",
-+icm_3 = "00000000bf000000r",
-+icmy_3 = "0000eb0000000081t",
-+iihf_2 = "0000c00800000000n",
-+iilf_2 = "0000c00900000000n",
- ipm_2 = "00000000b2220000h",
- iske_2 = "00000000b2290000h",
- ivsk_2 = "00000000b2230000h",
--l_4 = "0000000058000000j",
-+l_2 = "0000000058000000j",
- lr_2 = "0000000000001800g",
--ly_5 = "0000e30000000058l",
--lg_5 = "0000e30000000004l",
-+ly_2 = "0000e30000000058l",
-+lg_2 = "0000e30000000004l",
- lgr_2 = "00000000b9040000h",
--lgf_5 = "0000e30000000014l",
-+lgf_2 = "0000e30000000014l",
- lgfr_2 = "00000000b9140000h",
- lxr_2 = "00000000b3650000h",
--ld_4 = "0000000068000000j",
-+ld_2 = "0000000068000000j",
- ldr_2 = "0000000000002800g",
--ldy_5 = "0000ed0000000065l",
--le_4 = "0000000078000000j",
-+ldy_2 = "0000ed0000000065l",
-+le_2 = "0000000078000000j",
- ler_2 = "0000000000003800g",
--ley_5 = "0000ed0000000064l",
--lam_4 = "000000009a000000q",
--lamy_5 = "0000eb000000009as",
--la_4 = "0000000041000000j",
--lay_5 = "0000e30000000071l",
--lae_4 = "0000000051000000j",
--laey_5 = "0000e30000000075l",
--larl_3 = "0000c00000000000o",
--laa_5 = "0000eb00000000f8s",
--laag_5 = "0000eb00000000e8s",
--laal_5 = "0000eb00000000fas",
--laalg_5 = "0000eb00000000eas",
--lan_5 = "0000eb00000000f4s",
--lang_5 = "0000eb00000000e4s",
--lax_5 = "0000eb00000000f7s",
--laxg_5 = "0000eb00000000e7s",
--lao_5 = "0000eb00000000f6s",
--laog_5 = "0000eb00000000e6s",
--lt_5 = "0000e30000000012l",
-+ley_2 = "0000ed0000000064l",
-+lam_3 = "000000009a000000q",
-+lamy_3 = "0000eb000000009as",
-+la_2 = "0000000041000000j",
-+lay_2 = "0000e30000000071l",
-+lae_2 = "0000000051000000j",
-+laey_2 = "0000e30000000075l",
-+larl_2 = "0000c00000000000o",
-+laa_3 = "0000eb00000000f8s",
-+laag_3 = "0000eb00000000e8s",
-+laal_3 = "0000eb00000000fas",
-+laalg_3 = "0000eb00000000eas",
-+lan_3 = "0000eb00000000f4s",
-+lang_3 = "0000eb00000000e4s",
-+lax_3 = "0000eb00000000f7s",
-+laxg_3 = "0000eb00000000e7s",
-+lao_3 = "0000eb00000000f6s",
-+laog_3 = "0000eb00000000e6s",
-+lt_2 = "0000e30000000012l",
- ltr_2 = "0000000000001200g",
--ltg_5 = "0000e30000000002l",
-+ltg_2 = "0000e30000000002l",
- ltgr_2 = "00000000b9020000h",
--ltgf_5 = "0000e30000000032l",
-+ltgf_2 = "0000e30000000032l",
- ltgfr_2 = "00000000b9120000h",
- ltxbr_2 = "00000000b3420000h",
- ltxtr_2 = "00000000b3de0000h",
-@@ -690,11 +690,11 @@ ltdtr_2 = "00000000b3d60000h",
- ltdr_2 = "0000000000002200g",
- ltebr_2 = "00000000b3020000h",
- lter_2 = "0000000000003200g",
--lb_5 = "0000e30000000076l",
-+lb_2 = "0000e30000000076l",
- lbr_2 = "00000000b9260000h",
--lgb_5 = "0000e30000000077l",
-+lgb_2 = "0000e30000000077l",
- lgbr_2 = "00000000b9060000h",
--lbh_5 = "0000e300000000c0l",
-+lbh_2 = "0000e300000000c0l",
- lcr_2 = "0000000000001300g",
- lcgr_2 = "00000000b9030000h",
- lcgfr_2 = "00000000b9130000h",
-@@ -705,52 +705,52 @@ lcdr_2 = "0000000000002300g",
- lcdfr_2 = "00000000b3730000h",
- lcebr_2 = "00000000b3030000h",
- lcer_2 = "0000000000003300g",
--lctl_4 = "00000000b7000000q",
--lctlg_5 = "0000eb000000002fs",
-+lctl_3 = "00000000b7000000q",
-+lctlg_3 = "0000eb000000002fs",
- fixr_2 = "00000000b3670000h",
- fidr_2 = "00000000b37f0000h",
- fier_2 = "00000000b3770000h",
- ldgr_2 = "00000000b3c10000h",
- lgdr_2 = "00000000b3cd0000h",
--lh_4 = "0000000048000000j",
-+lh_2 = "0000000048000000j",
- lhr_2 = "00000000b9270000h",
--lhy_5 = "0000e30000000078l",
--lgh_5 = "0000e30000000015l",
-+lhy_2 = "0000e30000000078l",
-+lgh_2 = "0000e30000000015l",
- lghr_2 = "00000000b9070000h",
--lhh_5 = "0000e300000000c4l",
--lhrl_3 = "0000c40500000000o",
--lghrl_3 = "0000c40400000000o",
--lfh_5 = "0000e300000000cal",
--lgfi_3 = "0000c00100000000n",
-+lhh_2 = "0000e300000000c4l",
-+lhrl_2 = "0000c40500000000o",
-+lghrl_2 = "0000c40400000000o",
-+lfh_2 = "0000e300000000cal",
-+lgfi_2 = "0000c00100000000n",
- lxdbr_2 = "00000000b3050000h",
- lxdr_2 = "00000000b3250000h",
- lxebr_2 = "00000000b3060000h",
- lxer_2 = "00000000b3260000h",
- ldebr_2 = "00000000b3040000h",
- lder_2 = "00000000b3240000h",
--llgf_5 = "0000e30000000016l",
-+llgf_2 = "0000e30000000016l",
- llgfr_2 = "00000000b9160000h",
--llc_5 = "0000e30000000094l",
-+llc_2 = "0000e30000000094l",
- llcr_2 = "00000000b9940000h",
--llgc_5 = "0000e30000000090l",
-+llgc_2 = "0000e30000000090l",
- llgcr_2 = "00000000b9840000h",
--llch_5 = "0000e300000000c2l",
--llh_5 = "0000e30000000095l",
-+llch_2 = "0000e300000000c2l",
-+llh_2 = "0000e30000000095l",
- llhr_2 = "00000000b9950000h",
--llgh_5 = "0000e30000000091l",
-+llgh_2 = "0000e30000000091l",
- llghr_2 = "00000000b9850000h",
--llhh_5 = "0000e300000000c6l",
--llhrl_3 = "0000c40200000000o",
--llghrl_3 = "0000c40600000000o",
--llihf_3 = "0000c00e00000000n",
--llilf_3 = "0000c00f00000000n",
--llgfrl_3 = "0000c40e00000000o",
--llgt_5 = "0000e30000000017l",
-+llhh_2 = "0000e300000000c6l",
-+llhrl_2 = "0000c40200000000o",
-+llghrl_2 = "0000c40600000000o",
-+llihf_2 = "0000c00e00000000n",
-+llilf_2 = "0000c00f00000000n",
-+llgfrl_2 = "0000c40e00000000o",
-+llgt_2 = "0000e30000000017l",
- llgtr_2 = "00000000b9170000h",
--lm_4 = "0000000098000000q",
--lmy_5 = "0000eb0000000098s",
--lmg_5 = "0000eb0000000004s",
--lmh_5 = "0000eb0000000096s",
-+lm_3 = "0000000098000000q",
-+lmy_3 = "0000eb0000000098s",
-+lmg_3 = "0000eb0000000004s",
-+lmh_3 = "0000eb0000000096s",
- lnr_2 = "0000000000001100g",
- lngr_2 = "00000000b9010000h",
- lngfr_2 = "00000000b9110000h",
-@@ -761,9 +761,9 @@ lndr_2 = "0000000000002100g",
- lndfr_2 = "00000000b3710000h",
- lnebr_2 = "00000000b3010000h",
- lner_2 = "0000000000003100g",
--loc_5 = "0000eb00000000f2t",
--locg_5 = "0000eb00000000e2t",
--lpq_5 = "0000e3000000008fl",
-+loc_3 = "0000eb00000000f2t",
-+locg_3 = "0000eb00000000e2t",
-+lpq_2 = "0000e3000000008fl",
- lpr_2 = "0000000000001000g",
- lpgr_2 = "00000000b9000000h",
- lpgfr_2 = "00000000b9100000h",
-@@ -774,16 +774,16 @@ lpdr_2 = "0000000000002000g",
- lpdfr_2 = "00000000b3700000h",
- lpebr_2 = "00000000b3000000h",
- lper_2 = "0000000000003000g",
--lra_4 = "00000000b1000000j",
--lray_5 = "0000e30000000013l",
--lrag_5 = "0000e30000000003l",
--lrl_3 = "0000c40d00000000o",
--lgrl_3 = "0000c40800000000o",
--lgfrl_3 = "0000c40c00000000o",
--lrvh_5 = "0000e3000000001fl",
--lrv_5 = "0000e3000000001el",
-+lra_2 = "00000000b1000000j",
-+lray_2 = "0000e30000000013l",
-+lrag_2 = "0000e30000000003l",
-+lrl_2 = "0000c40d00000000o",
-+lgrl_2 = "0000c40800000000o",
-+lgfrl_2 = "0000c40c00000000o",
-+lrvh_2 = "0000e3000000001fl",
-+lrv_2 = "0000e3000000001el",
- lrvr_2 = "00000000b91f0000h",
--lrvg_5 = "0000e3000000000fl",
-+lrvg_2 = "0000e3000000000fl",
- lrvgr_2 = "00000000b90f0000h",
- ldxbr_2 = "00000000b3450000h",
- ldxr_2 = "0000000000002500g",
-@@ -800,50 +800,50 @@ lzdr_2 = "00000000b3750000h",
- lzer_2 = "00000000b3740000h",
- msta_2 = "00000000b2470000h",
- mvcl_2 = "0000000000000e00g",
--mvcle_4 = "00000000a8000000q",
--mvclu_5 = "0000eb000000008es",
-+mvcle_3 = "00000000a8000000q",
-+mvclu_3 = "0000eb000000008es",
- mvpg_2 = "00000000b2540000h",
- mvst_2 = "00000000b2550000h",
--m_4 = "000000005c000000j",
--mfy_5 = "0000e3000000005cl",
-+m_2 = "000000005c000000j",
-+mfy_2 = "0000e3000000005cl",
- mr_2 = "0000000000001c00g",
- mxbr_2 = "00000000b34c0000h",
- mxr_2 = "0000000000002600g",
- mdbr_2 = "00000000b31c0000h",
--md_4 = "000000006c000000j",
-+md_2 = "000000006c000000j",
- mdr_2 = "0000000000002c00g",
- mxdbr_2 = "00000000b3070000h",
--mxd_4 = "0000000067000000j",
-+mxd_2 = "0000000067000000j",
- mxdr_2 = "0000000000002700g",
- meebr_2 = "00000000b3170000h",
- meer_2 = "00000000b3370000h",
- mdebr_2 = "00000000b30c0000h",
--mde_4 = "000000007c000000j",
-+mde_2 = "000000007c000000j",
- mder_2 = "0000000000003c00g",
--me_4 = "000000007c000000j",
-+me_2 = "000000007c000000j",
- mer_2 = "0000000000003c00g",
--mh_4 = "000000004c000000j",
--mhy_5 = "0000e3000000007cl",
--mlg_5 = "0000e30000000086l",
-+mh_2 = "000000004c000000j",
-+mhy_2 = "0000e3000000007cl",
-+mlg_2 = "0000e30000000086l",
- mlgr_2 = "00000000b9860000h",
--ml_5 = "0000e30000000096l",
-+ml_2 = "0000e30000000096l",
- mlr_2 = "00000000b9960000h",
--ms_4 = "0000000071000000j",
-+ms_2 = "0000000071000000j",
- msr_2 = "00000000b2520000h",
--msy_5 = "0000e30000000051l",
--msg_5 = "0000e3000000000cl",
-+msy_2 = "0000e30000000051l",
-+msg_2 = "0000e3000000000cl",
- msgr_2 = "00000000b90c0000h",
--msgf_5 = "0000e3000000001cl",
-+msgf_2 = "0000e3000000001cl",
- msgfr_2 = "00000000b91c0000h",
--msfi_3 = "0000c20100000000n",
--msgfi_3 = "0000c20000000000n",
--o_4 = "0000000056000000j",
--or_2 = "0000000000001600g",
--oy_5 = "0000e30000000056l",
--og_5 = "0000e30000000081l",
-+msfi_2 = "0000c20100000000n",
-+msgfi_2 = "0000c20000000000n",
-+o_2 = "0000000056000000j",
-+or_2 = "0000000000001600g",
-+oy_2 = "0000e30000000056l",
-+og_2 = "0000e30000000081l",
- ogr_2 = "00000000b9810000h",
--oihf_3 = "0000c00c00000000n",
--oilf_3 = "0000c00d00000000n",
-+oihf_2 = "0000c00c00000000n",
-+oilf_2 = "0000c00d00000000n",
- pgin_2 = "00000000b22e0000h",
- pgout_2 = "00000000b22f0000h",
- pcc_2 = "00000000b92c0000h",
-@@ -851,15 +851,15 @@ pckmo_2 = "00000000b9280000h",
- pfmf_2 = "00000000b9af0000h",
- ptf_2 = "00000000b9a20000h",
- popcnt_2 = "00000000b9e10000h",
--pfd_5 = "0000e30000000036m",
--pfdrl_3 = "0000c60200000000p",
-+pfd_2 = "0000e30000000036m",
-+pfdrl_2 = "0000c60200000000p",
- pt_2 = "00000000b2280000h",
- pti_2 = "00000000b99e0000h",
- palb_2 = "00000000b2480000h",
- rrbe_2 = "00000000b22a0000h",
- rrbm_2 = "00000000b9ae0000h",
--rll_5 = "0000eb000000001ds",
--rllg_5 = "0000eb000000001cs",
-+rll_3 = "0000eb000000001ds",
-+rllg_3 = "0000eb000000001cs",
- srst_2 = "00000000b25e0000h",
- srstu_2 = "00000000b9be0000h",
- sar_2 = "00000000b24e0000h",
-@@ -868,22 +868,22 @@ sfasr_2 = "00000000b3850000h",
- spm_2 = "000000000000400g",
- ssar_2 = "00000000b2250000h",
- ssair_2 = "00000000b99f0000h",
--slda_4 = "000000008f000000q",
--sldl_4 = "000000008d000000q",
--sla_4 = "000000008b000000q",
--slak_5 = "0000eb00000000dds",
--slag_5 = "0000eb000000000bs",
--sll_4 = "0000000089000000q",
--sllk_5 = "0000eb00000000dfs",
--sllg_5 = "0000eb000000000ds",
--srda_4 = "000000008e000000q",
--srdl_4 = "000000008c000000q",
--sra_4 = "000000008a000000q",
--srak_5 = "0000eb00000000dcs",
--srag_5 = "0000eb000000000as",
--srl_4 = "0000000088000000q",
--srlk_5 = "0000eb00000000des",
--srlg_5 = "0000eb000000000cs",
-+slda_3 = "000000008f000000q",
-+sldl_3 = "000000008d000000q",
-+sla_3 = "000000008b000000q",
-+slak_3 = "0000eb00000000dds",
-+slag_3 = "0000eb000000000bs",
-+sll_3 = "0000000089000000q",
-+sllk_3 = "0000eb00000000dfs",
-+sllg_3 = "0000eb000000000ds",
-+srda_3 = "000000008e000000q",
-+srdl_3 = "000000008c000000q",
-+sra_3 = "000000008a000000q",
-+srak_3 = "0000eb00000000dcs",
-+srag_3 = "0000eb000000000as",
-+srl_3 = "0000000088000000q",
-+srlk_3 = "0000eb00000000des",
-+srlg_3 = "0000eb000000000cs",
- sqxbr_2 = "00000000b3160000h",
- sqxr_2 = "00000000b3360000h",
- sqdbr_2 = "00000000b3150000h",
-@@ -891,79 +891,79 @@ sqdr_2 = "00000000b2440000h",
- sqebr_2 = "00000000b3140000h",
- sqer_2 = "00000000b2450000h",
- st_2 = "0000000050000000j",
--sty_5 = "0000e30000000050l",
--stg_5 = "0000e30000000024l",
--std_4 = "0000000060000000j",
--stdy_5 = "0000ed0000000067l",
--ste_4 = "0000000070000000j",
--stey_5 = "0000ed0000000066l",
--stam_4 = "000000009b000000q",
--stamy_5 = "0000eb000000009bs",
--stc_4 = "0000000042000000j",
--stcy_5 = "0000e30000000072l",
--stch_5 = "0000e300000000c3l",
--stcmh_5 = "0000eb000000002ct",
--stcm_4 = "00000000be000000r",
--stcmy_5 = "0000eb000000002dt",
--stctl_4 = "00000000b6000000q",
--stctg_5 = "0000eb0000000025s",
--sth_4 = "0000000040000000j",
--sthy_5 = "0000e30000000070l",
--sthh_5 = "0000e300000000c7l",
--sthrl_3 = "0000c40700000000o",
--stfh_5 = "0000e300000000cbl",
--stm_4 = "0000000090000000q",
--stmy_5 = "0000eb0000000090s",
--stmg_5 = "0000eb0000000024s",
--stmh_5 = "0000eb0000000026s",
--stoc_5 = "0000eb00000000f3t",
--stocg_5 = "0000eb00000000e3t",
--stpq_5 = "0000e3000000008el",
--strl_3 = "0000c40f00000000o",
--stgrl_3 = "0000c40b00000000o",
--strvh_5 = "0000e3000000003fl",
--strv_5 = "0000e3000000003el",
--strvg_5 = "0000e3000000002fl",
-+sty_2 = "0000e30000000050l",
-+stg_2 = "0000e30000000024l",
-+std_2 = "0000000060000000j",
-+stdy_2 = "0000ed0000000067l",
-+ste_2 = "0000000070000000j",
-+stey_2 = "0000ed0000000066l",
-+stam_3 = "000000009b000000q",
-+stamy_3 = "0000eb000000009bs",
-+stc_2 = "0000000042000000j",
-+stcy_2 = "0000e30000000072l",
-+stch_2 = "0000e300000000c3l",
-+stcmh_3 = "0000eb000000002ct",
-+stcm_3 = "00000000be000000r",
-+stcmy_3 = "0000eb000000002dt",
-+stctl_3 = "00000000b6000000q",
-+stctg_3 = "0000eb0000000025s",
-+sth_2 = "0000000040000000j",
-+sthy_2 = "0000e30000000070l",
-+sthh_2 = "0000e300000000c7l",
-+sthrl_2 = "0000c40700000000o",
-+stfh_2 = "0000e300000000cbl",
-+stm_3 = "0000000090000000q",
-+stmy_3 = "0000eb0000000090s",
-+stmg_3 = "0000eb0000000024s",
-+stmh_3 = "0000eb0000000026s",
-+stoc_3 = "0000eb00000000f3t",
-+stocg_3 = "0000eb00000000e3t",
-+stpq_2 = "0000e3000000008el",
-+strl_2 = "0000c40f00000000o",
-+stgrl_2 = "0000c40b00000000o",
-+strvh_2 = "0000e3000000003fl",
-+strv_2 = "0000e3000000003el",
-+strvg_2 = "0000e3000000002fl",
- stura_2 = "00000000b2460000h",
- sturg_2 = "00000000b9250000h",
--s_4 = "000000005b000000j",
-+s_2 = "000000005b000000j",
- sr_2 = "0000000000001b00g",
--sy_5 = "0000e3000000005bl",
--sg_5 = "0000e30000000009l",
-+sy_2 = "0000e3000000005bl",
-+sg_2 = "0000e30000000009l",
- sgr_2 = "00000000b9090000h",
--sgf_5 = "0000e30000000019l",
-+sgf_2 = "0000e30000000019l",
- sgfr_2 = "00000000b9190000h",
- sxbr_2 = "00000000b34b0000h",
- sdbr_2 = "00000000b31b0000h",
- sebr_2 = "00000000b30b0000h",
--sh_4 = "000000004b000000j",
--shy_5 = "0000e3000000007bl",
--sl_4 = "000000005f000000j",
-+sh_2 = "000000004b000000j",
-+shy_2 = "0000e3000000007bl",
-+sl_2 = "000000005f000000j",
- slr_2 = "0000000000001f00g",
--sly_5 = "0000e3000000005fl",
--slg_5 = "0000e3000000000bl",
-+sly_2 = "0000e3000000005fl",
-+slg_2 = "0000e3000000000bl",
- slgr_2 = "00000000b90b0000h",
--slgf_5 = "0000e3000000001bl",
-+slgf_2 = "0000e3000000001bl",
- slgfr_2 = "00000000b91b0000h",
--slfi_3 = "0000c20500000000n",
--slgfi_3 = "0000c20400000000n",
--slb_5 = "0000e30000000099l",
-+slfi_2 = "0000c20500000000n",
-+slgfi_2 = "0000c20400000000n",
-+slb_2 = "0000e30000000099l",
- slbr_2 = "00000000b9990000h",
--slbg_5 = "0000e30000000089l",
-+slbg_2 = "0000e30000000089l",
- slbgr_2 = "00000000b9890000h",
- sxr_2 = "0000000000003700g",
--sd_4 = "000000006b000000j",
-+sd_2 = "000000006b000000j",
- sdr_2 = "0000000000002b00g",
--se_4 = "000000007b000000j",
-+se_2 = "000000007b000000j",
- ser_2 = "0000000000003b00g",
--su_4 = "000000007f000000j",
-+su_2 = "000000007f000000j",
- sur_2 = "0000000000003f00g",
--sw_4 = "000000006f000000j",
-+sw_2 = "000000006f000000j",
- swr_2 = "0000000000002f00g",
- tar_2 = "00000000b24c0000h",
- tb_2 = "00000000b22c0000h",
--trace_4 = "0000000099000000q",
--tracg_5 = "0000eb000000000fs",
-+trace_3 = "0000000099000000q",
-+tracg_3 = "0000eb000000000fs",
- tre_2 = "00000000b2a50000h",
- }
- for cond,c in pairs(map_cond) do
-
-From c71a6189bb91c9ebcffc2d26192cf9f899832f5e Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Thu, 1 Dec 2016 14:42:42 -0500
-Subject: [PATCH 047/260] Fix indentation.
-
-I miss gofmt.
----
- dynasm/dasm_s390x.lua | 18 +++++++++---------
- 1 file changed, 9 insertions(+), 9 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 467e21828..2ee949300 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1029,16 +1029,16 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "n" then
-
- elseif p == "y" then
-- local d, x, b, a = parse_mem_bx(params[1])
-- op1 = op1 + x
-- op2 = op2 + shl(b, 12) + d
-- wputhw(op1); wputhw(op2);
-- if a then
-- werror("disp12 actions not yet implemented")
-- end
-+ local d, x, b, a = parse_mem_bx(params[1])
-+ op1 = op1 + x
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2);
-+ if a then
-+ werror("disp12 actions not yet implemented")
-+ end
- elseif p == "z" then
-- op2 = op2 + parse_gpr(params[1])
-- wputhw(op2)
-+ op2 = op2 + parse_gpr(params[1])
-+ wputhw(op2)
- end
- end
-
-
-From 77f283c328b45f65656075443757522a860a9910 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Thu, 1 Dec 2016 17:09:45 -0500
-Subject: [PATCH 048/260] Allow symbols to be used for 12-bit displacements.
-
-The parse_mem_bx function now returns a function to call to add an
-action to the action list to handle the evaluation of the
-displacement. This allows us to delay adding said action until
-after we have emitted the actions for the instruction encodings
-themselves.
-
-Code like this should now work:
-
-int x = 24
-| st r1, x(sp)
----
- dynasm/dasm_s390x.h | 10 +++++++---
- dynasm/dasm_s390x.lua | 19 ++++++++++---------
- 2 files changed, 17 insertions(+), 12 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 837a2ed0d..8b43a78cd 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -234,8 +234,10 @@ void dasm_put(Dst_DECL, int start, ...)
- case DASM_IMM16:
- case DASM_IMM32:
- case DASM_DISP20:
-- case DASM_DISP12:
- fprintf(stderr, "not implemented\n");
-+ case DASM_DISP12:
-+ CK((n>>12) == 0, RANGE_I);
-+ b[pos++] = n;
- break;
- }
- }
-@@ -296,7 +298,7 @@ int dasm_link(Dst_DECL, size_t *szp)
- case DASM_IMM32:
- case DASM_DISP20:
- case DASM_DISP12:
-- fprintf(stderr, "not implemented\n");
-+ pos++;
- break;
- }
- }
-@@ -364,8 +366,10 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_IMM16:
- case DASM_IMM32:
- case DASM_DISP20:
-- case DASM_DISP12:
- fprintf(stderr, "not implemented\n");
-+ break;
-+ case DASM_DISP12:
-+ cp[-1] |= n&0xfff;
- break;
- default: *cp++ = ins; break;
- }
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 2ee949300..b3061653a 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -316,6 +316,8 @@ end
-
- -- Parse memory operand of the form d(x, b) where 0 <= d < 4096 and b and x
- -- are GPRs.
-+-- If the fourth return value is not-nil then it needs to be called to
-+-- insert an action.
- -- Encoded as: xbddd
- local function parse_mem_bx(arg)
- local d, x, b = split_memop(arg)
-@@ -326,11 +328,10 @@ local function parse_mem_bx(arg)
- end
- return dval, x, b, nil
- end
-- -- TODO: handle d being a symbol.
-- -- Action is currently the final return value (the caller needs to add it
-- -- to the action list at a later point).
-- werror("parse_mem_bx: not implemented")
-- return nil
-+ if match(d, "^[rf]1?[0-9]?") then
-+ werror("expected immediate operand, got register")
-+ end
-+ return 0, x, b, function() waction("DISP12", nil, d) end
- end
-
- -- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
-@@ -1018,7 +1019,7 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2);
- if a then
-- werror("disp12 actions not yet implemented")
-+ a()
- end
- elseif p == "k" then
-
-@@ -1034,7 +1035,7 @@ local function parse_template(params, template, nparams, pos)
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2);
- if a then
-- werror("disp12 actions not yet implemented")
-+ a()
- end
- elseif p == "z" then
- op2 = op2 + parse_gpr(params[1])
-@@ -1046,8 +1047,8 @@ end
- function op_template(params, template, nparams)
- if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
- -- Limit number of section buffer positions used by a single dasm_put().
-- -- A single opcode needs a maximum of 3 positions.
-- if secpos+3 > maxsecpos then wflush() end
-+ -- A single opcode needs a maximum of 5 positions.
-+ if secpos+5 > maxsecpos then wflush() end
- local lpos, apos, spos = #actlist, #actargs, secpos
- local ok, err
- for t in gmatch(template, "[^|]+") do
-
-From 6ae327df75bef4bcaba56f24639b09d9b2645982 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Thu, 1 Dec 2016 19:25:32 -0500
-Subject: [PATCH 049/260] Add support for RXY instructions (20-bit
- displacements).
-
----
- dynasm/dasm_s390x.h | 10 ++++++++--
- dynasm/dasm_s390x.lua | 23 ++++++++++++++++++++---
- 2 files changed, 28 insertions(+), 5 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 8b43a78cd..66dfd79a1 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -233,8 +233,11 @@ void dasm_put(Dst_DECL, int start, ...)
- break;
- case DASM_IMM16:
- case DASM_IMM32:
-- case DASM_DISP20:
- fprintf(stderr, "not implemented\n");
-+ case DASM_DISP20:
-+ CK(-(1<<19) <= n && n < (1<<19), RANGE_I);
-+ b[pos++] = n;
-+ break;
- case DASM_DISP12:
- CK((n>>12) == 0, RANGE_I);
- b[pos++] = n;
-@@ -365,9 +368,12 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_LABEL_PC: break;
- case DASM_IMM16:
- case DASM_IMM32:
-- case DASM_DISP20:
- fprintf(stderr, "not implemented\n");
- break;
-+ case DASM_DISP20:
-+ cp[-2] |= n&0xfff;
-+ cp[-1] |= (n>>4)&0xff00;
-+ break;
- case DASM_DISP12:
- cp[-1] |= n&0xfff;
- break;
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index b3061653a..6900944b0 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -346,8 +346,18 @@ end
- -- and b and x are GPRs.
- -- Encoded as: xblllhh (ls are the low-bits of d, and hs are the high bits).
- local function parse_mem_bxy(arg)
-- werror("parse_mem_bxy: not implemented")
-- return nil
-+ local d, x, b = split_memop(arg)
-+ local dval = tonumber(d)
-+ if dval then
-+ if not is_int20(dval) then
-+ werror("displacement out of range: ", dval)
-+ end
-+ return dval, x, b, nil
-+ end
-+ if match(d, "^[rf]1?[0-9]?") then
-+ werror("expected immediate operand, got register")
-+ end
-+ return 0, x, b, function() waction("DISP20", nil, d) end
- end
-
- local function parse_label(label, def)
-@@ -1024,7 +1034,14 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "k" then
-
- elseif p == "l" then
--
-+ local d, x, b, a = parse_mem_bxy(params[2])
-+ op0 = op0 + shl(parse_gpr(params[1]), 4) + x
-+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
-+ op2 = op2 + band(shr(d, 4), 0xff00)
-+ wputhw(op0); wputhw(op1); wputhw(op2)
-+ if a then
-+ a()
-+ end
- elseif p == "m" then
-
- elseif p == "n" then
-
-From 1b7ded54749bf76a4dc44ff3da309da131925b4f Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Thu, 1 Dec 2016 19:45:06 -0500
-Subject: [PATCH 050/260] Add support for RS-a and RSY-a instructions like stm
- and stmg.
-
----
- dynasm/dasm_s390x.lua | 57 ++++++++++++++++++++++++++-----------------
- 1 file changed, 35 insertions(+), 22 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 6900944b0..039681b49 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -307,13 +307,6 @@ local function split_memop(arg)
- return nil
- end
-
---- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
---- Encoded as: bddd
--local function parse_mem_b(arg)
-- werror("parse_mem_b: not implemented")
-- return nil
--end
--
- -- Parse memory operand of the form d(x, b) where 0 <= d < 4096 and b and x
- -- are GPRs.
- -- If the fourth return value is not-nil then it needs to be called to
-@@ -334,12 +327,14 @@ local function parse_mem_bx(arg)
- return 0, x, b, function() waction("DISP12", nil, d) end
- end
-
---- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
---- b is a GPR.
---- Encoded as: blllhh (ls are the low-bits of d, and hs are the high bits).
--local function parse_mem_by(arg)
-- werror("parse_mem_by: not implemented")
-- return nil
-+-- Parse memory operand of the form d(b) where 0 <= d < 4096 and b is a GPR.
-+-- Encoded as: bddd
-+local function parse_mem_b(arg)
-+ local d, x, b, a = parse_mem_bx(arg)
-+ if x ~= 0 then
-+ werror("unexpected index register")
-+ end
-+ return d, b, a
- end
-
- -- Parse memory operand of the form d(x, b) where -(2^20)/2 <= d < (2^20)/2
-@@ -360,6 +355,17 @@ local function parse_mem_bxy(arg)
- return 0, x, b, function() waction("DISP20", nil, d) end
- end
-
-+-- Parse memory operand of the form d(b) where -(2^20)/2 <= d < (2^20)/2 and
-+-- b is a GPR.
-+-- Encoded as: blllhh (ls are the low-bits of d, and hs are the high bits).
-+local function parse_mem_by(arg)
-+ local d, x, b, a = parse_mem_bxy(arg)
-+ if x ~= 0 then
-+ werror("unexpected index register")
-+ end
-+ return d, b, a
-+end
-+
- local function parse_label(label, def)
- local prefix = sub(label, 1, 2)
- -- =>label (pc label reference)
-@@ -1028,9 +1034,7 @@ local function parse_template(params, template, nparams, pos)
- op1 = op1 + shl(parse_gpr(params[1]), 4) + x
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2);
-- if a then
-- a()
-- end
-+ if a then a() end
- elseif p == "k" then
-
- elseif p == "l" then
-@@ -1039,21 +1043,30 @@ local function parse_template(params, template, nparams, pos)
- op1 = op1 + shl(b, 12) + band(d, 0xfff)
- op2 = op2 + band(shr(d, 4), 0xff00)
- wputhw(op0); wputhw(op1); wputhw(op2)
-- if a then
-- a()
-- end
-+ if a then a() end
- elseif p == "m" then
-
- elseif p == "n" then
-
-+ elseif p == "q" then
-+ local d, b, a = parse_mem_b(params[3])
-+ op1 = op1 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
-+ op2 = op2 + shl(b, 12) + d
-+ wputhw(op1); wputhw(op2)
-+ if a then a() end
-+ elseif p == "s" then
-+ local d, b, a = parse_mem_by(params[3])
-+ op0 = op0 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
-+ op1 = op1 + shl(b, 12) + band(d, 0xfff)
-+ op2 = op2 + band(shr(d, 4), 0xff00)
-+ wputhw(op0); wputhw(op1); wputhw(op2)
-+ if a then a() end
- elseif p == "y" then
- local d, x, b, a = parse_mem_bx(params[1])
- op1 = op1 + x
- op2 = op2 + shl(b, 12) + d
- wputhw(op1); wputhw(op2);
-- if a then
-- a()
-- end
-+ if a then a() end
- elseif p == "z" then
- op2 = op2 + parse_gpr(params[1])
- wputhw(op2)
-
-From 4c7e494e0aac66011bfb149c0a71c8acd0ae9c4d Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb@gmail.com>
-Date: Fri, 2 Dec 2016 12:55:43 +0530
-Subject: [PATCH 051/260] Added support for Immediate addressing mode
-
-Adding support for Immediate add mode, need to check how 32 bits is returned, currently followed the displacement method.
----
- dynasm/dasm_s390x.lua | 16 +++++++++++++++-
- 1 file changed, 15 insertions(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 039681b49..c2c5a79a3 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -289,6 +289,10 @@ local function is_int20(num)
- return -shl(1, 19) <= num and num < shl(1, 19)
- end
-
-+local function is_int32(num)
-+ return -shl(1,31) <= num and num <shl(1,31)
-+end
-+
- -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
- -- If x is not specified then it is 0.
- local function split_memop(arg)
-@@ -366,6 +370,14 @@ local function parse_mem_by(arg)
- return d, b, a
- end
-
-+local function parse_imm(arg)
-+ local imm_val = tonumber(arg,16)
-+ if not is_int32(imm_val) then
-+ werror("Immediate value out of range: ", imm_val)
-+ end
-+ return imm_val
-+end
-+
- local function parse_label(label, def)
- local prefix = sub(label, 1, 2)
- -- =>label (pc label reference)
-@@ -1047,7 +1059,9 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "m" then
-
- elseif p == "n" then
--
-+ op0 = op0 + shl(parse_gpr(params[1], 4)
-+ local imm = parse_imm(param[2])
-+ wputhw(op0); waction("IMM32", nil, imm)
- elseif p == "q" then
- local d, b, a = parse_mem_b(params[3])
- op1 = op1 + shl(parse_gpr(params[1]), 4) + parse_gpr(params[2])
-
-From 45553891da72cccd6c5504b91ffed0045beef885 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb@gmail.com>
-Date: Fri, 2 Dec 2016 14:13:55 +0530
-Subject: [PATCH 052/260] Minor change , missed out brace
-
----
- dynasm/dasm_s390x.lua | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index c2c5a79a3..536f51724 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -1059,7 +1059,7 @@ local function parse_template(params, template, nparams, pos)
- elseif p == "m" then
-
- elseif p == "n" then
-- op0 = op0 + shl(parse_gpr(params[1], 4)
-+ op0 = op0 + shl(parse_gpr(params[1]), 4)
- local imm = parse_imm(param[2])
- wputhw(op0); waction("IMM32", nil, imm)
- elseif p == "q" then
-
-From 1d960f228643d223a67069786cecdd71b49dcd6d Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Fri, 2 Dec 2016 15:21:18 +0530
-Subject: [PATCH 053/260] Create test_z_inst.c
-
-Added examples folder
-Added test code to test basic instructions like add , sub and msr
-This code is in processes of further expansion and tuning
----
- dynasm/Examples/test_z_inst.c | 80 +++++++++++++++++++++++++++++++++++
- 1 file changed, 80 insertions(+)
- create mode 100644 dynasm/Examples/test_z_inst.c
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-new file mode 100644
-index 000000000..314ea0c34
---- /dev/null
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -0,0 +1,80 @@
-+#include <assert.h>
-+#include <stdio.h>
-+#include <sys/mman.h>
-+
-+#include "../dynasm/dasm_proto.h"
-+#include "../dynasm/dasm_s390x.h"
-+
-+//DynASM directives.
-+ |.arch s390x
-+ |.actionlist actions
-+
-+/* Instructio modes
-+ mode 0 : RR Mode
-+ mode 1 : I Mode
-+*/
-+
-+void *jitcode(dasm_State **state);
-+void add(dasm_State * , int);
-+void sub(dasm_State * , int);
-+void mul(dasm_State * , int);
-+
-+void *jitcode(dasm_State **state)
-+{
-+ size_t size;
-+ int dasm_status = dasm_link(state, &size);
-+ assert(dasm_status == DASM_S_OK);
-+
-+ void *ret = (int *)calloc(10,sizeof(int));
-+ dasm_encode(state, ret);
-+ dasm_free(state);
-+
-+ return (int *)ret;
-+}
-+
-+void add(dasm_State *state)
-+{
-+ dasm_State ** Dst = &state;
-+
-+ | ar r2,r3
-+ | br r14
-+}
-+
-+void sub(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ | sr r2,r3
-+ | br r14
-+}
-+
-+void mul(dasm_State *state)
-+{
-+ dasm_State **Dst = &state;
-+
-+ | msr r2 , r3
-+ | br r14
-+}
-+
-+void main(int argc, char *argv[])
-+{
-+ dasm_State *state;
-+ dasm_State **Dst = &state;
-+ int num1 , num2;
-+ int *ret;
-+ size_t size;
-+
-+ int* (*fptr)(int , int) = jitcode(&state);
-+
-+ num1 = atoi(argv[1]);
-+ num2 = atoi(argv[2]);
-+
-+ dasm_init(&state, 1);
-+ dasm_setup(&state, actions);
-+
-+ /* Call respective test function */
-+ sub(state);
-+
-+ ret = fptr(num1 , num2);
-+ printf("The value is %d\n" ,ret);
-+}
-
-From f0cc29436c7bab9027352edfd03ccaba0af9f8a9 Mon Sep 17 00:00:00 2001
-From: ketank-new <ketan22584@gmail.com>
-Date: Fri, 2 Dec 2016 15:46:45 +0530
-Subject: [PATCH 054/260] Update test_z_inst.c
-
-added functionality to test different modes of same instruction type
----
- dynasm/Examples/test_z_inst.c | 36 +++++++++++++++++++++++++++--------
- 1 file changed, 28 insertions(+), 8 deletions(-)
-
-diff --git a/dynasm/Examples/test_z_inst.c b/dynasm/Examples/test_z_inst.c
-index 314ea0c34..65ca39ac2 100644
---- a/dynasm/Examples/test_z_inst.c
-+++ b/dynasm/Examples/test_z_inst.c
-@@ -32,15 +32,35 @@ void *jitcode(dasm_State **state)
- return (int *)ret;
- }
-
--void add(dasm_State *state)
-+void add(dasm_State *state , int mode)
- {
- dasm_State ** Dst = &state;
--
-- | ar r2,r3
-- | br r14
-+
-+ switch(mode)
-+ {
-+ /* Case RR instruction mode */
-+ case 0:
-+ {
-+ | ar r2,r3
-+ | br r14
-+ break;
-+ }
-+ /* Case RIL instruction mode */
-+ case 1:
-+ {
-+ | ar r2,0x16
-+ | br r14
-+ break;
-+ }
-+ default:
-+ {
-+ printf( " Mode not recognised \n ");
-+ break;
-+ }
-+ }
- }
-
--void sub(dasm_State *state)
-+void sub(dasm_State *state , int mode)
- {
- dasm_State **Dst = &state;
-
-@@ -48,7 +68,7 @@ void sub(dasm_State *state)
- | br r14
- }
-
--void mul(dasm_State *state)
-+void mul(dasm_State *state, int mode)
- {
- dasm_State **Dst = &state;
-
-@@ -73,8 +93,8 @@ void main(int argc, char *argv[])
- dasm_setup(&state, actions);
-
- /* Call respective test function */
-- sub(state);
-+ add(state , 0);
-
- ret = fptr(num1 , num2);
-- printf("The value is %d\n" ,ret);
-+ printf("Result is %d\n" ,ret);
- }
-
-From b97a7f7b44fd0643d2032cd66362b1476e5eabb6 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb@gmail.com>
-Date: Fri, 2 Dec 2016 17:19:29 +0530
-Subject: [PATCH 055/260] Minor cleanup and modified 32 bit signed check
-
-Modified 32 bit signed check for the immediate value
----
- dynasm/dasm_s390x.lua | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.lua b/dynasm/dasm_s390x.lua
-index 536f51724..76d770e79 100644
---- a/dynasm/dasm_s390x.lua
-+++ b/dynasm/dasm_s390x.lua
-@@ -290,7 +290,7 @@ local function is_int20(num)
- end
-
- local function is_int32(num)
-- return -shl(1,31) <= num and num <shl(1,31)
-+ return -2147483648 <= num and num < 2147483648
- end
-
- -- Split a memory operand of the form d(b) or d(x,b) into d, x and b.
-@@ -1060,7 +1060,7 @@ local function parse_template(params, template, nparams, pos)
-
- elseif p == "n" then
- op0 = op0 + shl(parse_gpr(params[1]), 4)
-- local imm = parse_imm(param[2])
-+ local imm = parse_imm(params[2])
- wputhw(op0); waction("IMM32", nil, imm)
- elseif p == "q" then
- local d, b, a = parse_mem_b(params[3])
-
-From f0dd40dc50b626bf58acdba70b65572c11485dce Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb@gmail.com>
-Date: Fri, 2 Dec 2016 17:37:20 +0530
-Subject: [PATCH 056/260] Adding support for Immediate add mode
-
-The masking in immediate mode might not be proper. I could understand that you had masked 12bits and then 8bits to get the displacement in place for 20-bit displacement ( cp[-2] |= n&0xfff; cp[-1] |= (n>>4)&0xff00;) But in my case I need all the 32bits, so not sure how to go about it. Currently I have just used "n" since no point in "and with 0xffff" But I am getting core dump. Please Let me know your comments on these.
----
- dynasm/dasm_s390x.h | 9 +++++++--
- 1 file changed, 7 insertions(+), 2 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 66dfd79a1..6314ff805 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -233,7 +233,10 @@ void dasm_put(Dst_DECL, int start, ...)
- break;
- case DASM_IMM16:
- case DASM_IMM32:
-- fprintf(stderr, "not implemented\n");
-+ CK((n>>32) == 0, RANGE_I);
-+ b[pos++]=n;
-+ break;
-+ //fprintf(stderr, "not implemented\n");
- case DASM_DISP20:
- CK(-(1<<19) <= n && n < (1<<19), RANGE_I);
- b[pos++] = n;
-@@ -368,7 +371,9 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_LABEL_PC: break;
- case DASM_IMM16:
- case DASM_IMM32:
-- fprintf(stderr, "not implemented\n");
-+ //pintf(stderr, "not implemented\n");
-+ cp[-1] |= n
-+ cp[-2] |= n
- break;
- case DASM_DISP20:
- cp[-2] |= n&0xfff;
-
-From 3d5c692e13c38ea1d09e562e3f8dd5e6157dd217 Mon Sep 17 00:00:00 2001
-From: niravthakkar <thakkarniravb@gmail.com>
-Date: Fri, 2 Dec 2016 17:39:00 +0530
-Subject: [PATCH 057/260] Minor change: Cleanup
-
----
- dynasm/dasm_s390x.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index 6314ff805..ccfe98f12 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -373,7 +373,7 @@ int dasm_encode(Dst_DECL, void *buffer)
- case DASM_IMM32:
- //pintf(stderr, "not implemented\n");
- cp[-1] |= n
-- cp[-2] |= n
-+ cp[-2] |= (n >>4)
- break;
- case DASM_DISP20:
- cp[-2] |= n&0xfff;
-
-From 621ae87058da541506e0b43240352e09d76ddd60 Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Fri, 2 Dec 2016 13:06:03 -0500
-Subject: [PATCH 058/260] Cleanup and fix compilation.
-
----
- dynasm/dasm_s390x.h | 12 ++++++++----
- 1 file changed, 8 insertions(+), 4 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index ccfe98f12..d3039815a 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -232,11 +232,14 @@ void dasm_put(Dst_DECL, int start, ...)
- b[pos++] = ofs; /* Store pass1 offset estimate. */
- break;
- case DASM_IMM16:
-+ ofs += 2;
-+ fprintf(stderr, "DASM_IMM16 not implemented\n");
-+ break;
- case DASM_IMM32:
-+ ofs += 4;
- CK((n>>32) == 0, RANGE_I);
- b[pos++]=n;
- break;
-- //fprintf(stderr, "not implemented\n");
- case DASM_DISP20:
- CK(-(1<<19) <= n && n < (1<<19), RANGE_I);
- b[pos++] = n;
-@@ -370,10 +373,11 @@ int dasm_encode(Dst_DECL, void *buffer)
- break;
- case DASM_LABEL_PC: break;
- case DASM_IMM16:
-+ fprintf(stderr, "DASM_IMM16 not implemented\n");
-+ break;
- case DASM_IMM32:
-- //pintf(stderr, "not implemented\n");
-- cp[-1] |= n
-- cp[-2] |= (n >>4)
-+ *cp++ = n >> 16;
-+ *cp++ = n;
- break;
- case DASM_DISP20:
- cp[-2] |= n&0xfff;
-
-From fc2b633532dc80058d18af3a14ebf3931982a7ff Mon Sep 17 00:00:00 2001
-From: Michael Munday <munday@ca.ibm.com>
-Date: Fri, 2 Dec 2016 13:41:45 -0500
-Subject: [PATCH 059/260] Auto-format dasm_s390x.h.
-
-I did this mostly to get rid of the annoying tabs/spaces mix in this
-file. It has the side effect of forcing newlines before statements
-which I think is a better style (and not particularly inconsistent
-with the original which used both styles). Other than that I've tried
-to match the original style as closely as possible.
-
-Generated with this command:
-
-indent -i2 -brs -cli0 -br -ce -npcs -nbc -di1 -npsl -ncs dasm_s390x.h
----
- dynasm/dasm_s390x.h | 269 +++++++++++++++++++++++++++-----------------
- 1 file changed, 168 insertions(+), 101 deletions(-)
-
-diff --git a/dynasm/dasm_s390x.h b/dynasm/dasm_s390x.h
-index d3039815a..a5daaa354 100644
---- a/dynasm/dasm_s390x.h
-+++ b/dynasm/dasm_s390x.h
-@@ -21,7 +21,9 @@ enum {
- /* The following actions need a buffer position. */
- DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
- /* The following actions also have an argument. */
-- DASM_REL_PC, DASM_LABEL_PC, DASM_DISP12, DASM_DISP20, DASM_IMM16, DASM_IMM32,
-+ DASM_REL_PC, DASM_LABEL_PC,
-+ DASM_DISP12, DASM_DISP20,
-+ DASM_IMM16, DASM_IMM32,
- DASM__MAX
- };
-
-@@ -53,12 +55,12 @@ typedef const unsigned short *dasm_ActList;
-
- /* Per-section structure. */
- typedef struct dasm_Section {
-- int *rbuf; /* Biased buffer pointer (negative section bias). */
-- int *buf; /* True buffer pointer. */
-- size_t bsize; /* Buffer size in bytes. */
-- int pos; /* Biased buffer position. */
-- int epos; /* End of biased buffer position - max single put. */
-- int ofs; /* Byte offset into section. */
-+ int *rbuf; /* Biased buffer pointer (negative section bias). */
-+ int *buf; /* True buffer pointer. */
-+ size_t bsize; /* Buffer size in bytes. */
-+ int pos; /* Biased buffer position. */
-+ int epos; /* End of biased buffer position - max single put. */
-+ int ofs; /* Byte offset into section. */
- } dasm_Section;
-
- /* Core structure holding the DynASM encoding state. */
-@@ -98,10 +100,10 @@ void dasm_init(Dst_DECL, int maxsection)
- D->globals = NULL;
- D->maxsection = maxsection;
- for (i = 0; i < maxsection; i++) {
-- D->sections[i].buf = NULL; /* Need this for pass3. */
-+ D->sections[i].buf = NULL; /* Need this for pass3. */
- D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
- D->sections[i].bsize = 0;
-- D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
-+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
- }
- }
-
-@@ -113,8 +115,10 @@ void dasm_free(Dst_DECL)
- for (i = 0; i < D->maxsection; i++)
- if (D->sections[i].buf)
- DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
-- if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
-- if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
-+ if (D->pclabels)
-+ DASM_M_FREE(Dst, D->pclabels, D->pcsize);
-+ if (D->lglabels)
-+ DASM_M_FREE(Dst, D->lglabels, D->lgsize);
- DASM_M_FREE(Dst, D, D->psize);
- }
-
-@@ -122,8 +126,8 @@ void dasm_free(Dst_DECL)
- void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
- {
- dasm_State *D = Dst_REF;
-- D->globals = gl - 10; /* Negative bias to compensate for locals. */
-- DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
-+ D->globals = gl - 10; /* Negative bias to compensate for locals. */
-+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl) * sizeof(int));
- }
-
- /* Grow PC label array. Can be called after dasm_setup(), too. */
-@@ -131,8 +135,8 @@ void dasm_growpc(Dst_DECL, unsigned int maxpc)
- {
- dasm_State *D = Dst_REF;
- size_t osz = D->pcsize;
-- DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
-- memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
-+ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc * sizeof(int));
-+ memset((void *)(((unsigned char *)D->pclabels) + osz), 0, D->pcsize - osz);
- }
-
- /* Setup encoder. */
-@@ -140,11 +144,12 @@ void dasm_setup(Dst_DECL, const void *actionlist)
- {
- dasm_State *D = Dst_REF;
- int i;
-- D->actionlist = (dasm_ActList)actionlist;
-+ D->actionlist = (dasm_ActList) actionlist;
- D->status = DASM_S_OK;
- D->section = &D->sections[0];
- memset((void *)D->lglabels, 0, D->lgsize);
-- if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
-+ if (D->pclabels)
-+ memset((void *)D->pclabels, 0, D->pcsize);
- for (i = 0; i < D->maxsection; i++) {
- D->sections[i].pos = DASM_SEC2POS(i);
- D->sections[i].ofs = 0;
-@@ -176,9 +181,10 @@ void dasm_put(Dst_DECL, int start, ...)
-
- if (pos >= sec->epos) {
- DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
-- sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
-+ sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int));
- sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
-- sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
-+ sec->epos =
-+ (int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos);
- }
-
- b = sec->rbuf;
-@@ -193,60 +199,84 @@ void dasm_put(Dst_DECL, int start, ...)
- } else {
- int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
- switch (action) {
-- case DASM_STOP: goto stop;
-+ case DASM_STOP:
-+ goto stop;
- case DASM_SECTION:
-- n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
-- D->section = &D->sections[n]; goto stop;
-- case DASM_ESC: p++; ofs += 4; break;
-- case DASM_REL_EXT: break;
-- case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
-+ n = (ins & 255);
-+ CK(n < D->maxsection, RANGE_SEC);
-+ D->section = &D->sections[n];
-+ goto stop;
-+ case DASM_ESC:
-+ p++;
-+ ofs += 4;
-+ break;
-+ case DASM_REL_EXT:
-+ break;
-+ case DASM_ALIGN:
-+ ofs += (ins & 255);
-+ b[pos++] = ofs;
-+ break;
- case DASM_REL_LG:
-- n = (ins & 2047) - 10; pl = D->lglabels + n;
-+ n = (ins & 2047) - 10;
-+ pl = D->lglabels + n;
- /* Bkwd rel or global. */
-- if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
-- pl += 10; n = *pl;
-- if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
-+ if (n >= 0) {
-+ CK(n >= 10 || *pl < 0, RANGE_LG);
-+ CKPL(lg, LG);
-+ goto putrel;
-+ }
-+ pl += 10;
-+ n = *pl;
-+ if (n < 0)
-+ n = 0; /* Start new chain for fwd rel if label exists. */
- goto linkrel;
- case DASM_REL_PC:
-- pl = D->pclabels + n; CKPL(pc, PC);
-+ pl = D->pclabels + n;
-+ CKPL(pc, PC);
- putrel:
- n = *pl;
-- if (n < 0) { /* Label exists. Get label pos and store it. */
-+ if (n < 0) { /* Label exists. Get label pos and store it. */
- b[pos] = -n;
- } else {
-- linkrel:
-- b[pos] = n; /* Else link to rel chain, anchored at label. */
-+ linkrel:
-+ b[pos] = n; /* Else link to rel chain, anchored at label. */
- *pl = pos;
- }
- pos++;
- break;
- case DASM_LABEL_LG:
-- pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
-+ pl = D->lglabels + (ins & 2047) - 10;
-+ CKPL(lg, LG);
-+ goto putlabel;
- case DASM_LABEL_PC:
-- pl = D->pclabels + n; CKPL(pc, PC);
-+ pl = D->pclabels + n;
-+ CKPL(pc, PC);
- putlabel:
-- n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
-- while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
-+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
-+ while (n > 0) {
-+ int *pb = DASM_POS2PTR(D, n);
-+ n = *pb;
-+ *pb = pos;
- }
-- *pl = -pos; /* Label exists now. */
-- b[pos++] = ofs; /* Store pass1 offset estimate. */
-+ *pl = -pos; /* Label exists now. */
-+ b[pos++] = ofs; /* Store pass1 offset estimate. */
- break;
- case DASM_IMM16:
-- ofs += 2;
-- fprintf(stderr, "DASM_IMM16 not implemented\n");
-- break;
-+ ofs += 2;
-+ fprintf(stderr, "DASM_IMM16 not implemented\n");
-+ break;
- case DASM_IMM32:
-- ofs += 4;
-- CK((n>>32) == 0, RANGE_I);
-- b[pos++]=n;
-- break;
-+ ofs += 4;
-+ CK((n >> 32) == 0, RANGE